[llvm] a2eb07a - [VE] Support atomic exchange instructions

Kazushi Marukawa via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 15 00:44:57 PST 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-12-15T17:43:11+09:00
New Revision: a2eb07aa55405f6e9bca0a0a31681053147e6540

URL: https://github.com/llvm/llvm-project/commit/a2eb07aa55405f6e9bca0a0a31681053147e6540
DIFF: https://github.com/llvm/llvm-project/commit/a2eb07aa55405f6e9bca0a0a31681053147e6540.diff

LOG: [VE] Support atomic exchange instructions

Support atomic exchange and atomic compare and exchange instructions.
Change CAS and TS1AM instructions for ISel patterns.  Add selectADDRzi
pattern for them.  Add TS1AM pseudo instruction also for better ISel.
Add shouldExpandAtomicRMWInIR() function to expand all atomicrmw
instructions except atomicrmw xchg.  Add custom lower for i8/i16
atomicrmw xchg.  Modify replaceFI to support CAS/TS1AM instructions
which use "reg+disp" operands instead of "reg+imm+disp" operands.
And, add several regression tests to check the correctness.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D93161

Added: 
    llvm/test/CodeGen/VE/Scalar/atomic.ll
    llvm/test/CodeGen/VE/Scalar/atomic_cmp_swap.ll
    llvm/test/CodeGen/VE/Scalar/atomic_swap.ll

Modified: 
    llvm/lib/Target/VE/VEISelDAGToDAG.cpp
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.td
    llvm/lib/Target/VE/VERegisterInfo.cpp
    llvm/test/CodeGen/VE/Scalar/atomic_load.ll
    llvm/test/CodeGen/VE/Scalar/atomic_store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index 2ece23c0792f..761baa79b4ab 100644
--- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -139,6 +139,7 @@ class VEDAGToDAGISel : public SelectionDAGISel {
   bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
   bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
   bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+  bool selectADDRzi(SDValue N, SDValue &Base, SDValue &Offset);
 
   StringRef getPassName() const override {
     return "VE DAG->DAG Pattern Instruction Selection";
@@ -249,6 +250,26 @@ bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base,
   return true;
 }
 
+bool VEDAGToDAGISel::selectADDRzi(SDValue Addr, SDValue &Base,
+                                  SDValue &Offset) {
+  if (dyn_cast<FrameIndexSDNode>(Addr))
+    return false;
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress ||
+      Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+    return false; // direct calls.
+
+  if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) {
+    if (isInt<32>(CN->getSExtValue())) {
+      Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+      Offset =
+          CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32);
+      return true;
+    }
+  }
+  return false;
+}
+
 bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) {
   if (dyn_cast<FrameIndexSDNode>(Addr))
     return false;

diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index f8235bb16622..ca548e056c51 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -248,6 +248,28 @@ void VETargetLowering::initSPUActions() {
   // Use custom inserter for ATOMIC_FENCE.
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 
+  // Other atomic instructions.
+  for (MVT VT : MVT::integer_valuetypes()) {
+    // Support i8/i16 atomic swap.
+    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
+
+    // FIXME: Support "atmam" instructions.
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+
+    // VE doesn't have following instructions.
+    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+  }
+
   /// } Atomic isntructions
 }
 
@@ -850,6 +872,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     TARGET_NODE_CASE(GETTLSADDR)
     TARGET_NODE_CASE(MEMBARRIER)
     TARGET_NODE_CASE(CALL)
+    TARGET_NODE_CASE(TS1AM)
     TARGET_NODE_CASE(VEC_BROADCAST)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -1038,6 +1061,116 @@ SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
   return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
 }
 
+TargetLowering::AtomicExpansionKind
+VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // We have TS1AM implementation for i8/i16/i32/i64, so use it.
+  if (AI->getOperation() == AtomicRMWInst::Xchg) {
+    return AtomicExpansionKind::None;
+  }
+  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
+
+  // Otherwise, expand it using compare and exchange instruction to not call
+  // __sync_fetch_and_* functions.
+  return AtomicExpansionKind::CmpXChg;
+}
+
+static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
+                            SDValue &Bits) {
+  SDLoc DL(Op);
+  AtomicSDNode *N = cast<AtomicSDNode>(Op);
+  SDValue Ptr = N->getOperand(1);
+  SDValue Val = N->getOperand(2);
+  EVT PtrVT = Ptr.getValueType();
+  bool Byte = N->getMemoryVT() == MVT::i8;
+  //   Remainder = AND Ptr, 3
+  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
+  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
+  //   Bits = Remainder << 3
+  //   NewVal = Val << Bits
+  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
+  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
+  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
+                      : DAG.getConstant(3, DL, MVT::i32);
+  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
+  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
+  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
+}
+
+static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
+                             SDValue Bits) {
+  SDLoc DL(Op);
+  EVT VT = Data.getValueType();
+  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
+  //   NewData = Data >> Bits
+  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
+  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)
+
+  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
+  return DAG.getNode(ISD::AND, DL, VT,
+                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
+}
+
+SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicSDNode *N = cast<AtomicSDNode>(Op);
+
+  if (N->getMemoryVT() == MVT::i8) {
+    // For i8, use "ts1am"
+    //   Input:
+    //     ATOMIC_SWAP Ptr, Val, Order
+    //
+    //   Output:
+    //     Remainder = AND Ptr, 3
+    //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
+    //     Bits = Remainder << 3
+    //     NewVal = Val << Bits
+    //
+    //     Aligned = AND Ptr, -4
+    //     Data = TS1AM Aligned, Flag, NewVal
+    //
+    //     NewData = Data >> Bits
+    //     Result = NewData & 0xff ; 1 byte result
+    SDValue Flag;
+    SDValue Bits;
+    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
+
+    SDValue Ptr = N->getOperand(1);
+    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
+                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
+    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
+                                  DAG.getVTList(Op.getNode()->getValueType(0),
+                                                Op.getNode()->getValueType(1)),
+                                  {N->getChain(), Aligned, Flag, NewVal},
+                                  N->getMemOperand());
+
+    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
+    SDValue Chain = TS1AM.getValue(1);
+    return DAG.getMergeValues({Result, Chain}, DL);
+  }
+  if (N->getMemoryVT() == MVT::i16) {
+    // For i16, use "ts1am"
+    SDValue Flag;
+    SDValue Bits;
+    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
+
+    SDValue Ptr = N->getOperand(1);
+    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
+                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
+    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
+                                  DAG.getVTList(Op.getNode()->getValueType(0),
+                                                Op.getNode()->getValueType(1)),
+                                  {N->getChain(), Aligned, Flag, NewVal},
+                                  N->getMemOperand());
+
+    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
+    SDValue Chain = TS1AM.getValue(1);
+    return DAG.getMergeValues({Result, Chain}, DL);
+  }
+  // Otherwise, let llvm legalize it.
+  return Op;
+}
+
 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   return makeAddress(Op, DAG);
@@ -1388,6 +1521,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     llvm_unreachable("Should not custom lower this!");
   case ISD::ATOMIC_FENCE:
     return lowerATOMIC_FENCE(Op, DAG);
+  case ISD::ATOMIC_SWAP:
+    return lowerATOMIC_SWAP(Op, DAG);
   case ISD::BlockAddress:
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:
@@ -1418,6 +1553,19 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 }
 /// } Custom Lower
 
+void VETargetLowering::ReplaceNodeResults(SDNode *N,
+                                          SmallVectorImpl<SDValue> &Results,
+                                          SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  case ISD::ATOMIC_SWAP:
+    // Let LLVM expand atomic swap instruction through LowerOperation.
+    return;
+  default:
+    LLVM_DEBUG(N->dumpr(&DAG));
+    llvm_unreachable("Do not know how to custom type legalize this operation!");
+  }
+}
+
 /// JumpTable for VE.
 ///
 ///   VE cannot generate relocatable symbol in jump table.  VE cannot

diff  --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 9924db647f46..0eea838ff55d 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -33,14 +33,15 @@ enum NodeType : unsigned {
                // locals and temporaries)
 
   MEMBARRIER, // Compiler barrier only; generate a no-op.
+  TS1AM,      // A TS1AM instruction used for 1/2 bytes swap.
 
-  VEC_BROADCAST,    // 0: scalar value, 1: VL
+  VEC_BROADCAST, // 0: scalar value, 1: VL
 
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
 
-  // VVP_* nodes.
+// VVP_* nodes.
 #define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
 #include "VVPNodes.def"
 };
@@ -95,6 +96,8 @@ class VETargetLowering : public TargetLowering {
                                 AtomicOrdering Ord) const override;
   Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
   /// Custom Lower {
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -109,6 +112,7 @@ class VETargetLowering : public TargetLowering {
   // EK_LabelDifference32.
 
   SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -124,6 +128,12 @@ class VETargetLowering : public TargetLowering {
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
+  /// Replace the results of node with an illegal result
+  /// type with new values built out of custom code.
+  ///
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
+
   /// VVP Lowering {
   SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
   /// } VVPLowering

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 5837267aa63b..fce3bf06b9d3 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -462,6 +462,13 @@ def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
 def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
                         [SDNPHasChain, SDNPSideEffect]>;
 
+// TS1AM
+def SDT_TS1AM : SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisPtrTy<1>,
+                                     SDTCisVT<2, i32>, SDTCisInt<3>]>;
+def ts1am     : SDNode<"VEISD::TS1AM", SDT_TS1AM,
+                       [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                        SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 // VE Flag Conditions
 //===----------------------------------------------------------------------===//
@@ -1111,9 +1118,9 @@ defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>;
 
 // Section 8.2.20 - CAS (Compare and Swap)
 let DecoderMethod = "DecodeCASI64" in
-defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>;
+defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7, atomic_cmp_swap_64>;
 let DecoderMethod = "DecodeCASI32", cx = 1 in
-defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>;
+defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7, atomic_cmp_swap_32>;
 
 //-----------------------------------------------------------------------------
 // Section 8.3 - Transfer Control Instructions
@@ -1859,6 +1866,14 @@ defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
 defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
 defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
 
+// Atomic swaps
+def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)),
+          (TS1AMWrir $src, 0, $flag, $new)>;
+def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)),
+          (TS1AMWrii MEMriRRM:$src, 15, $new)>;
+def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)),
+          (TS1AMLrir MEMriRRM:$src, (LEAzii 0, 0, 255), i64:$new)>;
+
 // Branches
 def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;
 

diff  --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index 07249616c249..d175ad26c742 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -109,6 +110,29 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
   return &VE::I64RegClass;
 }
 
+static unsigned offsetToDisp(MachineInstr &MI) {
+  // Default offset in instruction's operands (reg+reg+imm).
+  unsigned OffDisp = 2;
+
+#define RRCAS_multi_cases(NAME) NAME##rir : case NAME##rii
+
+  {
+    using namespace llvm::VE;
+    switch (MI.getOpcode()) {
+    case RRCAS_multi_cases(TS1AML):
+    case RRCAS_multi_cases(TS1AMW):
+    case RRCAS_multi_cases(CASL):
+    case RRCAS_multi_cases(CASW):
+      // These instructions use AS format (reg+imm).
+      OffDisp = 1;
+      break;
+    }
+  }
+#undef RRCAS_multi_cases
+
+  return OffDisp;
+}
+
 static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
                       MachineInstr &MI, const DebugLoc &dl,
                       unsigned FIOperandNum, int Offset, Register FrameReg) {
@@ -116,7 +140,7 @@ static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
   // VE has 32 bit offset field, so no need to expand a target instruction.
   // Directly encode it.
   MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
-  MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset);
+  MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset);
 }
 
 void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
@@ -134,7 +158,7 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int Offset;
   Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
 
-  Offset += MI.getOperand(FIOperandNum + 2).getImm();
+  Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
 
   if (MI.getOpcode() == VE::STQrii) {
     const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

diff  --git a/llvm/test/CodeGen/VE/Scalar/atomic.ll b/llvm/test/CodeGen/VE/Scalar/atomic.ll
new file mode 100644
index 000000000000..46c0f8f95887
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/atomic.ll
@@ -0,0 +1,281 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+;;; Test atomicrmw operations
+
+@c = common global i8 0, align 4
+@s = common global i16 0, align 4
+@i = common global i32 0, align 4
+@l = common global i64 0, align 4
+
+; Function Attrs: norecurse nounwind
+define signext i8 @test_atomic_fetch_add_1() {
+; CHECK-LABEL: test_atomic_fetch_add_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, c@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, c@hi(, %s0)
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    ldl.sx %s2, (, %s0)
+; CHECK-NEXT:    lea %s1, -256
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s3, 0, %s2
+; CHECK-NEXT:    adds.w.sx %s2, 1, %s2
+; CHECK-NEXT:    and %s2, %s2, (56)0
+; CHECK-NEXT:    and %s4, %s3, %s1
+; CHECK-NEXT:    or %s2, %s4, %s2
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    brne.w %s2, %s3, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    sll %s0, %s2, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw add i8* @c, i8 1 seq_cst
+  ret i8 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i16 @test_atomic_fetch_sub_2() {
+; CHECK-LABEL: test_atomic_fetch_sub_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, s@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, s@hi(, %s0)
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    ldl.sx %s2, (, %s0)
+; CHECK-NEXT:    lea %s1, -65536
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s3, 0, %s2
+; CHECK-NEXT:    adds.w.sx %s2, -1, %s2
+; CHECK-NEXT:    and %s2, %s2, (48)0
+; CHECK-NEXT:    and %s4, %s3, %s1
+; CHECK-NEXT:    or %s2, %s4, %s2
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    brne.w %s2, %s3, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    sll %s0, %s2, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw sub i16* @s, i16 1 seq_cst
+  ret i16 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i32 @test_atomic_fetch_and_4() {
+; CHECK-LABEL: test_atomic_fetch_and_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, i@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, i@hi(, %s0)
+; CHECK-NEXT:    ldl.sx %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s2, 0, %s1
+; CHECK-NEXT:    and %s1, 1, %s2
+; CHECK-NEXT:    cas.w %s1, (%s0), %s2
+; CHECK-NEXT:    brne.w %s1, %s2, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw and i32* @i, i32 1 seq_cst
+  ret i32 %0
+}
+; Function Attrs: norecurse nounwind
+define i64 @test_atomic_fetch_or_8() {
+; CHECK-LABEL: test_atomic_fetch_or_8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, l@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, l@hi(, %s0)
+; CHECK-NEXT:    ld %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s2, 0, %s0
+; CHECK-NEXT:    or %s0, 1, %s0
+; CHECK-NEXT:    cas.l %s0, (%s1), %s2
+; CHECK-NEXT:    brne.l %s0, %s2, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw or i64* @l, i64 1 seq_cst
+  ret i64 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i8 @test_atomic_fetch_xor_1() {
+; CHECK-LABEL: test_atomic_fetch_xor_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, c@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, c@hi(, %s0)
+; CHECK-NEXT:    and %s1, -4, %s0
+; CHECK-NEXT:    ldl.sx %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s2, 0, %s0
+; CHECK-NEXT:    xor %s0, 1, %s2
+; CHECK-NEXT:    cas.w %s0, (%s1), %s2
+; CHECK-NEXT:    brne.w %s0, %s2, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    sll %s0, %s0, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw xor i8* @c, i8 1 seq_cst
+  ret i8 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i16 @test_atomic_fetch_nand_2() {
+; CHECK-LABEL: test_atomic_fetch_nand_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, s@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, s@hi(, %s0)
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    ldl.sx %s2, (, %s0)
+; CHECK-NEXT:    lea %s1, 65534
+; CHECK-NEXT:    lea %s3, -65536
+; CHECK-NEXT:    and %s3, %s3, (32)0
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s4, 0, %s2
+; CHECK-NEXT:    xor %s2, -1, %s4
+; CHECK-NEXT:    or %s2, %s2, %s1
+; CHECK-NEXT:    and %s2, %s2, (48)0
+; CHECK-NEXT:    and %s5, %s4, %s3
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    cas.w %s2, (%s0), %s4
+; CHECK-NEXT:    brne.w %s2, %s4, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    sll %s0, %s2, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw nand i16* @s, i16 1 seq_cst
+  ret i16 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i32 @test_atomic_fetch_max_4() {
+; CHECK-LABEL: test_atomic_fetch_max_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, i@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, i@hi(, %s0)
+; CHECK-NEXT:    ldl.sx %s0, (, %s1)
+; CHECK-NEXT:    or %s2, 1, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s3, 0, %s0
+; CHECK-NEXT:    maxs.w.sx %s0, %s0, %s2
+; CHECK-NEXT:    cas.w %s0, (%s1), %s3
+; CHECK-NEXT:    brne.w %s0, %s3, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw max i32* @i, i32 1 seq_cst
+  ret i32 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i32 @test_atomic_fetch_min_4() {
+; CHECK-LABEL: test_atomic_fetch_min_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, i@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, i@hi(, %s0)
+; CHECK-NEXT:    ldl.sx %s1, (, %s0)
+; CHECK-NEXT:    or %s2, 2, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s3, 0, %s1
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s2
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    cmov.w.lt %s1, %s3, %s4
+; CHECK-NEXT:    cas.w %s1, (%s0), %s3
+; CHECK-NEXT:    brne.w %s1, %s3, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw min i32* @i, i32 1 seq_cst
+  ret i32 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i32 @test_atomic_fetch_umax_4() {
+; CHECK-LABEL: test_atomic_fetch_umax_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, i@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, i@hi(, %s0)
+; CHECK-NEXT:    ldl.sx %s1, (, %s0)
+; CHECK-NEXT:    or %s2, 1, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s3, 0, %s1
+; CHECK-NEXT:    cmpu.w %s4, %s1, %s2
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    cmov.w.gt %s1, %s3, %s4
+; CHECK-NEXT:    cas.w %s1, (%s0), %s3
+; CHECK-NEXT:    brne.w %s1, %s3, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw umax i32* @i, i32 1 seq_cst
+  ret i32 %0
+}
+
+; Function Attrs: norecurse nounwind
+define signext i32 @test_atomic_fetch_umin_4() {
+; CHECK-LABEL: test_atomic_fetch_umin_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s0, i@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, i@hi(, %s0)
+; CHECK-NEXT:    ldl.sx %s1, (, %s0)
+; CHECK-NEXT:    or %s2, 2, (0)1
+; CHECK-NEXT:  .LBB{{[0-9]+}}_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or %s3, 0, %s1
+; CHECK-NEXT:    cmpu.w %s4, %s1, %s2
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    cmov.w.lt %s1, %s3, %s4
+; CHECK-NEXT:    cas.w %s1, (%s0), %s3
+; CHECK-NEXT:    brne.w %s1, %s3, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+entry:
+  %0 = atomicrmw umin i32* @i, i32 1 seq_cst
+  ret i32 %0
+}

diff  --git a/llvm/test/CodeGen/VE/Scalar/atomic_cmp_swap.ll b/llvm/test/CodeGen/VE/Scalar/atomic_cmp_swap.ll
new file mode 100644
index 000000000000..43b33866889f
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/atomic_cmp_swap.ll
@@ -0,0 +1,2150 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test atomic compare and exchange weak for all types and all memory order
+;;;
+;;; Note:
+;;;   - We test i1/i8/i16/i32/i64/i128/u8/u16/u32/u64/u128.
+;;;   - We test relaxed, acquire, and seq_cst.
+;;;   - We test only exchange with variables since VE doesn't have exchange
+;;;     instructions with immediate values.
+;;;   - We test against an object, a stack object, and a global variable.
+
+%"struct.std::__1::atomic" = type { %"struct.std::__1::__atomic_base" }
+%"struct.std::__1::__atomic_base" = type { %"struct.std::__1::__cxx_atomic_impl" }
+%"struct.std::__1::__cxx_atomic_impl" = type { %"struct.std::__1::__cxx_atomic_base_impl" }
+%"struct.std::__1::__cxx_atomic_base_impl" = type { i8 }
+%"struct.std::__1::atomic.0" = type { %"struct.std::__1::__atomic_base.1" }
+%"struct.std::__1::__atomic_base.1" = type { %"struct.std::__1::__atomic_base.2" }
+%"struct.std::__1::__atomic_base.2" = type { %"struct.std::__1::__cxx_atomic_impl.3" }
+%"struct.std::__1::__cxx_atomic_impl.3" = type { %"struct.std::__1::__cxx_atomic_base_impl.4" }
+%"struct.std::__1::__cxx_atomic_base_impl.4" = type { i8 }
+%"struct.std::__1::atomic.5" = type { %"struct.std::__1::__atomic_base.6" }
+%"struct.std::__1::__atomic_base.6" = type { %"struct.std::__1::__atomic_base.7" }
+%"struct.std::__1::__atomic_base.7" = type { %"struct.std::__1::__cxx_atomic_impl.8" }
+%"struct.std::__1::__cxx_atomic_impl.8" = type { %"struct.std::__1::__cxx_atomic_base_impl.9" }
+%"struct.std::__1::__cxx_atomic_base_impl.9" = type { i8 }
+%"struct.std::__1::atomic.10" = type { %"struct.std::__1::__atomic_base.11" }
+%"struct.std::__1::__atomic_base.11" = type { %"struct.std::__1::__atomic_base.12" }
+%"struct.std::__1::__atomic_base.12" = type { %"struct.std::__1::__cxx_atomic_impl.13" }
+%"struct.std::__1::__cxx_atomic_impl.13" = type { %"struct.std::__1::__cxx_atomic_base_impl.14" }
+%"struct.std::__1::__cxx_atomic_base_impl.14" = type { i16 }
+%"struct.std::__1::atomic.15" = type { %"struct.std::__1::__atomic_base.16" }
+%"struct.std::__1::__atomic_base.16" = type { %"struct.std::__1::__atomic_base.17" }
+%"struct.std::__1::__atomic_base.17" = type { %"struct.std::__1::__cxx_atomic_impl.18" }
+%"struct.std::__1::__cxx_atomic_impl.18" = type { %"struct.std::__1::__cxx_atomic_base_impl.19" }
+%"struct.std::__1::__cxx_atomic_base_impl.19" = type { i16 }
+%"struct.std::__1::atomic.20" = type { %"struct.std::__1::__atomic_base.21" }
+%"struct.std::__1::__atomic_base.21" = type { %"struct.std::__1::__atomic_base.22" }
+%"struct.std::__1::__atomic_base.22" = type { %"struct.std::__1::__cxx_atomic_impl.23" }
+%"struct.std::__1::__cxx_atomic_impl.23" = type { %"struct.std::__1::__cxx_atomic_base_impl.24" }
+%"struct.std::__1::__cxx_atomic_base_impl.24" = type { i32 }
+%"struct.std::__1::atomic.25" = type { %"struct.std::__1::__atomic_base.26" }
+%"struct.std::__1::__atomic_base.26" = type { %"struct.std::__1::__atomic_base.27" }
+%"struct.std::__1::__atomic_base.27" = type { %"struct.std::__1::__cxx_atomic_impl.28" }
+%"struct.std::__1::__cxx_atomic_impl.28" = type { %"struct.std::__1::__cxx_atomic_base_impl.29" }
+%"struct.std::__1::__cxx_atomic_base_impl.29" = type { i32 }
+%"struct.std::__1::atomic.30" = type { %"struct.std::__1::__atomic_base.31" }
+%"struct.std::__1::__atomic_base.31" = type { %"struct.std::__1::__atomic_base.32" }
+%"struct.std::__1::__atomic_base.32" = type { %"struct.std::__1::__cxx_atomic_impl.33" }
+%"struct.std::__1::__cxx_atomic_impl.33" = type { %"struct.std::__1::__cxx_atomic_base_impl.34" }
+%"struct.std::__1::__cxx_atomic_base_impl.34" = type { i64 }
+%"struct.std::__1::atomic.35" = type { %"struct.std::__1::__atomic_base.36" }
+%"struct.std::__1::__atomic_base.36" = type { %"struct.std::__1::__atomic_base.37" }
+%"struct.std::__1::__atomic_base.37" = type { %"struct.std::__1::__cxx_atomic_impl.38" }
+%"struct.std::__1::__cxx_atomic_impl.38" = type { %"struct.std::__1::__cxx_atomic_base_impl.39" }
+%"struct.std::__1::__cxx_atomic_base_impl.39" = type { i64 }
+%"struct.std::__1::atomic.40" = type { %"struct.std::__1::__atomic_base.41" }
+%"struct.std::__1::__atomic_base.41" = type { %"struct.std::__1::__atomic_base.42" }
+%"struct.std::__1::__atomic_base.42" = type { %"struct.std::__1::__cxx_atomic_impl.43" }
+%"struct.std::__1::__cxx_atomic_impl.43" = type { %"struct.std::__1::__cxx_atomic_base_impl.44" }
+%"struct.std::__1::__cxx_atomic_base_impl.44" = type { i128 }
+%"struct.std::__1::atomic.45" = type { %"struct.std::__1::__atomic_base.46" }
+%"struct.std::__1::__atomic_base.46" = type { %"struct.std::__1::__atomic_base.47" }
+%"struct.std::__1::__atomic_base.47" = type { %"struct.std::__1::__cxx_atomic_impl.48" }
+%"struct.std::__1::__cxx_atomic_impl.48" = type { %"struct.std::__1::__cxx_atomic_base_impl.49" }
+%"struct.std::__1::__cxx_atomic_base_impl.49" = type { i128 }
+
+@gv_i1 = global %"struct.std::__1::atomic" zeroinitializer, align 4
+@gv_i8 = global %"struct.std::__1::atomic.0" zeroinitializer, align 4
+@gv_u8 = global %"struct.std::__1::atomic.5" zeroinitializer, align 4
+@gv_i16 = global %"struct.std::__1::atomic.10" zeroinitializer, align 4
+@gv_u16 = global %"struct.std::__1::atomic.15" zeroinitializer, align 4
+@gv_i32 = global %"struct.std::__1::atomic.20" zeroinitializer, align 4
+@gv_u32 = global %"struct.std::__1::atomic.25" zeroinitializer, align 4
+@gv_i64 = global %"struct.std::__1::atomic.30" zeroinitializer, align 8
+@gv_u64 = global %"struct.std::__1::atomic.35" zeroinitializer, align 8
+@gv_i128 = global %"struct.std::__1::atomic.40" zeroinitializer, align 16
+@gv_u128 = global %"struct.std::__1::atomic.45" zeroinitializer, align 16
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z26atomic_cmp_swap_relaxed_i1RNSt3__16atomicIbEERbb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i1 zeroext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_relaxed_i1RNSt3__16atomicIbEERbb:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (56)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = zext i1 %2 to i8
+  %5 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %0, i64 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i8, i8* %1, align 1
+  %7 = cmpxchg weak i8* %5, i8 %6, i8 %4 monotonic monotonic
+  %8 = extractvalue { i8, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %3
+  %10 = extractvalue { i8, i1 } %7, 0
+  store i8 %10, i8* %1, align 1
+  br label %11
+
+11:                                               ; preds = %3, %9
+  ret i1 %8
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z26atomic_cmp_swap_relaxed_i8RNSt3__16atomicIcEERcc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i8 signext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_relaxed_i8RNSt3__16atomicIcEERcc:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    sla.w.sx %s5, (56)0, %s0
+; CHECK-NEXT:    ldl.sx %s6, (, %s4)
+; CHECK-NEXT:    and %s2, %s2, (56)0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s5, %s6
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i8, i8* %1, align 1
+  %6 = cmpxchg weak i8* %4, i8 %5, i8 %2 monotonic monotonic
+  %7 = extractvalue { i8, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i8, i1 } %6, 0
+  store i8 %9, i8* %1, align 1
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i8
+  ret i8 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z26atomic_cmp_swap_relaxed_u8RNSt3__16atomicIhEERhh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i8 zeroext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_relaxed_u8RNSt3__16atomicIhEERhh:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (56)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i8, i8* %1, align 1
+  %6 = cmpxchg weak i8* %4, i8 %5, i8 %2 monotonic monotonic
+  %7 = extractvalue { i8, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i8, i1 } %6, 0
+  store i8 %9, i8* %1, align 1
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i8
+  ret i8 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z27atomic_cmp_swap_relaxed_i16RNSt3__16atomicIsEERss(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16* nocapture nonnull align 2 dereferenceable(2) %1, i16 signext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_relaxed_i16RNSt3__16atomicIsEERss:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    sla.w.sx %s5, (48)0, %s0
+; CHECK-NEXT:    ldl.sx %s6, (, %s4)
+; CHECK-NEXT:    and %s2, %s2, (48)0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s5, %s6
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i16, i16* %1, align 2
+  %6 = cmpxchg weak i16* %4, i16 %5, i16 %2 monotonic monotonic
+  %7 = extractvalue { i16, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i16, i1 } %6, 0
+  store i16 %9, i16* %1, align 2
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i16
+  ret i16 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z27atomic_cmp_swap_relaxed_u16RNSt3__16atomicItEERtt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16* nocapture nonnull align 2 dereferenceable(2) %1, i16 zeroext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_relaxed_u16RNSt3__16atomicItEERtt:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (48)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i16, i16* %1, align 2
+  %6 = cmpxchg weak i16* %4, i16 %5, i16 %2 monotonic monotonic
+  %7 = extractvalue { i16, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i16, i1 } %6, 0
+  store i16 %9, i16* %1, align 2
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i16
+  ret i16 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z27atomic_cmp_swap_relaxed_i32RNSt3__16atomicIiEERii(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32* nocapture nonnull align 4 dereferenceable(4) %1, i32 signext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_relaxed_i32RNSt3__16atomicIiEERii:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s1)
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s0, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i32, i32* %1, align 4
+  %6 = cmpxchg weak i32* %4, i32 %5, i32 %2 monotonic monotonic
+  %7 = extractvalue { i32, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i32, i1 } %6, 0
+  store i32 %9, i32* %1, align 4
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i32
+  ret i32 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z27atomic_cmp_swap_relaxed_u32RNSt3__16atomicIjEERjj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32* nocapture nonnull align 4 dereferenceable(4) %1, i32 zeroext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_relaxed_u32RNSt3__16atomicIjEERjj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s1)
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s0, (63)0, %s4
+; CHECK-NEXT:    breq.w %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i32, i32* %1, align 4
+  %6 = cmpxchg weak i32* %4, i32 %5, i32 %2 monotonic monotonic
+  %7 = extractvalue { i32, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i32, i1 } %6, 0
+  store i32 %9, i32* %1, align 4
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i32
+  ret i32 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z27atomic_cmp_swap_relaxed_i64RNSt3__16atomicIlEERll(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64* nocapture nonnull align 8 dereferenceable(8) %1, i64 %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_relaxed_i64RNSt3__16atomicIlEERll:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s1)
+; CHECK-NEXT:    cas.l %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.l %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s0, (63)0, %s4
+; CHECK-NEXT:    breq.l %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i64, i64* %1, align 8
+  %6 = cmpxchg weak i64* %4, i64 %5, i64 %2 monotonic monotonic
+  %7 = extractvalue { i64, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i64, i1 } %6, 0
+  store i64 %9, i64* %1, align 8
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i64
+  ret i64 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z27atomic_cmp_swap_relaxed_u64RNSt3__16atomicImEERmm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64* nocapture nonnull align 8 dereferenceable(8) %1, i64 %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_relaxed_u64RNSt3__16atomicImEERmm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s1)
+; CHECK-NEXT:    cas.l %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.l %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s0, (63)0, %s4
+; CHECK-NEXT:    breq.l %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i64, i64* %1, align 8
+  %6 = cmpxchg weak i64* %4, i64 %5, i64 %2 monotonic monotonic
+  %7 = extractvalue { i64, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i64, i1 } %6, 0
+  store i64 %9, i64* %1, align 8
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i64
+  ret i64 %11
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_cmp_swap_relaxed_i128RNSt3__16atomicInEERnn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128* nonnull align 16 dereferenceable(16) %1, i128 %2) {
+; CHECK-LABEL: _Z28atomic_cmp_swap_relaxed_i128RNSt3__16atomicInEERnn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    st %s3, 248(, %s11)
+; CHECK-NEXT:    st %s2, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s5, 0, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %2, i128* %4, align 16, !tbaa !2
+  %6 = bitcast %"struct.std::__1::atomic.40"* %0 to i8*
+  %7 = bitcast i128* %1 to i8*
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %6, i8* nonnull %7, i8* nonnull %5, i32 signext 0, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  %9 = zext i1 %8 to i128
+  ret i128 %9
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_cmp_swap_relaxed_u128RNSt3__16atomicIoEERoo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128* nonnull align 16 dereferenceable(16) %1, i128 %2) {
+; CHECK-LABEL: _Z28atomic_cmp_swap_relaxed_u128RNSt3__16atomicIoEERoo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    st %s3, 248(, %s11)
+; CHECK-NEXT:    st %s2, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s5, 0, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %2, i128* %4, align 16, !tbaa !2
+  %6 = bitcast %"struct.std::__1::atomic.45"* %0 to i8*
+  %7 = bitcast i128* %1 to i8*
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %6, i8* nonnull %7, i8* nonnull %5, i32 signext 0, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  %9 = zext i1 %8 to i128
+  ret i128 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z26atomic_cmp_swap_acquire_i1RNSt3__16atomicIbEERbb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i1 zeroext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_acquire_i1RNSt3__16atomicIbEERbb:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (56)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = zext i1 %2 to i8
+  %5 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %0, i64 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i8, i8* %1, align 1
+  %7 = cmpxchg weak i8* %5, i8 %6, i8 %4 acquire acquire
+  %8 = extractvalue { i8, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %3
+  %10 = extractvalue { i8, i1 } %7, 0
+  store i8 %10, i8* %1, align 1
+  br label %11
+
+11:                                               ; preds = %3, %9
+  ret i1 %8
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z26atomic_cmp_swap_acquire_i8RNSt3__16atomicIcEERcc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i8 signext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_acquire_i8RNSt3__16atomicIcEERcc:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    sla.w.sx %s5, (56)0, %s0
+; CHECK-NEXT:    ldl.sx %s6, (, %s4)
+; CHECK-NEXT:    and %s2, %s2, (56)0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s5, %s6
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i8, i8* %1, align 1
+  %6 = cmpxchg weak i8* %4, i8 %5, i8 %2 acquire acquire
+  %7 = extractvalue { i8, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i8, i1 } %6, 0
+  store i8 %9, i8* %1, align 1
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i8
+  ret i8 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z26atomic_cmp_swap_acquire_u8RNSt3__16atomicIhEERhh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i8 zeroext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_acquire_u8RNSt3__16atomicIhEERhh:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (56)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i8, i8* %1, align 1
+  %6 = cmpxchg weak i8* %4, i8 %5, i8 %2 acquire acquire
+  %7 = extractvalue { i8, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i8, i1 } %6, 0
+  store i8 %9, i8* %1, align 1
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i8
+  ret i8 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z27atomic_cmp_swap_acquire_i16RNSt3__16atomicIsEERss(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16* nocapture nonnull align 2 dereferenceable(2) %1, i16 signext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_acquire_i16RNSt3__16atomicIsEERss:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    sla.w.sx %s5, (48)0, %s0
+; CHECK-NEXT:    ldl.sx %s6, (, %s4)
+; CHECK-NEXT:    and %s2, %s2, (48)0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s5, %s6
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = load i16, i16* %1, align 2
+  %6 = cmpxchg weak i16* %4, i16 %5, i16 %2 acquire acquire
+  %7 = extractvalue { i16, i1 } %6, 1
+  br i1 %7, label %10, label %8
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i16, i1 } %6, 0
+  store i16 %9, i16* %1, align 2
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i16
+  ret i16 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z27atomic_cmp_swap_acquire_u16RNSt3__16atomicItEERtt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16* nocapture nonnull align 2 dereferenceable(2) %1, i16 zeroext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_acquire_u16RNSt3__16atomicItEERtt:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s3, (, %s1)
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (48)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i16, i16* %1, align 2  ; expected value, read through the second argument
+  %6 = cmpxchg weak i16* %4, i16 %5, i16 %2 acquire acquire  ; operation under test: weak i16 compare-and-swap, acquire ordering for both success and failure
+  %7 = extractvalue { i16, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i16, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i16 %9, i16* %1, align 2  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i16  ; widen the success flag to the return type
+  ret i16 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z27atomic_cmp_swap_acquire_i32RNSt3__16atomicIiEERii(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32* nocapture nonnull align 4 dereferenceable(4) %1, i32 signext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_acquire_i32RNSt3__16atomicIiEERii:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s1)
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i32, i32* %1, align 4  ; expected value, read through the second argument
+  %6 = cmpxchg weak i32* %4, i32 %5, i32 %2 acquire acquire  ; operation under test: weak i32 compare-and-swap, acquire ordering for both success and failure
+  %7 = extractvalue { i32, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i32, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i32 %9, i32* %1, align 4  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i32  ; widen the success flag to the return type
+  ret i32 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z27atomic_cmp_swap_acquire_u32RNSt3__16atomicIjEERjj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32* nocapture nonnull align 4 dereferenceable(4) %1, i32 zeroext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_acquire_u32RNSt3__16atomicIjEERjj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s1)
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.w %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i32, i32* %1, align 4  ; expected value, read through the second argument
+  %6 = cmpxchg weak i32* %4, i32 %5, i32 %2 acquire acquire  ; operation under test: weak i32 compare-and-swap, acquire ordering for both success and failure
+  %7 = extractvalue { i32, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i32, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i32 %9, i32* %1, align 4  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i32  ; widen the success flag to the return type
+  ret i32 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z27atomic_cmp_swap_acquire_i64RNSt3__16atomicIlEERll(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64* nocapture nonnull align 8 dereferenceable(8) %1, i64 %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_acquire_i64RNSt3__16atomicIlEERll:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s1)
+; CHECK-NEXT:    cas.l %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.l %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.l %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i64, i64* %1, align 8  ; expected value, read through the second argument
+  %6 = cmpxchg weak i64* %4, i64 %5, i64 %2 acquire acquire  ; operation under test: weak i64 compare-and-swap, acquire ordering for both success and failure
+  %7 = extractvalue { i64, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i64, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i64 %9, i64* %1, align 8  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i64  ; widen the success flag to the return type
+  ret i64 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z27atomic_cmp_swap_acquire_u64RNSt3__16atomicImEERmm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64* nocapture nonnull align 8 dereferenceable(8) %1, i64 %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_acquire_u64RNSt3__16atomicImEERmm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s1)
+; CHECK-NEXT:    cas.l %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.l %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    breq.l %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i64, i64* %1, align 8  ; expected value, read through the second argument
+  %6 = cmpxchg weak i64* %4, i64 %5, i64 %2 acquire acquire  ; operation under test: weak i64 compare-and-swap, acquire ordering for both success and failure
+  %7 = extractvalue { i64, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i64, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i64 %9, i64* %1, align 8  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i64  ; widen the success flag to the return type
+  ret i64 %11
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_cmp_swap_acquire_i128RNSt3__16atomicInEERnn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128* nonnull align 16 dereferenceable(16) %1, i128 %2) {
+; CHECK-LABEL: _Z28atomic_cmp_swap_acquire_i128RNSt3__16atomicInEERnn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    st %s3, 248(, %s11)
+; CHECK-NEXT:    st %s2, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 2, (0)1
+; CHECK-NEXT:    or %s5, 2, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %4 = alloca i128, align 16  ; stack temporary for the desired value
+  %5 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %2, i128* %4, align 16, !tbaa !2  ; spill the desired value so it can be passed by address
+  %6 = bitcast %"struct.std::__1::atomic.40"* %0 to i8*  ; atomic object, as an untyped pointer
+  %7 = bitcast i128* %1 to i8*  ; expected value, as an untyped pointer
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %6, i8* nonnull %7, i8* nonnull %5, i32 signext 2, i32 signext 2)  ; operation under test: 16-byte compare-exchange done as a libcall; both memory-order arguments are 2 (presumably __ATOMIC_ACQUIRE)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  %9 = zext i1 %8 to i128  ; widen the libcall's boolean result to the return type
+  ret i128 %9
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_cmp_swap_acquire_u128RNSt3__16atomicIoEERoo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128* nonnull align 16 dereferenceable(16) %1, i128 %2) {
+; CHECK-LABEL: _Z28atomic_cmp_swap_acquire_u128RNSt3__16atomicIoEERoo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    st %s3, 248(, %s11)
+; CHECK-NEXT:    st %s2, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 2, (0)1
+; CHECK-NEXT:    or %s5, 2, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %4 = alloca i128, align 16  ; stack temporary for the desired value
+  %5 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %2, i128* %4, align 16, !tbaa !2  ; spill the desired value so it can be passed by address
+  %6 = bitcast %"struct.std::__1::atomic.45"* %0 to i8*  ; atomic object, as an untyped pointer
+  %7 = bitcast i128* %1 to i8*  ; expected value, as an untyped pointer
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %6, i8* nonnull %7, i8* nonnull %5, i32 signext 2, i32 signext 2)  ; operation under test: 16-byte compare-exchange done as a libcall; both memory-order arguments are 2 (presumably __ATOMIC_ACQUIRE)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  %9 = zext i1 %8 to i128  ; widen the libcall's boolean result to the return type
+  ret i128 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z26atomic_cmp_swap_seq_cst_i1RNSt3__16atomicIbEERbb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i1 zeroext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_seq_cst_i1RNSt3__16atomicIbEERbb:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (56)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = zext i1 %2 to i8  ; widen the i1 desired value to the i8 type used by the cmpxchg
+  %5 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %0, i64 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %6 = load i8, i8* %1, align 1  ; expected value, read through the second argument
+  %7 = cmpxchg weak i8* %5, i8 %6, i8 %4 seq_cst seq_cst  ; operation under test: weak i8 compare-and-swap, seq_cst ordering for both success and failure
+  %8 = extractvalue { i8, i1 } %7, 1  ; success flag
+  br i1 %8, label %11, label %9  ; on success skip the write-back block
+
+9:                                                ; preds = %3
+  %10 = extractvalue { i8, i1 } %7, 0  ; value loaded from memory by the cmpxchg
+  store i8 %10, i8* %1, align 1  ; failure path: write the observed value back through the second argument
+  br label %11
+
+11:                                               ; preds = %3, %9
+  ret i1 %8  ; the success flag is returned as-is
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z26atomic_cmp_swap_seq_cst_i8RNSt3__16atomicIcEERcc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i8 signext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_seq_cst_i8RNSt3__16atomicIcEERcc:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    sla.w.sx %s5, (56)0, %s0
+; CHECK-NEXT:    ldl.sx %s6, (, %s4)
+; CHECK-NEXT:    and %s2, %s2, (56)0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s5, %s6
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i8, i8* %1, align 1  ; expected value, read through the second argument
+  %6 = cmpxchg weak i8* %4, i8 %5, i8 %2 seq_cst seq_cst  ; operation under test: weak i8 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i8, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i8, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i8 %9, i8* %1, align 1  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i8  ; widen the success flag to the return type
+  ret i8 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z26atomic_cmp_swap_seq_cst_u8RNSt3__16atomicIhEERhh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8* nocapture nonnull align 1 dereferenceable(1) %1, i8 zeroext %2) {
+; CHECK-LABEL: _Z26atomic_cmp_swap_seq_cst_u8RNSt3__16atomicIhEERhh:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (56)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i8, i8* %1, align 1  ; expected value, read through the second argument
+  %6 = cmpxchg weak i8* %4, i8 %5, i8 %2 seq_cst seq_cst  ; operation under test: weak i8 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i8, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i8, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i8 %9, i8* %1, align 1  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i8  ; widen the success flag to the return type
+  ret i8 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z27atomic_cmp_swap_seq_cst_i16RNSt3__16atomicIsEERss(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16* nocapture nonnull align 2 dereferenceable(2) %1, i16 signext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_seq_cst_i16RNSt3__16atomicIsEERss:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    sla.w.sx %s5, (48)0, %s0
+; CHECK-NEXT:    ldl.sx %s6, (, %s4)
+; CHECK-NEXT:    and %s2, %s2, (48)0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s5, %s6
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i16, i16* %1, align 2  ; expected value, read through the second argument
+  %6 = cmpxchg weak i16* %4, i16 %5, i16 %2 seq_cst seq_cst  ; operation under test: weak i16 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i16, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i16, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i16 %9, i16* %1, align 2  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i16  ; widen the success flag to the return type
+  ret i16 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z27atomic_cmp_swap_seq_cst_u16RNSt3__16atomicItEERtt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16* nocapture nonnull align 2 dereferenceable(2) %1, i16 zeroext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_seq_cst_u16RNSt3__16atomicItEERtt:
+; CHECK:       # %bb.0: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s4, -4, %s0
+; CHECK-NEXT:    and %s0, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s0, %s0, 3
+; CHECK-NEXT:    ldl.sx %s5, (, %s4)
+; CHECK-NEXT:    sla.w.sx %s6, (48)0, %s0
+; CHECK-NEXT:    sla.w.sx %s2, %s2, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s3, %s0
+; CHECK-NEXT:    nnd %s5, %s6, %s5
+; CHECK-NEXT:    or %s2, %s5, %s2
+; CHECK-NEXT:    or %s5, %s5, %s3
+; CHECK-NEXT:    cas.w %s2, (%s4), %s5
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s5
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s3, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s5, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    srl %s0, %s2, %s0
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s3, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i16, i16* %1, align 2  ; expected value, read through the second argument
+  %6 = cmpxchg weak i16* %4, i16 %5, i16 %2 seq_cst seq_cst  ; operation under test: weak i16 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i16, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i16, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i16 %9, i16* %1, align 2  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i16  ; widen the success flag to the return type
+  ret i16 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z27atomic_cmp_swap_seq_cst_i32RNSt3__16atomicIiEERii(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32* nocapture nonnull align 4 dereferenceable(4) %1, i32 signext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_seq_cst_i32RNSt3__16atomicIiEERii:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i32, i32* %1, align 4  ; expected value, read through the second argument
+  %6 = cmpxchg weak i32* %4, i32 %5, i32 %2 seq_cst seq_cst  ; operation under test: weak i32 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i32, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i32, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i32 %9, i32* %1, align 4  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i32  ; widen the success flag to the return type
+  ret i32 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z27atomic_cmp_swap_seq_cst_u32RNSt3__16atomicIjEERjj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32* nocapture nonnull align 4 dereferenceable(4) %1, i32 zeroext %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_seq_cst_u32RNSt3__16atomicIjEERjj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    cas.w %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.w %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i32, i32* %1, align 4  ; expected value, read through the second argument
+  %6 = cmpxchg weak i32* %4, i32 %5, i32 %2 seq_cst seq_cst  ; operation under test: weak i32 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i32, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i32, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i32 %9, i32* %1, align 4  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i32  ; widen the success flag to the return type
+  ret i32 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z27atomic_cmp_swap_seq_cst_i64RNSt3__16atomicIlEERll(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64* nocapture nonnull align 8 dereferenceable(8) %1, i64 %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_seq_cst_i64RNSt3__16atomicIlEERll:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    cas.l %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.l %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.l %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i64, i64* %1, align 8  ; expected value, read through the second argument
+  %6 = cmpxchg weak i64* %4, i64 %5, i64 %2 seq_cst seq_cst  ; operation under test: weak i64 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i64, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i64, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i64 %9, i64* %1, align 8  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i64  ; widen the success flag to the return type
+  ret i64 %11
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z27atomic_cmp_swap_seq_cst_u64RNSt3__16atomicImEERmm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64* nocapture nonnull align 8 dereferenceable(8) %1, i64 %2) {
+; CHECK-LABEL: _Z27atomic_cmp_swap_seq_cst_u64RNSt3__16atomicImEERmm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s1)
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    cas.l %s2, (%s0), %s3
+; CHECK-NEXT:    cmps.l %s4, %s2, %s3
+; CHECK-NEXT:    or %s0, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s0, (63)0, %s4
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    breq.l %s2, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s2, (, %s1)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the atomic object
+  %5 = load i64, i64* %1, align 8  ; expected value, read through the second argument
+  %6 = cmpxchg weak i64* %4, i64 %5, i64 %2 seq_cst seq_cst  ; operation under test: weak i64 compare-and-swap, seq_cst ordering for both success and failure
+  %7 = extractvalue { i64, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %3
+  %9 = extractvalue { i64, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i64 %9, i64* %1, align 8  ; failure path: write the observed value back through the second argument
+  br label %10
+
+10:                                               ; preds = %3, %8
+  %11 = zext i1 %7 to i64  ; widen the success flag to the return type
+  ret i64 %11
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_cmp_swap_seq_cst_i128RNSt3__16atomicInEERnn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128* nonnull align 16 dereferenceable(16) %1, i128 %2) {
+; CHECK-LABEL: _Z28atomic_cmp_swap_seq_cst_i128RNSt3__16atomicInEERnn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    st %s3, 248(, %s11)
+; CHECK-NEXT:    st %s2, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 5, (0)1
+; CHECK-NEXT:    or %s5, 5, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %4 = alloca i128, align 16  ; stack temporary for the desired value
+  %5 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %2, i128* %4, align 16, !tbaa !2  ; spill the desired value so it can be passed by address
+  %6 = bitcast %"struct.std::__1::atomic.40"* %0 to i8*  ; atomic object, as an untyped pointer
+  %7 = bitcast i128* %1 to i8*  ; expected value, as an untyped pointer
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %6, i8* nonnull %7, i8* nonnull %5, i32 signext 5, i32 signext 5)  ; operation under test: 16-byte compare-exchange done as a libcall; both memory-order arguments are 5 (presumably __ATOMIC_SEQ_CST)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  %9 = zext i1 %8 to i128  ; widen the libcall's boolean result to the return type
+  ret i128 %9
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_cmp_swap_seq_cst_u128RNSt3__16atomicIoEERoo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128* nonnull align 16 dereferenceable(16) %1, i128 %2) {
+; CHECK-LABEL: _Z28atomic_cmp_swap_seq_cst_u128RNSt3__16atomicIoEERoo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    st %s3, 248(, %s11)
+; CHECK-NEXT:    st %s2, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 5, (0)1
+; CHECK-NEXT:    or %s5, 5, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %4 = alloca i128, align 16  ; stack temporary for the desired value
+  %5 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %2, i128* %4, align 16, !tbaa !2  ; spill the desired value so it can be passed by address
+  %6 = bitcast %"struct.std::__1::atomic.45"* %0 to i8*  ; atomic object, as an untyped pointer
+  %7 = bitcast i128* %1 to i8*  ; expected value, as an untyped pointer
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %6, i8* nonnull %7, i8* nonnull %5, i32 signext 5, i32 signext 5)  ; operation under test: 16-byte compare-exchange done as a libcall; both memory-order arguments are 5 (presumably __ATOMIC_SEQ_CST)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  %9 = zext i1 %8 to i128  ; widen the libcall's boolean result to the return type
+  ret i128 %9
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i1 @_Z30atomic_cmp_swap_relaxed_stk_i1Rbb(i8* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_stk_i1Rbb:
+; CHECK:       .LBB{{[0-9]+}}_4: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s2, (, %s0)
+; CHECK-NEXT:    ldl.zx %s3, 8(, %s11)
+; CHECK-NEXT:    lea %s4, -256
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    and %s3, %s3, %s4
+; CHECK-NEXT:    or %s1, %s3, %s1
+; CHECK-NEXT:    or %s3, %s3, %s2
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st1b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic", align 1  ; the atomic object under test is a stack slot
+  %4 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %3, i64 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the stack object
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %4)
+  %5 = zext i1 %1 to i8  ; widen the i1 desired value to the i8 type used by the cmpxchg
+  %6 = load i8, i8* %0, align 1  ; expected value, read through the first argument
+  %7 = cmpxchg weak volatile i8* %4, i8 %6, i8 %5 monotonic monotonic  ; operation under test: weak volatile i8 compare-and-swap on the stack slot, monotonic ordering for both success and failure
+  %8 = extractvalue { i8, i1 } %7, 1  ; success flag
+  br i1 %8, label %11, label %9  ; on success skip the write-back block
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i8, i1 } %7, 0  ; value loaded from memory by the cmpxchg
+  store i8 %10, i8* %0, align 1  ; failure path: write the observed value back through the first argument
+  br label %11
+
+11:                                               ; preds = %2, %9
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %4)
+  ret i1 %8  ; the success flag is returned as-is
+}
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)  ; intrinsic called right after each alloca in the functions of this file that use a stack slot
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)  ; counterpart intrinsic called before those functions return
+
+; Function Attrs: nofree nounwind mustprogress
+define signext i8 @_Z30atomic_cmp_swap_relaxed_stk_i8Rcc(i8* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_stk_i8Rcc:
+; CHECK:       .LBB{{[0-9]+}}_4: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s2, (, %s0)
+; CHECK-NEXT:    ldl.zx %s3, 8(, %s11)
+; CHECK-NEXT:    and %s1, %s1, (56)0
+; CHECK-NEXT:    lea %s4, -256
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    and %s3, %s3, %s4
+; CHECK-NEXT:    or %s1, %s3, %s1
+; CHECK-NEXT:    or %s3, %s3, %s2
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st1b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.0", align 1  ; the atomic object under test is a stack slot
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the stack object
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %4)
+  %5 = load i8, i8* %0, align 1  ; expected value, read through the first argument
+  %6 = cmpxchg weak volatile i8* %4, i8 %5, i8 %1 monotonic monotonic  ; operation under test: weak volatile i8 compare-and-swap on the stack slot, monotonic ordering for both success and failure
+  %7 = extractvalue { i8, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %2
+  %9 = extractvalue { i8, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i8 %9, i8* %0, align 1  ; failure path: write the observed value back through the first argument
+  br label %10
+
+10:                                               ; preds = %2, %8
+  %11 = zext i1 %7 to i8  ; widen the success flag to the return type
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %4)
+  ret i8 %11
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i8 @_Z30atomic_cmp_swap_relaxed_stk_u8Rhh(i8* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_stk_u8Rhh:
+; CHECK:       .LBB{{[0-9]+}}_4: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld1b.zx %s2, (, %s0)
+; CHECK-NEXT:    ldl.zx %s3, 8(, %s11)
+; CHECK-NEXT:    lea %s4, -256
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    and %s3, %s3, %s4
+; CHECK-NEXT:    or %s1, %s3, %s1
+; CHECK-NEXT:    or %s3, %s3, %s2
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st1b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.5", align 1  ; the atomic object under test is a stack slot
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0  ; all-zero GEP: address of the innermost first field of the stack object
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %4)
+  %5 = load i8, i8* %0, align 1  ; expected value, read through the first argument
+  %6 = cmpxchg weak volatile i8* %4, i8 %5, i8 %1 monotonic monotonic  ; operation under test: weak volatile i8 compare-and-swap on the stack slot, monotonic ordering for both success and failure
+  %7 = extractvalue { i8, i1 } %6, 1  ; success flag
+  br i1 %7, label %10, label %8  ; on success skip the write-back block
+
+8:                                                ; preds = %2
+  %9 = extractvalue { i8, i1 } %6, 0  ; value loaded from memory by the cmpxchg
+  store i8 %9, i8* %0, align 1  ; failure path: write the observed value back through the first argument
+  br label %10
+
+10:                                               ; preds = %2, %8
+  %11 = zext i1 %7 to i8  ; widen the success flag to the return type
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %4)
+  ret i8 %11
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define signext i16 @_Z31atomic_cmp_swap_relaxed_stk_i16Rss(i16* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_stk_i16Rss:
+; CHECK:       .LBB{{[0-9]+}}_4: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s2, (, %s0)
+; CHECK-NEXT:    ldl.zx %s3, 8(, %s11)
+; CHECK-NEXT:    and %s1, %s1, (48)0
+; CHECK-NEXT:    lea %s4, -65536
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    and %s3, %s3, %s4
+; CHECK-NEXT:    or %s1, %s3, %s1
+; CHECK-NEXT:    or %s3, %s3, %s2
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st2b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.10", align 2
+  %4 = bitcast %"struct.std::__1::atomic.10"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %4)
+  %5 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i16, i16* %0, align 2
+  %7 = cmpxchg weak volatile i16* %5, i16 %6, i16 %1 monotonic monotonic
+  %8 = extractvalue { i16, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i16, i1 } %7, 0
+  store i16 %10, i16* %0, align 2
+  br label %11
+
+11:                                               ; preds = %2, %9
+  %12 = zext i1 %8 to i16
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %4)
+  ret i16 %12
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i16 @_Z31atomic_cmp_swap_relaxed_stk_u16Rtt(i16* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_stk_u16Rtt:
+; CHECK:       .LBB{{[0-9]+}}_4: # %partword.cmpxchg.loop
+; CHECK-NEXT:    ld2b.zx %s2, (, %s0)
+; CHECK-NEXT:    ldl.zx %s3, 8(, %s11)
+; CHECK-NEXT:    lea %s4, -65536
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    and %s3, %s3, %s4
+; CHECK-NEXT:    or %s1, %s3, %s1
+; CHECK-NEXT:    or %s3, %s3, %s2
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st2b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.15", align 2
+  %4 = bitcast %"struct.std::__1::atomic.15"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %4)
+  %5 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i16, i16* %0, align 2
+  %7 = cmpxchg weak volatile i16* %5, i16 %6, i16 %1 monotonic monotonic
+  %8 = extractvalue { i16, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i16, i1 } %7, 0
+  store i16 %10, i16* %0, align 2
+  br label %11
+
+11:                                               ; preds = %2, %9
+  %12 = zext i1 %8 to i16
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %4)
+  ret i16 %12
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define signext i32 @_Z31atomic_cmp_swap_relaxed_stk_i32Rii(i32* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_stk_i32Rii:
+; CHECK:       .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:    ldl.sx %s3, (, %s0)
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.20", align 4
+  %4 = bitcast %"struct.std::__1::atomic.20"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4)
+  %5 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i32, i32* %0, align 4
+  %7 = cmpxchg weak volatile i32* %5, i32 %6, i32 %1 monotonic monotonic
+  %8 = extractvalue { i32, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i32, i1 } %7, 0
+  store i32 %10, i32* %0, align 4
+  br label %11
+
+11:                                               ; preds = %2, %9
+  %12 = zext i1 %8 to i32
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4)
+  ret i32 %12
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i32 @_Z31atomic_cmp_swap_relaxed_stk_u32Rjj(i32* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_stk_u32Rjj:
+; CHECK:       .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:    ldl.sx %s3, (, %s0)
+; CHECK-NEXT:    cas.w %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.25", align 4
+  %4 = bitcast %"struct.std::__1::atomic.25"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4)
+  %5 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i32, i32* %0, align 4
+  %7 = cmpxchg weak volatile i32* %5, i32 %6, i32 %1 monotonic monotonic
+  %8 = extractvalue { i32, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i32, i1 } %7, 0
+  store i32 %10, i32* %0, align 4
+  br label %11
+
+11:                                               ; preds = %2, %9
+  %12 = zext i1 %8 to i32
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4)
+  ret i32 %12
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define i64 @_Z31atomic_cmp_swap_relaxed_stk_i64Rll(i64* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_stk_i64Rll:
+; CHECK:       .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:    ld %s3, (, %s0)
+; CHECK-NEXT:    cas.l %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.l %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.l %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.30", align 8
+  %4 = bitcast %"struct.std::__1::atomic.30"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4)
+  %5 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i64, i64* %0, align 8
+  %7 = cmpxchg weak volatile i64* %5, i64 %6, i64 %1 monotonic monotonic
+  %8 = extractvalue { i64, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i64, i1 } %7, 0
+  store i64 %10, i64* %0, align 8
+  br label %11
+
+11:                                               ; preds = %2, %9
+  %12 = zext i1 %8 to i64
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4)
+  ret i64 %12
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define i64 @_Z31atomic_cmp_swap_relaxed_stk_u64Rmm(i64* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_stk_u64Rmm:
+; CHECK:       .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:    ld %s3, (, %s0)
+; CHECK-NEXT:    cas.l %s1, 8(%s11), %s3
+; CHECK-NEXT:    cmps.l %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.l %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = alloca %"struct.std::__1::atomic.35", align 8
+  %4 = bitcast %"struct.std::__1::atomic.35"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4)
+  %5 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %3, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %6 = load i64, i64* %0, align 8
+  %7 = cmpxchg weak volatile i64* %5, i64 %6, i64 %1 monotonic monotonic
+  %8 = extractvalue { i64, i1 } %7, 1
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %2
+  %10 = extractvalue { i64, i1 } %7, 0
+  store i64 %10, i64* %0, align 8
+  br label %11
+
+11:                                               ; preds = %2, %9
+  %12 = zext i1 %8 to i64
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4)
+  ret i64 %12
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z32atomic_cmp_swap_relaxed_stk_i128Rnn(i128* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z32atomic_cmp_swap_relaxed_stk_i128Rnn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s3, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s5, 0, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca %"struct.std::__1::atomic.40", align 16
+  %5 = bitcast %"struct.std::__1::atomic.40"* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast i128* %0 to i8*
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %5, i8* nonnull %7, i8* nonnull %6, i32 signext 0, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  %9 = zext i1 %8 to i128
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  ret i128 %9
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z32atomic_cmp_swap_relaxed_stk_u128Roo(i128* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z32atomic_cmp_swap_relaxed_stk_u128Roo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s3, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s5, 0, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca %"struct.std::__1::atomic.45", align 16
+  %5 = bitcast %"struct.std::__1::atomic.45"* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast i128* %0 to i8*
+  %8 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull %5, i8* nonnull %7, i8* nonnull %6, i32 signext 0, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  %9 = zext i1 %8 to i128
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  ret i128 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z29atomic_cmp_swap_relaxed_gv_i1Rbb(i8* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
+; CHECK-LABEL: _Z29atomic_cmp_swap_relaxed_gv_i1Rbb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, %s1, (32)0
+; CHECK-NEXT:    lea %s1, gv_i1@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i1@hi(, %s1)
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    ldl.zx %s4, (, %s1)
+; CHECK-NEXT:    ld1b.zx %s3, (, %s0)
+; CHECK-NEXT:    lea %s5, -256
+; CHECK-NEXT:    and %s5, %s5, (32)0
+; CHECK-NEXT:    and %s4, %s4, %s5
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    or %s2, %s4, %s2
+; CHECK-NEXT:    or %s3, %s4, %s3
+; CHECK-NEXT:    cas.w %s2, (%s1), %s3
+; CHECK-NEXT:    cmps.w.sx %s3, %s2, %s3
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s1, (63)0, %s3
+; CHECK-NEXT:    brne.w 0, %s1, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st1b %s2, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = zext i1 %1 to i8
+  %4 = load i8, i8* %0, align 1
+  %5 = cmpxchg weak i8* getelementptr inbounds (%"struct.std::__1::atomic", %"struct.std::__1::atomic"* @gv_i1, i64 0, i32 0, i32 0, i32 0, i32 0), i8 %4, i8 %3 monotonic monotonic
+  %6 = extractvalue { i8, i1 } %5, 1
+  br i1 %6, label %9, label %7
+
+7:                                                ; preds = %2
+  %8 = extractvalue { i8, i1 } %5, 0
+  store i8 %8, i8* %0, align 1
+  br label %9
+
+9:                                                ; preds = %2, %7
+  ret i1 %6
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z29atomic_cmp_swap_relaxed_gv_i8Rcc(i8* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
+; CHECK-LABEL: _Z29atomic_cmp_swap_relaxed_gv_i8Rcc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld1b.zx %s2, (, %s0)
+; CHECK-NEXT:    and %s3, %s1, (56)0
+; CHECK-NEXT:    lea %s1, gv_i8@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i8@hi(, %s1)
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    ldl.zx %s4, (, %s1)
+; CHECK-NEXT:    and %s3, %s3, (32)0
+; CHECK-NEXT:    lea %s5, -256
+; CHECK-NEXT:    and %s5, %s5, (32)0
+; CHECK-NEXT:    and %s4, %s4, %s5
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    or %s3, %s4, %s3
+; CHECK-NEXT:    or %s2, %s4, %s2
+; CHECK-NEXT:    cas.w %s3, (%s1), %s2
+; CHECK-NEXT:    cmps.w.sx %s2, %s3, %s2
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s1, (63)0, %s2
+; CHECK-NEXT:    brne.w 0, %s1, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st1b %s3, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i8, i8* %0, align 1
+  %4 = cmpxchg weak i8* getelementptr inbounds (%"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* @gv_i8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 %3, i8 %1 monotonic monotonic
+  %5 = extractvalue { i8, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i8, i1 } %4, 0
+  store i8 %7, i8* %0, align 1
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i8
+  ret i8 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z29atomic_cmp_swap_relaxed_gv_u8Rhh(i8* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
+; CHECK-LABEL: _Z29atomic_cmp_swap_relaxed_gv_u8Rhh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, %s1, (32)0
+; CHECK-NEXT:    lea %s1, gv_u8@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u8@hi(, %s1)
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    ldl.zx %s4, (, %s1)
+; CHECK-NEXT:    ld1b.zx %s3, (, %s0)
+; CHECK-NEXT:    lea %s5, -256
+; CHECK-NEXT:    and %s5, %s5, (32)0
+; CHECK-NEXT:    and %s4, %s4, %s5
+; CHECK-NEXT:    and %s4, %s4, (32)0
+; CHECK-NEXT:    or %s2, %s4, %s2
+; CHECK-NEXT:    or %s3, %s4, %s3
+; CHECK-NEXT:    cas.w %s2, (%s1), %s3
+; CHECK-NEXT:    cmps.w.sx %s3, %s2, %s3
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s1, (63)0, %s3
+; CHECK-NEXT:    brne.w 0, %s1, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st1b %s2, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i8, i8* %0, align 1
+  %4 = cmpxchg weak i8* getelementptr inbounds (%"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* @gv_u8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 %3, i8 %1 monotonic monotonic
+  %5 = extractvalue { i8, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i8, i1 } %4, 0
+  store i8 %7, i8* %0, align 1
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i8
+  ret i8 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z30atomic_cmp_swap_relaxed_gv_i16Rss(i16* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_gv_i16Rss:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s2, gv_i16@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s2, gv_i16@hi(, %s2)
+; CHECK-NEXT:    and %s2, -4, %s2
+; CHECK-NEXT:    ld2b.zx %s4, 2(, %s2)
+; CHECK-NEXT:    ld2b.zx %s3, (, %s0)
+; CHECK-NEXT:    and %s1, %s1, (48)0
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    sla.w.sx %s4, %s4, 16
+; CHECK-NEXT:    or %s1, %s4, %s1
+; CHECK-NEXT:    or %s3, %s4, %s3
+; CHECK-NEXT:    cas.w %s1, (%s2), %s3
+; CHECK-NEXT:    cmps.w.sx %s3, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s3
+; CHECK-NEXT:    brne.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st2b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i16, i16* %0, align 2
+  %4 = cmpxchg weak i16* getelementptr inbounds (%"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* @gv_i16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i16 %3, i16 %1 monotonic monotonic
+  %5 = extractvalue { i16, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i16, i1 } %4, 0
+  store i16 %7, i16* %0, align 2
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i16
+  ret i16 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z30atomic_cmp_swap_relaxed_gv_u16Rtt(i16* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_gv_u16Rtt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s2, gv_u16@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s2, gv_u16@hi(, %s2)
+; CHECK-NEXT:    and %s2, -4, %s2
+; CHECK-NEXT:    ld2b.zx %s4, 2(, %s2)
+; CHECK-NEXT:    ld2b.zx %s3, (, %s0)
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    sla.w.sx %s4, %s4, 16
+; CHECK-NEXT:    or %s1, %s4, %s1
+; CHECK-NEXT:    or %s3, %s4, %s3
+; CHECK-NEXT:    cas.w %s1, (%s2), %s3
+; CHECK-NEXT:    cmps.w.sx %s3, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s3
+; CHECK-NEXT:    brne.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st2b %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i16, i16* %0, align 2
+  %4 = cmpxchg weak i16* getelementptr inbounds (%"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* @gv_u16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i16 %3, i16 %1 monotonic monotonic
+  %5 = extractvalue { i16, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i16, i1 } %4, 0
+  store i16 %7, i16* %0, align 2
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i16
+  ret i16 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z30atomic_cmp_swap_relaxed_gv_i32Rii(i32* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_gv_i32Rii:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s0)
+; CHECK-NEXT:    lea %s2, gv_i32@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s2, gv_i32@hi(, %s2)
+; CHECK-NEXT:    cas.w %s1, (%s2), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i32, i32* %0, align 4
+  %4 = cmpxchg weak i32* getelementptr inbounds (%"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* @gv_i32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i32 %3, i32 %1 monotonic monotonic
+  %5 = extractvalue { i32, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i32, i1 } %4, 0
+  store i32 %7, i32* %0, align 4
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i32
+  ret i32 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z30atomic_cmp_swap_relaxed_gv_u32Rjj(i32* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_gv_u32Rjj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ldl.sx %s3, (, %s0)
+; CHECK-NEXT:    lea %s2, gv_u32@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s2, gv_u32@hi(, %s2)
+; CHECK-NEXT:    cas.w %s1, (%s2), %s3
+; CHECK-NEXT:    cmps.w.sx %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.w.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.w %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    stl %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i32, i32* %0, align 4
+  %4 = cmpxchg weak i32* getelementptr inbounds (%"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* @gv_u32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i32 %3, i32 %1 monotonic monotonic
+  %5 = extractvalue { i32, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i32, i1 } %4, 0
+  store i32 %7, i32* %0, align 4
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i32
+  ret i32 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z30atomic_cmp_swap_relaxed_gv_i64Rll(i64* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_gv_i64Rll:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s0)
+; CHECK-NEXT:    lea %s2, gv_i64@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s2, gv_i64@hi(, %s2)
+; CHECK-NEXT:    cas.l %s1, (%s2), %s3
+; CHECK-NEXT:    cmps.l %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.l %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i64, i64* %0, align 8
+  %4 = cmpxchg weak i64* getelementptr inbounds (%"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* @gv_i64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 %3, i64 %1 monotonic monotonic
+  %5 = extractvalue { i64, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i64, i1 } %4, 0
+  store i64 %7, i64* %0, align 8
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i64
+  ret i64 %9
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z30atomic_cmp_swap_relaxed_gv_u64Rmm(i64* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z30atomic_cmp_swap_relaxed_gv_u64Rmm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld %s3, (, %s0)
+; CHECK-NEXT:    lea %s2, gv_u64@lo
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    lea.sl %s2, gv_u64@hi(, %s2)
+; CHECK-NEXT:    cas.l %s1, (%s2), %s3
+; CHECK-NEXT:    cmps.l %s4, %s1, %s3
+; CHECK-NEXT:    or %s2, 0, (0)1
+; CHECK-NEXT:    cmov.l.eq %s2, (63)0, %s4
+; CHECK-NEXT:    breq.l %s1, %s3, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    st %s1, (, %s0)
+; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = load i64, i64* %0, align 8
+  %4 = cmpxchg weak i64* getelementptr inbounds (%"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* @gv_u64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 %3, i64 %1 monotonic monotonic
+  %5 = extractvalue { i64, i1 } %4, 1
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %2
+  %7 = extractvalue { i64, i1 } %4, 0
+  store i64 %7, i64* %0, align 8
+  br label %8
+
+8:                                                ; preds = %2, %6
+  %9 = zext i1 %5 to i64
+  ret i64 %9
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z31atomic_cmp_swap_relaxed_gv_i128Rnn(i128* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_gv_i128Rnn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s0
+; CHECK-NEXT:    st %s2, 248(, %s11)
+; CHECK-NEXT:    st %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_i128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i128@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s5, 0, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %5 = bitcast i128* %0 to i8*
+  %6 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.40"* @gv_i128 to i8*), i8* nonnull %5, i8* nonnull %4, i32 signext 0, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  %7 = zext i1 %6 to i128
+  ret i128 %7
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z31atomic_cmp_swap_relaxed_gv_u128Roo(i128* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z31atomic_cmp_swap_relaxed_gv_u128Roo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s6, 0, %s0
+; CHECK-NEXT:    st %s2, 248(, %s11)
+; CHECK-NEXT:    st %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_compare_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_compare_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_u128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u128@hi(, %s0)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s5, 0, (0)1
+; CHECK-NEXT:    or %s2, 0, %s6
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s1, 0, (0)1
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %5 = bitcast i128* %0 to i8*
+  %6 = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.45"* @gv_u128 to i8*), i8* nonnull %5, i8* nonnull %4, i32 signext 0, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  %7 = zext i1 %6 to i128
+  ret i128 %7
+}
+
+; Function Attrs: nounwind willreturn
+declare i1 @__atomic_compare_exchange(i64, i8*, i8*, i8*, i32, i32)
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"__int128", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}

diff  --git a/llvm/test/CodeGen/VE/Scalar/atomic_load.ll b/llvm/test/CodeGen/VE/Scalar/atomic_load.ll
index af9f64de4ab2..a9d3472c3d26 100644
--- a/llvm/test/CodeGen/VE/Scalar/atomic_load.ll
+++ b/llvm/test/CodeGen/VE/Scalar/atomic_load.ll
@@ -5,6 +5,7 @@
 ;;; Note:
 ;;;   We test i1/i8/i16/i32/i64/i128/u8/u16/u32/u64/u128.
 ;;;   We test relaxed, acquire, and seq_cst.
+;;;   We test an object, a stack object, and a global variable.
 
 %"struct.std::__1::atomic" = type { %"struct.std::__1::__atomic_base" }
 %"struct.std::__1::__atomic_base" = type { %"struct.std::__1::__cxx_atomic_impl" }
@@ -61,7 +62,19 @@
 %"struct.std::__1::__cxx_atomic_impl.48" = type { %"struct.std::__1::__cxx_atomic_base_impl.49" }
 %"struct.std::__1::__cxx_atomic_base_impl.49" = type { i128 }
 
-; Function Attrs: nofree norecurse nounwind
+@gv_i1 = global %"struct.std::__1::atomic" zeroinitializer, align 4
+@gv_i8 = global %"struct.std::__1::atomic.0" zeroinitializer, align 4
+@gv_u8 = global %"struct.std::__1::atomic.5" zeroinitializer, align 4
+@gv_i16 = global %"struct.std::__1::atomic.10" zeroinitializer, align 4
+@gv_u16 = global %"struct.std::__1::atomic.15" zeroinitializer, align 4
+@gv_i32 = global %"struct.std::__1::atomic.20" zeroinitializer, align 4
+@gv_u32 = global %"struct.std::__1::atomic.25" zeroinitializer, align 4
+@gv_i64 = global %"struct.std::__1::atomic.30" zeroinitializer, align 8
+@gv_u64 = global %"struct.std::__1::atomic.35" zeroinitializer, align 8
+@gv_i128 = global %"struct.std::__1::atomic.40" zeroinitializer, align 16
+@gv_u128 = global %"struct.std::__1::atomic.45" zeroinitializer, align 16
+
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i1 @_Z22atomic_load_relaxed_i1RNSt3__16atomicIbEE(%"struct.std::__1::atomic"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_relaxed_i1RNSt3__16atomicIbEE:
 ; CHECK:       # %bb.0:
@@ -75,7 +88,7 @@ define zeroext i1 @_Z22atomic_load_relaxed_i1RNSt3__16atomicIbEE(%"struct.std::_
   ret i1 %5
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i8 @_Z22atomic_load_relaxed_i8RNSt3__16atomicIcEE(%"struct.std::__1::atomic.0"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_relaxed_i8RNSt3__16atomicIcEE:
 ; CHECK:       # %bb.0:
@@ -86,7 +99,7 @@ define signext i8 @_Z22atomic_load_relaxed_i8RNSt3__16atomicIcEE(%"struct.std::_
   ret i8 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i8 @_Z22atomic_load_relaxed_u8RNSt3__16atomicIhEE(%"struct.std::__1::atomic.5"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_relaxed_u8RNSt3__16atomicIhEE:
 ; CHECK:       # %bb.0:
@@ -97,7 +110,7 @@ define zeroext i8 @_Z22atomic_load_relaxed_u8RNSt3__16atomicIhEE(%"struct.std::_
   ret i8 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i16 @_Z23atomic_load_relaxed_i16RNSt3__16atomicIsEE(%"struct.std::__1::atomic.10"* nocapture nonnull readonly align 2 dereferenceable(2) %0) {
 ; CHECK-LABEL: _Z23atomic_load_relaxed_i16RNSt3__16atomicIsEE:
 ; CHECK:       # %bb.0:
@@ -108,7 +121,7 @@ define signext i16 @_Z23atomic_load_relaxed_i16RNSt3__16atomicIsEE(%"struct.std:
   ret i16 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i16 @_Z23atomic_load_relaxed_u16RNSt3__16atomicItEE(%"struct.std::__1::atomic.15"* nocapture nonnull readonly align 2 dereferenceable(2) %0) {
 ; CHECK-LABEL: _Z23atomic_load_relaxed_u16RNSt3__16atomicItEE:
 ; CHECK:       # %bb.0:
@@ -119,7 +132,7 @@ define zeroext i16 @_Z23atomic_load_relaxed_u16RNSt3__16atomicItEE(%"struct.std:
   ret i16 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i32 @_Z23atomic_load_relaxed_i32RNSt3__16atomicIiEE(%"struct.std::__1::atomic.20"* nocapture nonnull readonly align 4 dereferenceable(4) %0) {
 ; CHECK-LABEL: _Z23atomic_load_relaxed_i32RNSt3__16atomicIiEE:
 ; CHECK:       # %bb.0:
@@ -130,7 +143,7 @@ define signext i32 @_Z23atomic_load_relaxed_i32RNSt3__16atomicIiEE(%"struct.std:
   ret i32 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i32 @_Z23atomic_load_relaxed_u32RNSt3__16atomicIjEE(%"struct.std::__1::atomic.25"* nocapture nonnull readonly align 4 dereferenceable(4) %0) {
 ; CHECK-LABEL: _Z23atomic_load_relaxed_u32RNSt3__16atomicIjEE:
 ; CHECK:       # %bb.0:
@@ -141,7 +154,7 @@ define zeroext i32 @_Z23atomic_load_relaxed_u32RNSt3__16atomicIjEE(%"struct.std:
   ret i32 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define i64 @_Z23atomic_load_relaxed_i64RNSt3__16atomicIlEE(%"struct.std::__1::atomic.30"* nocapture nonnull readonly align 8 dereferenceable(8) %0) {
 ; CHECK-LABEL: _Z23atomic_load_relaxed_i64RNSt3__16atomicIlEE:
 ; CHECK:       # %bb.0:
@@ -152,7 +165,7 @@ define i64 @_Z23atomic_load_relaxed_i64RNSt3__16atomicIlEE(%"struct.std::__1::at
   ret i64 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define i64 @_Z23atomic_load_relaxed_u64RNSt3__16atomicImEE(%"struct.std::__1::atomic.35"* nocapture nonnull readonly align 8 dereferenceable(8) %0) {
 ; CHECK-LABEL: _Z23atomic_load_relaxed_u64RNSt3__16atomicImEE:
 ; CHECK:       # %bb.0:
@@ -163,7 +176,7 @@ define i64 @_Z23atomic_load_relaxed_u64RNSt3__16atomicImEE(%"struct.std::__1::at
   ret i64 %3
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define i128 @_Z24atomic_load_relaxed_i128RNSt3__16atomicInEE(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0) {
 ; CHECK-LABEL: _Z24atomic_load_relaxed_i128RNSt3__16atomicInEE:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -188,7 +201,7 @@ define i128 @_Z24atomic_load_relaxed_i128RNSt3__16atomicInEE(%"struct.std::__1::
   ret i128 %5
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define i128 @_Z24atomic_load_relaxed_u128RNSt3__16atomicIoEE(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0) {
 ; CHECK-LABEL: _Z24atomic_load_relaxed_u128RNSt3__16atomicIoEE:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -213,7 +226,7 @@ define i128 @_Z24atomic_load_relaxed_u128RNSt3__16atomicIoEE(%"struct.std::__1::
   ret i128 %5
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i1 @_Z22atomic_load_acquire_i1RNSt3__16atomicIbEE(%"struct.std::__1::atomic"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_acquire_i1RNSt3__16atomicIbEE:
 ; CHECK:       # %bb.0:
@@ -228,7 +241,7 @@ define zeroext i1 @_Z22atomic_load_acquire_i1RNSt3__16atomicIbEE(%"struct.std::_
   ret i1 %5
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i8 @_Z22atomic_load_acquire_i8RNSt3__16atomicIcEE(%"struct.std::__1::atomic.0"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_acquire_i8RNSt3__16atomicIcEE:
 ; CHECK:       # %bb.0:
@@ -240,7 +253,7 @@ define signext i8 @_Z22atomic_load_acquire_i8RNSt3__16atomicIcEE(%"struct.std::_
   ret i8 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i8 @_Z22atomic_load_acquire_u8RNSt3__16atomicIhEE(%"struct.std::__1::atomic.5"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_acquire_u8RNSt3__16atomicIhEE:
 ; CHECK:       # %bb.0:
@@ -252,7 +265,7 @@ define zeroext i8 @_Z22atomic_load_acquire_u8RNSt3__16atomicIhEE(%"struct.std::_
   ret i8 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i16 @_Z23atomic_load_acquire_i16RNSt3__16atomicIsEE(%"struct.std::__1::atomic.10"* nocapture nonnull readonly align 2 dereferenceable(2) %0) {
 ; CHECK-LABEL: _Z23atomic_load_acquire_i16RNSt3__16atomicIsEE:
 ; CHECK:       # %bb.0:
@@ -264,7 +277,7 @@ define signext i16 @_Z23atomic_load_acquire_i16RNSt3__16atomicIsEE(%"struct.std:
   ret i16 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i16 @_Z23atomic_load_acquire_u16RNSt3__16atomicItEE(%"struct.std::__1::atomic.15"* nocapture nonnull readonly align 2 dereferenceable(2) %0) {
 ; CHECK-LABEL: _Z23atomic_load_acquire_u16RNSt3__16atomicItEE:
 ; CHECK:       # %bb.0:
@@ -276,7 +289,7 @@ define zeroext i16 @_Z23atomic_load_acquire_u16RNSt3__16atomicItEE(%"struct.std:
   ret i16 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i32 @_Z23atomic_load_acquire_i32RNSt3__16atomicIiEE(%"struct.std::__1::atomic.20"* nocapture nonnull readonly align 4 dereferenceable(4) %0) {
 ; CHECK-LABEL: _Z23atomic_load_acquire_i32RNSt3__16atomicIiEE:
 ; CHECK:       # %bb.0:
@@ -288,7 +301,7 @@ define signext i32 @_Z23atomic_load_acquire_i32RNSt3__16atomicIiEE(%"struct.std:
   ret i32 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i32 @_Z23atomic_load_acquire_u32RNSt3__16atomicIjEE(%"struct.std::__1::atomic.25"* nocapture nonnull readonly align 4 dereferenceable(4) %0) {
 ; CHECK-LABEL: _Z23atomic_load_acquire_u32RNSt3__16atomicIjEE:
 ; CHECK:       # %bb.0:
@@ -300,7 +313,7 @@ define zeroext i32 @_Z23atomic_load_acquire_u32RNSt3__16atomicIjEE(%"struct.std:
   ret i32 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define i64 @_Z23atomic_load_acquire_i64RNSt3__16atomicIlEE(%"struct.std::__1::atomic.30"* nocapture nonnull readonly align 8 dereferenceable(8) %0) {
 ; CHECK-LABEL: _Z23atomic_load_acquire_i64RNSt3__16atomicIlEE:
 ; CHECK:       # %bb.0:
@@ -312,7 +325,7 @@ define i64 @_Z23atomic_load_acquire_i64RNSt3__16atomicIlEE(%"struct.std::__1::at
   ret i64 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define i64 @_Z23atomic_load_acquire_u64RNSt3__16atomicImEE(%"struct.std::__1::atomic.35"* nocapture nonnull readonly align 8 dereferenceable(8) %0) {
 ; CHECK-LABEL: _Z23atomic_load_acquire_u64RNSt3__16atomicImEE:
 ; CHECK:       # %bb.0:
@@ -324,7 +337,7 @@ define i64 @_Z23atomic_load_acquire_u64RNSt3__16atomicImEE(%"struct.std::__1::at
   ret i64 %3
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define i128 @_Z24atomic_load_acquire_i128RNSt3__16atomicInEE(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0) {
 ; CHECK-LABEL: _Z24atomic_load_acquire_i128RNSt3__16atomicInEE:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -349,7 +362,7 @@ define i128 @_Z24atomic_load_acquire_i128RNSt3__16atomicInEE(%"struct.std::__1::
   ret i128 %5
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define i128 @_Z24atomic_load_acquire_u128RNSt3__16atomicIoEE(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0) {
 ; CHECK-LABEL: _Z24atomic_load_acquire_u128RNSt3__16atomicIoEE:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -374,7 +387,7 @@ define i128 @_Z24atomic_load_acquire_u128RNSt3__16atomicIoEE(%"struct.std::__1::
   ret i128 %5
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i1 @_Z22atomic_load_seq_cst_i1RNSt3__16atomicIbEE(%"struct.std::__1::atomic"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_seq_cst_i1RNSt3__16atomicIbEE:
 ; CHECK:       # %bb.0:
@@ -389,7 +402,7 @@ define zeroext i1 @_Z22atomic_load_seq_cst_i1RNSt3__16atomicIbEE(%"struct.std::_
   ret i1 %5
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i8 @_Z22atomic_load_seq_cst_i8RNSt3__16atomicIcEE(%"struct.std::__1::atomic.0"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_seq_cst_i8RNSt3__16atomicIcEE:
 ; CHECK:       # %bb.0:
@@ -401,7 +414,7 @@ define signext i8 @_Z22atomic_load_seq_cst_i8RNSt3__16atomicIcEE(%"struct.std::_
   ret i8 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i8 @_Z22atomic_load_seq_cst_u8RNSt3__16atomicIhEE(%"struct.std::__1::atomic.5"* nocapture nonnull readonly align 1 dereferenceable(1) %0) {
 ; CHECK-LABEL: _Z22atomic_load_seq_cst_u8RNSt3__16atomicIhEE:
 ; CHECK:       # %bb.0:
@@ -413,7 +426,7 @@ define zeroext i8 @_Z22atomic_load_seq_cst_u8RNSt3__16atomicIhEE(%"struct.std::_
   ret i8 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i16 @_Z23atomic_load_seq_cst_i16RNSt3__16atomicIsEE(%"struct.std::__1::atomic.10"* nocapture nonnull readonly align 2 dereferenceable(2) %0) {
 ; CHECK-LABEL: _Z23atomic_load_seq_cst_i16RNSt3__16atomicIsEE:
 ; CHECK:       # %bb.0:
@@ -425,7 +438,7 @@ define signext i16 @_Z23atomic_load_seq_cst_i16RNSt3__16atomicIsEE(%"struct.std:
   ret i16 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i16 @_Z23atomic_load_seq_cst_u16RNSt3__16atomicItEE(%"struct.std::__1::atomic.15"* nocapture nonnull readonly align 2 dereferenceable(2) %0) {
 ; CHECK-LABEL: _Z23atomic_load_seq_cst_u16RNSt3__16atomicItEE:
 ; CHECK:       # %bb.0:
@@ -437,7 +450,7 @@ define zeroext i16 @_Z23atomic_load_seq_cst_u16RNSt3__16atomicItEE(%"struct.std:
   ret i16 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define signext i32 @_Z23atomic_load_seq_cst_i32RNSt3__16atomicIiEE(%"struct.std::__1::atomic.20"* nocapture nonnull readonly align 4 dereferenceable(4) %0) {
 ; CHECK-LABEL: _Z23atomic_load_seq_cst_i32RNSt3__16atomicIiEE:
 ; CHECK:       # %bb.0:
@@ -449,7 +462,7 @@ define signext i32 @_Z23atomic_load_seq_cst_i32RNSt3__16atomicIiEE(%"struct.std:
   ret i32 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define zeroext i32 @_Z23atomic_load_seq_cst_u32RNSt3__16atomicIjEE(%"struct.std::__1::atomic.25"* nocapture nonnull readonly align 4 dereferenceable(4) %0) {
 ; CHECK-LABEL: _Z23atomic_load_seq_cst_u32RNSt3__16atomicIjEE:
 ; CHECK:       # %bb.0:
@@ -461,7 +474,7 @@ define zeroext i32 @_Z23atomic_load_seq_cst_u32RNSt3__16atomicIjEE(%"struct.std:
   ret i32 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define i64 @_Z23atomic_load_seq_cst_i64RNSt3__16atomicIlEE(%"struct.std::__1::atomic.30"* nocapture nonnull readonly align 8 dereferenceable(8) %0) {
 ; CHECK-LABEL: _Z23atomic_load_seq_cst_i64RNSt3__16atomicIlEE:
 ; CHECK:       # %bb.0:
@@ -473,7 +486,7 @@ define i64 @_Z23atomic_load_seq_cst_i64RNSt3__16atomicIlEE(%"struct.std::__1::at
   ret i64 %3
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define i64 @_Z23atomic_load_seq_cst_u64RNSt3__16atomicImEE(%"struct.std::__1::atomic.35"* nocapture nonnull readonly align 8 dereferenceable(8) %0) {
 ; CHECK-LABEL: _Z23atomic_load_seq_cst_u64RNSt3__16atomicImEE:
 ; CHECK:       # %bb.0:
@@ -485,7 +498,7 @@ define i64 @_Z23atomic_load_seq_cst_u64RNSt3__16atomicImEE(%"struct.std::__1::at
   ret i64 %3
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define i128 @_Z24atomic_load_seq_cst_i128RNSt3__16atomicInEE(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0) {
 ; CHECK-LABEL: _Z24atomic_load_seq_cst_i128RNSt3__16atomicInEE:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -510,7 +523,7 @@ define i128 @_Z24atomic_load_seq_cst_i128RNSt3__16atomicInEE(%"struct.std::__1::
   ret i128 %5
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define i128 @_Z24atomic_load_seq_cst_u128RNSt3__16atomicIoEE(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0) {
 ; CHECK-LABEL: _Z24atomic_load_seq_cst_u128RNSt3__16atomicIoEE:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -535,15 +548,466 @@ define i128 @_Z24atomic_load_seq_cst_u128RNSt3__16atomicIoEE(%"struct.std::__1::
   ret i128 %5
 }
 
-; Function Attrs: nofree nounwind willreturn
-declare void @__atomic_load(i64, i8*, i8*, i32)
+; Function Attrs: mustprogress
+define zeroext i1 @_Z26atomic_load_relaxed_stk_i1v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_stk_i1v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z6fun_i1RNSt3__16atomicIbEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z6fun_i1RNSt3__16atomicIbEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld1b.zx %s0, 248(, %s11)
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic", align 1
+  %2 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %1, i64 0, i32 0, i32 0, i32 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %2)
+  call void @_Z6fun_i1RNSt3__16atomicIbEE(%"struct.std::__1::atomic"* nonnull align 1 dereferenceable(1) %1)
+  %3 = load atomic i8, i8* %2 monotonic, align 1
+  %4 = and i8 %3, 1
+  %5 = icmp ne i8 %4, 0
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %2)
+  ret i1 %5
+}
 
-; Function Attrs: argmemonly nounwind willreturn
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
 
-; Function Attrs: argmemonly nounwind willreturn
+declare void @_Z6fun_i1RNSt3__16atomicIbEE(%"struct.std::__1::atomic"* nonnull align 1 dereferenceable(1))
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
 
+; Function Attrs: mustprogress
+define signext i8 @_Z26atomic_load_relaxed_stk_i8v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_stk_i8v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z6fun_i8RNSt3__16atomicIcEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z6fun_i8RNSt3__16atomicIcEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld1b.sx %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.0", align 1
+  %2 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %2)
+  call void @_Z6fun_i8RNSt3__16atomicIcEE(%"struct.std::__1::atomic.0"* nonnull align 1 dereferenceable(1) %1)
+  %3 = load atomic i8, i8* %2 monotonic, align 1
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %2)
+  ret i8 %3
+}
+
+declare void @_Z6fun_i8RNSt3__16atomicIcEE(%"struct.std::__1::atomic.0"* nonnull align 1 dereferenceable(1))
+
+; Function Attrs: mustprogress
+define zeroext i8 @_Z26atomic_load_relaxed_stk_u8v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_stk_u8v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z6fun_u8RNSt3__16atomicIhEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z6fun_u8RNSt3__16atomicIhEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld1b.zx %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.5", align 1
+  %2 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %2)
+  call void @_Z6fun_u8RNSt3__16atomicIhEE(%"struct.std::__1::atomic.5"* nonnull align 1 dereferenceable(1) %1)
+  %3 = load atomic i8, i8* %2 monotonic, align 1
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %2)
+  ret i8 %3
+}
+
+declare void @_Z6fun_u8RNSt3__16atomicIhEE(%"struct.std::__1::atomic.5"* nonnull align 1 dereferenceable(1))
+
+; Function Attrs: mustprogress
+define signext i16 @_Z27atomic_load_relaxed_stk_i16v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_i16v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z7fun_i16RNSt3__16atomicIsEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z7fun_i16RNSt3__16atomicIsEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld2b.sx %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.10", align 2
+  %2 = bitcast %"struct.std::__1::atomic.10"* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %2)
+  call void @_Z7fun_i16RNSt3__16atomicIsEE(%"struct.std::__1::atomic.10"* nonnull align 2 dereferenceable(2) %1)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = load atomic i16, i16* %3 monotonic, align 2
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %2)
+  ret i16 %4
+}
+
+declare void @_Z7fun_i16RNSt3__16atomicIsEE(%"struct.std::__1::atomic.10"* nonnull align 2 dereferenceable(2))
+
+; Function Attrs: mustprogress
+define zeroext i16 @_Z27atomic_load_relaxed_stk_u16v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_u16v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z7fun_u16RNSt3__16atomicItEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z7fun_u16RNSt3__16atomicItEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld2b.zx %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.15", align 2
+  %2 = bitcast %"struct.std::__1::atomic.15"* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %2)
+  call void @_Z7fun_u16RNSt3__16atomicItEE(%"struct.std::__1::atomic.15"* nonnull align 2 dereferenceable(2) %1)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = load atomic i16, i16* %3 monotonic, align 2
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %2)
+  ret i16 %4
+}
+
+declare void @_Z7fun_u16RNSt3__16atomicItEE(%"struct.std::__1::atomic.15"* nonnull align 2 dereferenceable(2))
+
+; Function Attrs: mustprogress
+define signext i32 @_Z27atomic_load_relaxed_stk_i32v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_i32v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z7fun_i32RNSt3__16atomicIiEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z7fun_i32RNSt3__16atomicIiEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ldl.sx %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.20", align 4
+  %2 = bitcast %"struct.std::__1::atomic.20"* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2)
+  call void @_Z7fun_i32RNSt3__16atomicIiEE(%"struct.std::__1::atomic.20"* nonnull align 4 dereferenceable(4) %1)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = load atomic i32, i32* %3 monotonic, align 4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2)
+  ret i32 %4
+}
+
+declare void @_Z7fun_i32RNSt3__16atomicIiEE(%"struct.std::__1::atomic.20"* nonnull align 4 dereferenceable(4))
+
+; Function Attrs: mustprogress
+define zeroext i32 @_Z27atomic_load_relaxed_stk_u32v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_u32v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z7fun_u32RNSt3__16atomicIjEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z7fun_u32RNSt3__16atomicIjEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ldl.zx %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.25", align 4
+  %2 = bitcast %"struct.std::__1::atomic.25"* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2)
+  call void @_Z7fun_u32RNSt3__16atomicIjEE(%"struct.std::__1::atomic.25"* nonnull align 4 dereferenceable(4) %1)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = load atomic i32, i32* %3 monotonic, align 4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2)
+  ret i32 %4
+}
+
+declare void @_Z7fun_u32RNSt3__16atomicIjEE(%"struct.std::__1::atomic.25"* nonnull align 4 dereferenceable(4))
+
+; Function Attrs: mustprogress
+define i64 @_Z27atomic_load_relaxed_stk_i64v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_i64v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z7fun_i64RNSt3__16atomicIlEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z7fun_i64RNSt3__16atomicIlEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.30", align 8
+  %2 = bitcast %"struct.std::__1::atomic.30"* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %2)
+  call void @_Z7fun_i64RNSt3__16atomicIlEE(%"struct.std::__1::atomic.30"* nonnull align 8 dereferenceable(8) %1)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = load atomic i64, i64* %3 monotonic, align 8
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %2)
+  ret i64 %4
+}
+
+declare void @_Z7fun_i64RNSt3__16atomicIlEE(%"struct.std::__1::atomic.30"* nonnull align 8 dereferenceable(8))
+
+; Function Attrs: mustprogress
+define i64 @_Z27atomic_load_relaxed_stk_u64v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_u64v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z7fun_u64RNSt3__16atomicImEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z7fun_u64RNSt3__16atomicImEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 248(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s0, 248(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca %"struct.std::__1::atomic.35", align 8
+  %2 = bitcast %"struct.std::__1::atomic.35"* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %2)
+  call void @_Z7fun_u64RNSt3__16atomicImEE(%"struct.std::__1::atomic.35"* nonnull align 8 dereferenceable(8) %1)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %1, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = load atomic i64, i64* %3 monotonic, align 8
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %2)
+  ret i64 %4
+}
+
+declare void @_Z7fun_u64RNSt3__16atomicImEE(%"struct.std::__1::atomic.35"* nonnull align 8 dereferenceable(8))
+
+; Function Attrs: mustprogress
+define i128 @_Z28atomic_load_relaxed_stk_i128v() {
+; CHECK-LABEL: _Z28atomic_load_relaxed_stk_i128v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z8fun_i128RNSt3__16atomicInEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z8fun_i128RNSt3__16atomicInEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 240(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lea %s0, __atomic_load@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_load@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 264(, %s11)
+; CHECK-NEXT:    ld %s0, 256(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca i128, align 16
+  %2 = alloca %"struct.std::__1::atomic.40", align 16
+  %3 = bitcast %"struct.std::__1::atomic.40"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %3)
+  call void @_Z8fun_i128RNSt3__16atomicInEE(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %2)
+  %4 = bitcast i128* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  call void @__atomic_load(i64 16, i8* nonnull %3, i8* nonnull %4, i32 signext 0)
+  %5 = load i128, i128* %1, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %3)
+  ret i128 %5
+}
+
+declare void @_Z8fun_i128RNSt3__16atomicInEE(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16))
+
+; Function Attrs: mustprogress
+define i128 @_Z28atomic_load_relaxed_stk_u128v() {
+; CHECK-LABEL: _Z28atomic_load_relaxed_stk_u128v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, _Z8fun_u128RNSt3__16atomicIoEE@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, _Z8fun_u128RNSt3__16atomicIoEE@hi(, %s0)
+; CHECK-NEXT:    lea %s0, 240(, %s11)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lea %s0, __atomic_load@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_load@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 264(, %s11)
+; CHECK-NEXT:    ld %s0, 256(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca i128, align 16
+  %2 = alloca %"struct.std::__1::atomic.45", align 16
+  %3 = bitcast %"struct.std::__1::atomic.45"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %3)
+  call void @_Z8fun_u128RNSt3__16atomicIoEE(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %2)
+  %4 = bitcast i128* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  call void @__atomic_load(i64 16, i8* nonnull %3, i8* nonnull %4, i32 signext 0)
+  %5 = load i128, i128* %1, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %3)
+  ret i128 %5
+}
+
+declare void @_Z8fun_u128RNSt3__16atomicIoEE(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16))
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z25atomic_load_relaxed_gv_i1v() {
+; CHECK-LABEL: _Z25atomic_load_relaxed_gv_i1v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_i1@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_i1@hi(, %s0)
+; CHECK-NEXT:    ld1b.zx %s0, (, %s0)
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i8, i8* getelementptr inbounds (%"struct.std::__1::atomic", %"struct.std::__1::atomic"* @gv_i1, i64 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  %2 = and i8 %1, 1
+  %3 = icmp ne i8 %2, 0
+  ret i1 %3
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z25atomic_load_relaxed_gv_i8v() {
+; CHECK-LABEL: _Z25atomic_load_relaxed_gv_i8v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_i8@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_i8@hi(, %s0)
+; CHECK-NEXT:    ld1b.sx %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i8, i8* getelementptr inbounds (%"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* @gv_i8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret i8 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z25atomic_load_relaxed_gv_u8v() {
+; CHECK-LABEL: _Z25atomic_load_relaxed_gv_u8v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_u8@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_u8@hi(, %s0)
+; CHECK-NEXT:    ld1b.zx %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i8, i8* getelementptr inbounds (%"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* @gv_u8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret i8 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z26atomic_load_relaxed_gv_i16v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_i16v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_i16@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_i16@hi(, %s0)
+; CHECK-NEXT:    ld2b.sx %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i16, i16* getelementptr inbounds (%"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* @gv_i16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret i16 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z26atomic_load_relaxed_gv_u16v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_u16v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_u16@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_u16@hi(, %s0)
+; CHECK-NEXT:    ld2b.zx %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i16, i16* getelementptr inbounds (%"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* @gv_u16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret i16 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; Relaxed (monotonic) atomic load of the i32 field of global @gv_i32. The
+; signext result is expected to come from a single ldl.sx.
+define signext i32 @_Z26atomic_load_relaxed_gv_i32v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_i32v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_i32@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_i32@hi(, %s0)
+; CHECK-NEXT:    ldl.sx %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i32, i32* getelementptr inbounds (%"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* @gv_i32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret i32 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; Relaxed (monotonic) atomic load of the i32 field of global @gv_u32. The
+; zeroext result is expected to come from a single ldl.zx.
+define zeroext i32 @_Z26atomic_load_relaxed_gv_u32v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_u32v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_u32@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_u32@hi(, %s0)
+; CHECK-NEXT:    ldl.zx %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i32, i32* getelementptr inbounds (%"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* @gv_u32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret i32 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; Relaxed (monotonic) atomic load of the i64 field of global @gv_i64,
+; expected to lower to a plain 8-byte ld after materializing the address.
+define i64 @_Z26atomic_load_relaxed_gv_i64v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_i64v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_i64@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_i64@hi(, %s0)
+; CHECK-NEXT:    ld %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i64, i64* getelementptr inbounds (%"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* @gv_i64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 8
+  ret i64 %1
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; Relaxed (monotonic) atomic load of the i64 field of global @gv_u64,
+; expected to lower to a plain 8-byte ld after materializing the address.
+define i64 @_Z26atomic_load_relaxed_gv_u64v() {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_u64v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, gv_u64@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0, gv_u64@hi(, %s0)
+; CHECK-NEXT:    ld %s0, (, %s0)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %1 = load atomic i64, i64* getelementptr inbounds (%"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* @gv_u64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 8
+  ret i64 %1
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; 128-bit relaxed load of global @gv_i128. The IR calls __atomic_load
+; (size 16, ordering argument 0) to copy the global into a stack temporary,
+; then reloads that temporary as the i128 return value.
+define i128 @_Z27atomic_load_relaxed_gv_i128v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_gv_i128v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, __atomic_load@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_load@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_i128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i128@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca i128, align 16
+  %2 = bitcast i128* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2)
+  call void @__atomic_load(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.40"* @gv_i128 to i8*), i8* nonnull %2, i32 signext 0)
+  %3 = load i128, i128* %1, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2)
+  ret i128 %3
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; 128-bit relaxed load of global @gv_u128. The IR calls __atomic_load
+; (size 16, ordering argument 0) to copy the global into a stack temporary,
+; then reloads that temporary as the i128 return value.
+define i128 @_Z27atomic_load_relaxed_gv_u128v() {
+; CHECK-LABEL: _Z27atomic_load_relaxed_gv_u128v:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, __atomic_load@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_load@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_u128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u128@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %1 = alloca i128, align 16
+  %2 = bitcast i128* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2)
+  call void @__atomic_load(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.45"* @gv_u128 to i8*), i8* nonnull %2, i32 signext 0)
+  %3 = load i128, i128* %1, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2)
+  ret i128 %3
+}
+
+; Function Attrs: nofree nounwind willreturn
+; External routine used by the i128/u128 load tests. The visible call sites
+; pass (i64 16, pointer to the global, pointer to a stack temporary, i32 0).
+declare void @__atomic_load(i64, i8*, i8*, i32)
+
 !2 = !{!3, !3, i64 0}
 !3 = !{!"__int128", !4, i64 0}
 !4 = !{!"omnipotent char", !5, i64 0}

diff  --git a/llvm/test/CodeGen/VE/Scalar/atomic_store.ll b/llvm/test/CodeGen/VE/Scalar/atomic_store.ll
index 097e0297ac36..f7323f2f7dd5 100644
--- a/llvm/test/CodeGen/VE/Scalar/atomic_store.ll
+++ b/llvm/test/CodeGen/VE/Scalar/atomic_store.ll
@@ -5,6 +5,7 @@
 ;;; Note:
 ;;;   We test i1/i8/i16/i32/i64/i128/u8/u16/u32/u64/u128.
 ;;;   We test relaxed, release, and seq_cst.
+;;;   We test an object, a stack object, and a global variable.
 
 %"struct.std::__1::atomic" = type { %"struct.std::__1::__atomic_base" }
 %"struct.std::__1::__atomic_base" = type { %"struct.std::__1::__cxx_atomic_impl" }
@@ -61,7 +62,19 @@
 %"struct.std::__1::__cxx_atomic_impl.48" = type { %"struct.std::__1::__cxx_atomic_base_impl.49" }
 %"struct.std::__1::__cxx_atomic_base_impl.49" = type { i128 }
 
-; Function Attrs: nofree norecurse nounwind
+; Zero-initialized global atomic objects, one per tested element type.
+@gv_i1 = global %"struct.std::__1::atomic" zeroinitializer, align 4
+@gv_i8 = global %"struct.std::__1::atomic.0" zeroinitializer, align 4
+@gv_u8 = global %"struct.std::__1::atomic.5" zeroinitializer, align 4
+@gv_i16 = global %"struct.std::__1::atomic.10" zeroinitializer, align 4
+@gv_u16 = global %"struct.std::__1::atomic.15" zeroinitializer, align 4
+@gv_i32 = global %"struct.std::__1::atomic.20" zeroinitializer, align 4
+@gv_u32 = global %"struct.std::__1::atomic.25" zeroinitializer, align 4
+@gv_i64 = global %"struct.std::__1::atomic.30" zeroinitializer, align 8
+@gv_u64 = global %"struct.std::__1::atomic.35" zeroinitializer, align 8
+@gv_i128 = global %"struct.std::__1::atomic.40" zeroinitializer, align 16
+@gv_u128 = global %"struct.std::__1::atomic.45" zeroinitializer, align 16
+
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_relaxed_i1RNSt3__16atomicIbEEb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
 ; CHECK-LABEL: _Z23atomic_store_relaxed_i1RNSt3__16atomicIbEEb:
 ; CHECK:       # %bb.0:
@@ -73,7 +86,7 @@ define void @_Z23atomic_store_relaxed_i1RNSt3__16atomicIbEEb(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_relaxed_i8RNSt3__16atomicIcEEc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
 ; CHECK-LABEL: _Z23atomic_store_relaxed_i8RNSt3__16atomicIcEEc:
 ; CHECK:       # %bb.0:
@@ -84,7 +97,7 @@ define void @_Z23atomic_store_relaxed_i8RNSt3__16atomicIcEEc(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_relaxed_u8RNSt3__16atomicIhEEh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
 ; CHECK-LABEL: _Z23atomic_store_relaxed_u8RNSt3__16atomicIhEEh:
 ; CHECK:       # %bb.0:
@@ -95,7 +108,7 @@ define void @_Z23atomic_store_relaxed_u8RNSt3__16atomicIhEEh(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_relaxed_i16RNSt3__16atomicIsEEs(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
 ; CHECK-LABEL: _Z24atomic_store_relaxed_i16RNSt3__16atomicIsEEs:
 ; CHECK:       # %bb.0:
@@ -106,7 +119,7 @@ define void @_Z24atomic_store_relaxed_i16RNSt3__16atomicIsEEs(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_relaxed_u16RNSt3__16atomicItEEt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
 ; CHECK-LABEL: _Z24atomic_store_relaxed_u16RNSt3__16atomicItEEt:
 ; CHECK:       # %bb.0:
@@ -117,7 +130,7 @@ define void @_Z24atomic_store_relaxed_u16RNSt3__16atomicItEEt(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_relaxed_i32RNSt3__16atomicIiEEi(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
 ; CHECK-LABEL: _Z24atomic_store_relaxed_i32RNSt3__16atomicIiEEi:
 ; CHECK:       # %bb.0:
@@ -128,7 +141,7 @@ define void @_Z24atomic_store_relaxed_i32RNSt3__16atomicIiEEi(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_relaxed_u32RNSt3__16atomicIjEEj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
 ; CHECK-LABEL: _Z24atomic_store_relaxed_u32RNSt3__16atomicIjEEj:
 ; CHECK:       # %bb.0:
@@ -139,7 +152,7 @@ define void @_Z24atomic_store_relaxed_u32RNSt3__16atomicIjEEj(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_relaxed_i64RNSt3__16atomicIlEEl(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
 ; CHECK-LABEL: _Z24atomic_store_relaxed_i64RNSt3__16atomicIlEEl:
 ; CHECK:       # %bb.0:
@@ -150,7 +163,7 @@ define void @_Z24atomic_store_relaxed_i64RNSt3__16atomicIlEEl(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_relaxed_u64RNSt3__16atomicImEEm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
 ; CHECK-LABEL: _Z24atomic_store_relaxed_u64RNSt3__16atomicImEEm:
 ; CHECK:       # %bb.0:
@@ -161,7 +174,7 @@ define void @_Z24atomic_store_relaxed_u64RNSt3__16atomicImEEm(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define void @_Z25atomic_store_relaxed_i128RNSt3__16atomicInEEn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
 ; CHECK-LABEL: _Z25atomic_store_relaxed_i128RNSt3__16atomicInEEn:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -187,7 +200,7 @@ define void @_Z25atomic_store_relaxed_i128RNSt3__16atomicInEEn(%"struct.std::__1
   ret void
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define void @_Z25atomic_store_relaxed_u128RNSt3__16atomicIoEEo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
 ; CHECK-LABEL: _Z25atomic_store_relaxed_u128RNSt3__16atomicIoEEo:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -213,7 +226,7 @@ define void @_Z25atomic_store_relaxed_u128RNSt3__16atomicIoEEo(%"struct.std::__1
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_release_i1RNSt3__16atomicIbEEb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
 ; CHECK-LABEL: _Z23atomic_store_release_i1RNSt3__16atomicIbEEb:
 ; CHECK:       # %bb.0:
@@ -226,7 +239,7 @@ define void @_Z23atomic_store_release_i1RNSt3__16atomicIbEEb(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_release_i8RNSt3__16atomicIcEEc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
 ; CHECK-LABEL: _Z23atomic_store_release_i8RNSt3__16atomicIcEEc:
 ; CHECK:       # %bb.0:
@@ -238,7 +251,7 @@ define void @_Z23atomic_store_release_i8RNSt3__16atomicIcEEc(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_release_u8RNSt3__16atomicIhEEh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
 ; CHECK-LABEL: _Z23atomic_store_release_u8RNSt3__16atomicIhEEh:
 ; CHECK:       # %bb.0:
@@ -250,7 +263,7 @@ define void @_Z23atomic_store_release_u8RNSt3__16atomicIhEEh(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_release_i16RNSt3__16atomicIsEEs(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
 ; CHECK-LABEL: _Z24atomic_store_release_i16RNSt3__16atomicIsEEs:
 ; CHECK:       # %bb.0:
@@ -262,7 +275,7 @@ define void @_Z24atomic_store_release_i16RNSt3__16atomicIsEEs(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_release_u16RNSt3__16atomicItEEt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
 ; CHECK-LABEL: _Z24atomic_store_release_u16RNSt3__16atomicItEEt:
 ; CHECK:       # %bb.0:
@@ -274,7 +287,7 @@ define void @_Z24atomic_store_release_u16RNSt3__16atomicItEEt(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_release_i32RNSt3__16atomicIiEEi(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
 ; CHECK-LABEL: _Z24atomic_store_release_i32RNSt3__16atomicIiEEi:
 ; CHECK:       # %bb.0:
@@ -286,7 +299,7 @@ define void @_Z24atomic_store_release_i32RNSt3__16atomicIiEEi(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_release_u32RNSt3__16atomicIjEEj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
 ; CHECK-LABEL: _Z24atomic_store_release_u32RNSt3__16atomicIjEEj:
 ; CHECK:       # %bb.0:
@@ -298,7 +311,7 @@ define void @_Z24atomic_store_release_u32RNSt3__16atomicIjEEj(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_release_i64RNSt3__16atomicIlEEl(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
 ; CHECK-LABEL: _Z24atomic_store_release_i64RNSt3__16atomicIlEEl:
 ; CHECK:       # %bb.0:
@@ -310,7 +323,7 @@ define void @_Z24atomic_store_release_i64RNSt3__16atomicIlEEl(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_release_u64RNSt3__16atomicImEEm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
 ; CHECK-LABEL: _Z24atomic_store_release_u64RNSt3__16atomicImEEm:
 ; CHECK:       # %bb.0:
@@ -322,7 +335,7 @@ define void @_Z24atomic_store_release_u64RNSt3__16atomicImEEm(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define void @_Z25atomic_store_release_i128RNSt3__16atomicInEEn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
 ; CHECK-LABEL: _Z25atomic_store_release_i128RNSt3__16atomicInEEn:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -348,7 +361,7 @@ define void @_Z25atomic_store_release_i128RNSt3__16atomicInEEn(%"struct.std::__1
   ret void
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define void @_Z25atomic_store_release_u128RNSt3__16atomicIoEEo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
 ; CHECK-LABEL: _Z25atomic_store_release_u128RNSt3__16atomicIoEEo:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -374,7 +387,7 @@ define void @_Z25atomic_store_release_u128RNSt3__16atomicIoEEo(%"struct.std::__1
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_seq_cst_i1RNSt3__16atomicIbEEb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
 ; CHECK-LABEL: _Z23atomic_store_seq_cst_i1RNSt3__16atomicIbEEb:
 ; CHECK:       # %bb.0:
@@ -388,7 +401,7 @@ define void @_Z23atomic_store_seq_cst_i1RNSt3__16atomicIbEEb(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_seq_cst_i8RNSt3__16atomicIcEEc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
 ; CHECK-LABEL: _Z23atomic_store_seq_cst_i8RNSt3__16atomicIcEEc:
 ; CHECK:       # %bb.0:
@@ -401,7 +414,7 @@ define void @_Z23atomic_store_seq_cst_i8RNSt3__16atomicIcEEc(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z23atomic_store_seq_cst_u8RNSt3__16atomicIhEEh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
 ; CHECK-LABEL: _Z23atomic_store_seq_cst_u8RNSt3__16atomicIhEEh:
 ; CHECK:       # %bb.0:
@@ -414,7 +427,7 @@ define void @_Z23atomic_store_seq_cst_u8RNSt3__16atomicIhEEh(%"struct.std::__1::
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_seq_cst_i16RNSt3__16atomicIsEEs(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
 ; CHECK-LABEL: _Z24atomic_store_seq_cst_i16RNSt3__16atomicIsEEs:
 ; CHECK:       # %bb.0:
@@ -427,7 +440,7 @@ define void @_Z24atomic_store_seq_cst_i16RNSt3__16atomicIsEEs(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_seq_cst_u16RNSt3__16atomicItEEt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
 ; CHECK-LABEL: _Z24atomic_store_seq_cst_u16RNSt3__16atomicItEEt:
 ; CHECK:       # %bb.0:
@@ -440,7 +453,7 @@ define void @_Z24atomic_store_seq_cst_u16RNSt3__16atomicItEEt(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_seq_cst_i32RNSt3__16atomicIiEEi(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
 ; CHECK-LABEL: _Z24atomic_store_seq_cst_i32RNSt3__16atomicIiEEi:
 ; CHECK:       # %bb.0:
@@ -453,7 +466,7 @@ define void @_Z24atomic_store_seq_cst_i32RNSt3__16atomicIiEEi(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_seq_cst_u32RNSt3__16atomicIjEEj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
 ; CHECK-LABEL: _Z24atomic_store_seq_cst_u32RNSt3__16atomicIjEEj:
 ; CHECK:       # %bb.0:
@@ -466,7 +479,7 @@ define void @_Z24atomic_store_seq_cst_u32RNSt3__16atomicIjEEj(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_seq_cst_i64RNSt3__16atomicIlEEl(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
 ; CHECK-LABEL: _Z24atomic_store_seq_cst_i64RNSt3__16atomicIlEEl:
 ; CHECK:       # %bb.0:
@@ -479,7 +492,7 @@ define void @_Z24atomic_store_seq_cst_i64RNSt3__16atomicIlEEl(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nofree norecurse nounwind
+; Function Attrs: nofree norecurse nounwind mustprogress
 define void @_Z24atomic_store_seq_cst_u64RNSt3__16atomicImEEm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
 ; CHECK-LABEL: _Z24atomic_store_seq_cst_u64RNSt3__16atomicImEEm:
 ; CHECK:       # %bb.0:
@@ -492,7 +505,7 @@ define void @_Z24atomic_store_seq_cst_u64RNSt3__16atomicImEEm(%"struct.std::__1:
   ret void
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define void @_Z25atomic_store_seq_cst_i128RNSt3__16atomicInEEn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
 ; CHECK-LABEL: _Z25atomic_store_seq_cst_i128RNSt3__16atomicInEEn:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -518,7 +531,7 @@ define void @_Z25atomic_store_seq_cst_i128RNSt3__16atomicInEEn(%"struct.std::__1
   ret void
 }
 
-; Function Attrs: nounwind
+; Function Attrs: nofree nounwind mustprogress
 define void @_Z25atomic_store_seq_cst_u128RNSt3__16atomicIoEEo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
 ; CHECK-LABEL: _Z25atomic_store_seq_cst_u128RNSt3__16atomicIoEEo:
 ; CHECK:       .LBB{{[0-9]+}}_2:
@@ -544,15 +557,374 @@ define void @_Z25atomic_store_seq_cst_u128RNSt3__16atomicIoEEo(%"struct.std::__1
   ret void
 }
 
-; Function Attrs: nofree nounwind willreturn
-declare void @__atomic_store(i64, i8*, i8*, i32)
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store*: the i1 argument is zero-extended to i8
+; and written to a 1-byte stack slot. Expected to lower to a single st1b.
+define void @_Z26atomic_load_relaxed_stk_i1b(i1 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_stk_i1b:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st1b %s0, 15(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i8, align 1
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %2)
+  %3 = zext i1 %0 to i8
+  store atomic volatile i8 %3, i8* %2 monotonic, align 1
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %2)
+  ret void
+}
 
-; Function Attrs: argmemonly nounwind willreturn
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
 
-; Function Attrs: argmemonly nounwind willreturn
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
 
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i8 argument to a 1-byte stack
+; slot. Expected to lower to a single st1b.
+define void @_Z26atomic_load_relaxed_stk_i8c(i8 signext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_stk_i8c:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st1b %s0, 15(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i8, align 1
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %2)
+  store atomic volatile i8 %0, i8* %2 monotonic, align 1
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %2)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i8 argument to a 1-byte stack
+; slot. Expected to lower to a single st1b.
+define void @_Z26atomic_load_relaxed_stk_u8h(i8 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_stk_u8h:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st1b %s0, 15(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i8, align 1
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %2)
+  store atomic volatile i8 %0, i8* %2 monotonic, align 1
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %2)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i16 argument to a 2-byte stack
+; slot. Expected to lower to a single st2b.
+define void @_Z27atomic_load_relaxed_stk_i16s(i16 signext %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_i16s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st2b %s0, 14(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i16, align 2
+  %3 = bitcast i16* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %3)
+  store atomic volatile i16 %0, i16* %2 monotonic, align 2
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i16 argument to a 2-byte stack
+; slot. Expected to lower to a single st2b.
+define void @_Z27atomic_load_relaxed_stk_u16t(i16 zeroext %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_u16t:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st2b %s0, 14(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i16, align 2
+  %3 = bitcast i16* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %3)
+  store atomic volatile i16 %0, i16* %2 monotonic, align 2
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i32 argument to a 4-byte stack
+; slot. Expected to lower to a single stl.
+define void @_Z27atomic_load_relaxed_stk_i32i(i32 signext %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_i32i:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    stl %s0, 12(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i32, align 4
+  %3 = bitcast i32* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %3)
+  store atomic volatile i32 %0, i32* %2 monotonic, align 4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i32 argument to a 4-byte stack
+; slot. Expected to lower to a single stl.
+define void @_Z27atomic_load_relaxed_stk_u32j(i32 zeroext %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_u32j:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    stl %s0, 12(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i32, align 4
+  %3 = bitcast i32* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %3)
+  store atomic volatile i32 %0, i32* %2 monotonic, align 4
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i64 argument to an 8-byte stack
+; slot. Expected to lower to a single st.
+define void @_Z27atomic_load_relaxed_stk_i64l(i64 %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_i64l:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s0, 8(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i64, align 8
+  %3 = bitcast i64* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %3)
+  store atomic volatile i64 %0, i64* %2 monotonic, align 8
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) volatile atomic *store* of the i64 argument to an 8-byte stack
+; slot. Expected to lower to a single st.
+define void @_Z27atomic_load_relaxed_stk_u64m(i64 %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_stk_u64m:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s0, 8(, %s11)
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca i64, align 8
+  %3 = bitcast i64* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %3)
+  store atomic volatile i64 %0, i64* %2 monotonic, align 8
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but this is the 128-bit relaxed
+; *store* to a stack object. The i128 argument is first written to a stack
+; temporary (%2), then __atomic_store is called with size 16, the atomic
+; stack object (%3), the temporary, and ordering argument 0.
+define void @_Z28atomic_load_relaxed_stk_i128n(i128 %0) {
+; CHECK-LABEL: _Z28atomic_load_relaxed_stk_i128n:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 264(, %s11)
+; CHECK-NEXT:    st %s0, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_store@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_store@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = alloca %"struct.std::__1::atomic.40", align 16
+  %4 = bitcast %"struct.std::__1::atomic.40"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  %5 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_store(i64 16, i8* nonnull %4, i8* nonnull %5, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but this is the 128-bit relaxed
+; *store* to a stack object. The i128 argument is first written to a stack
+; temporary (%2), then __atomic_store is called with size 16, the atomic
+; stack object (%3), the temporary, and ordering argument 0.
+define void @_Z28atomic_load_relaxed_stk_u128o(i128 %0) {
+; CHECK-LABEL: _Z28atomic_load_relaxed_stk_u128o:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 264(, %s11)
+; CHECK-NEXT:    st %s0, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_store@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_store@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = alloca %"struct.std::__1::atomic.45", align 16
+  %4 = bitcast %"struct.std::__1::atomic.45"* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  %5 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_store(i64 16, i8* nonnull %4, i8* nonnull %5, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) atomic *store*: the i1 argument is zero-extended to i8 and
+; written to the i8 field of global @gv_i1. Expected to lower to st1b.
+define void @_Z25atomic_load_relaxed_gv_i1b(i1 zeroext %0) {
+; CHECK-LABEL: _Z25atomic_load_relaxed_gv_i1b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i1@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i1@hi(, %s1)
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = zext i1 %0 to i8
+  store atomic i8 %2, i8* getelementptr inbounds (%"struct.std::__1::atomic", %"struct.std::__1::atomic"* @gv_i1, i64 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) atomic *store* of the i8 argument to the i8 field of global
+; @gv_i8. Expected to lower to st1b.
+define void @_Z25atomic_load_relaxed_gv_i8c(i8 signext %0) {
+; CHECK-LABEL: _Z25atomic_load_relaxed_gv_i8c:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i8@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i8@hi(, %s1)
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i8 %0, i8* getelementptr inbounds (%"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* @gv_i8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) atomic *store* of the i8 argument to the i8 field of global
+; @gv_u8. Expected to lower to st1b.
+define void @_Z25atomic_load_relaxed_gv_u8h(i8 zeroext %0) {
+; CHECK-LABEL: _Z25atomic_load_relaxed_gv_u8h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u8@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u8@hi(, %s1)
+; CHECK-NEXT:    st1b %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i8 %0, i8* getelementptr inbounds (%"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* @gv_u8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+; NOTE: the mangled name says "atomic_load", but the body is a relaxed
+; (monotonic) atomic *store* of the i16 argument to the i16 field of global
+; @gv_i16. Expected to lower to st2b.
+define void @_Z26atomic_load_relaxed_gv_i16s(i16 signext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_i16s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i16@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i16@hi(, %s1)
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i16 %0, i16* getelementptr inbounds (%"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* @gv_i16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define void @_Z26atomic_load_relaxed_gv_u16t(i16 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_u16t:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u16@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u16@hi(, %s1)
+; CHECK-NEXT:    st2b %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i16 %0, i16* getelementptr inbounds (%"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* @gv_u16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define void @_Z26atomic_load_relaxed_gv_i32i(i32 signext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_i32i:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i32@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i32@hi(, %s1)
+; CHECK-NEXT:    stl %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i32 %0, i32* getelementptr inbounds (%"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* @gv_i32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define void @_Z26atomic_load_relaxed_gv_u32j(i32 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_u32j:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u32@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u32@hi(, %s1)
+; CHECK-NEXT:    stl %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i32 %0, i32* getelementptr inbounds (%"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* @gv_u32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define void @_Z26atomic_load_relaxed_gv_i64l(i64 %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_i64l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i64@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i64@hi(, %s1)
+; CHECK-NEXT:    st %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i64 %0, i64* getelementptr inbounds (%"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* @gv_i64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define void @_Z26atomic_load_relaxed_gv_u64m(i64 %0) {
+; CHECK-LABEL: _Z26atomic_load_relaxed_gv_u64m:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u64@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u64@hi(, %s1)
+; CHECK-NEXT:    st %s0, (, %s1)
+; CHECK-NEXT:    b.l.t (, %s10)
+  store atomic i64 %0, i64* getelementptr inbounds (%"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* @gv_u64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0) monotonic, align 8
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define void @_Z27atomic_load_relaxed_gv_i128n(i128 %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_gv_i128n:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 248(, %s11)
+; CHECK-NEXT:    st %s0, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_store@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_store@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_i128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i128@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %3)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_store(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.40"* @gv_i128 to i8*), i8* nonnull %3, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define void @_Z27atomic_load_relaxed_gv_u128o(i128 %0) {
+; CHECK-LABEL: _Z27atomic_load_relaxed_gv_u128o:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 248(, %s11)
+; CHECK-NEXT:    st %s0, 240(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_store@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_store@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_u128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u128@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s3, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %3)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_store(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.45"* @gv_u128 to i8*), i8* nonnull %3, i32 signext 0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nofree nounwind willreturn
+declare void @__atomic_store(i64, i8*, i8*, i32)
+
 !2 = !{!3, !3, i64 0}
 !3 = !{!"__int128", !4, i64 0}
 !4 = !{!"omnipotent char", !5, i64 0}

diff  --git a/llvm/test/CodeGen/VE/Scalar/atomic_swap.ll b/llvm/test/CodeGen/VE/Scalar/atomic_swap.ll
new file mode 100644
index 000000000000..fb03fc92e933
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/atomic_swap.ll
@@ -0,0 +1,1248 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test atomic swap for all types and all memory order
+;;;
+;;; Note:
+;;;   - We test i1/i8/i16/i32/i64/i128/u8/u16/u32/u64/u128.
+;;;   - We test relaxed, acquire, and seq_cst.
+;;;   - We test only exchange with variables since VE doesn't have exchange
+;;;     instructions with immediate values.
+;;;   - We test against an object, a stack object, and a global variable.
+
+%"struct.std::__1::atomic" = type { %"struct.std::__1::__atomic_base" }
+%"struct.std::__1::__atomic_base" = type { %"struct.std::__1::__cxx_atomic_impl" }
+%"struct.std::__1::__cxx_atomic_impl" = type { %"struct.std::__1::__cxx_atomic_base_impl" }
+%"struct.std::__1::__cxx_atomic_base_impl" = type { i8 }
+%"struct.std::__1::atomic.0" = type { %"struct.std::__1::__atomic_base.1" }
+%"struct.std::__1::__atomic_base.1" = type { %"struct.std::__1::__atomic_base.2" }
+%"struct.std::__1::__atomic_base.2" = type { %"struct.std::__1::__cxx_atomic_impl.3" }
+%"struct.std::__1::__cxx_atomic_impl.3" = type { %"struct.std::__1::__cxx_atomic_base_impl.4" }
+%"struct.std::__1::__cxx_atomic_base_impl.4" = type { i8 }
+%"struct.std::__1::atomic.5" = type { %"struct.std::__1::__atomic_base.6" }
+%"struct.std::__1::__atomic_base.6" = type { %"struct.std::__1::__atomic_base.7" }
+%"struct.std::__1::__atomic_base.7" = type { %"struct.std::__1::__cxx_atomic_impl.8" }
+%"struct.std::__1::__cxx_atomic_impl.8" = type { %"struct.std::__1::__cxx_atomic_base_impl.9" }
+%"struct.std::__1::__cxx_atomic_base_impl.9" = type { i8 }
+%"struct.std::__1::atomic.10" = type { %"struct.std::__1::__atomic_base.11" }
+%"struct.std::__1::__atomic_base.11" = type { %"struct.std::__1::__atomic_base.12" }
+%"struct.std::__1::__atomic_base.12" = type { %"struct.std::__1::__cxx_atomic_impl.13" }
+%"struct.std::__1::__cxx_atomic_impl.13" = type { %"struct.std::__1::__cxx_atomic_base_impl.14" }
+%"struct.std::__1::__cxx_atomic_base_impl.14" = type { i16 }
+%"struct.std::__1::atomic.15" = type { %"struct.std::__1::__atomic_base.16" }
+%"struct.std::__1::__atomic_base.16" = type { %"struct.std::__1::__atomic_base.17" }
+%"struct.std::__1::__atomic_base.17" = type { %"struct.std::__1::__cxx_atomic_impl.18" }
+%"struct.std::__1::__cxx_atomic_impl.18" = type { %"struct.std::__1::__cxx_atomic_base_impl.19" }
+%"struct.std::__1::__cxx_atomic_base_impl.19" = type { i16 }
+%"struct.std::__1::atomic.20" = type { %"struct.std::__1::__atomic_base.21" }
+%"struct.std::__1::__atomic_base.21" = type { %"struct.std::__1::__atomic_base.22" }
+%"struct.std::__1::__atomic_base.22" = type { %"struct.std::__1::__cxx_atomic_impl.23" }
+%"struct.std::__1::__cxx_atomic_impl.23" = type { %"struct.std::__1::__cxx_atomic_base_impl.24" }
+%"struct.std::__1::__cxx_atomic_base_impl.24" = type { i32 }
+%"struct.std::__1::atomic.25" = type { %"struct.std::__1::__atomic_base.26" }
+%"struct.std::__1::__atomic_base.26" = type { %"struct.std::__1::__atomic_base.27" }
+%"struct.std::__1::__atomic_base.27" = type { %"struct.std::__1::__cxx_atomic_impl.28" }
+%"struct.std::__1::__cxx_atomic_impl.28" = type { %"struct.std::__1::__cxx_atomic_base_impl.29" }
+%"struct.std::__1::__cxx_atomic_base_impl.29" = type { i32 }
+%"struct.std::__1::atomic.30" = type { %"struct.std::__1::__atomic_base.31" }
+%"struct.std::__1::__atomic_base.31" = type { %"struct.std::__1::__atomic_base.32" }
+%"struct.std::__1::__atomic_base.32" = type { %"struct.std::__1::__cxx_atomic_impl.33" }
+%"struct.std::__1::__cxx_atomic_impl.33" = type { %"struct.std::__1::__cxx_atomic_base_impl.34" }
+%"struct.std::__1::__cxx_atomic_base_impl.34" = type { i64 }
+%"struct.std::__1::atomic.35" = type { %"struct.std::__1::__atomic_base.36" }
+%"struct.std::__1::__atomic_base.36" = type { %"struct.std::__1::__atomic_base.37" }
+%"struct.std::__1::__atomic_base.37" = type { %"struct.std::__1::__cxx_atomic_impl.38" }
+%"struct.std::__1::__cxx_atomic_impl.38" = type { %"struct.std::__1::__cxx_atomic_base_impl.39" }
+%"struct.std::__1::__cxx_atomic_base_impl.39" = type { i64 }
+%"struct.std::__1::atomic.40" = type { %"struct.std::__1::__atomic_base.41" }
+%"struct.std::__1::__atomic_base.41" = type { %"struct.std::__1::__atomic_base.42" }
+%"struct.std::__1::__atomic_base.42" = type { %"struct.std::__1::__cxx_atomic_impl.43" }
+%"struct.std::__1::__cxx_atomic_impl.43" = type { %"struct.std::__1::__cxx_atomic_base_impl.44" }
+%"struct.std::__1::__cxx_atomic_base_impl.44" = type { i128 }
+%"struct.std::__1::atomic.45" = type { %"struct.std::__1::__atomic_base.46" }
+%"struct.std::__1::__atomic_base.46" = type { %"struct.std::__1::__atomic_base.47" }
+%"struct.std::__1::__atomic_base.47" = type { %"struct.std::__1::__cxx_atomic_impl.48" }
+%"struct.std::__1::__cxx_atomic_impl.48" = type { %"struct.std::__1::__cxx_atomic_base_impl.49" }
+%"struct.std::__1::__cxx_atomic_base_impl.49" = type { i128 }
+
+@gv_i1 = global %"struct.std::__1::atomic" zeroinitializer, align 4
+@gv_i8 = global %"struct.std::__1::atomic.0" zeroinitializer, align 4
+@gv_u8 = global %"struct.std::__1::atomic.5" zeroinitializer, align 4
+@gv_i16 = global %"struct.std::__1::atomic.10" zeroinitializer, align 4
+@gv_u16 = global %"struct.std::__1::atomic.15" zeroinitializer, align 4
+@gv_i32 = global %"struct.std::__1::atomic.20" zeroinitializer, align 4
+@gv_u32 = global %"struct.std::__1::atomic.25" zeroinitializer, align 4
+@gv_i64 = global %"struct.std::__1::atomic.30" zeroinitializer, align 8
+@gv_u64 = global %"struct.std::__1::atomic.35" zeroinitializer, align 8
+@gv_i128 = global %"struct.std::__1::atomic.40" zeroinitializer, align 16
+@gv_u128 = global %"struct.std::__1::atomic.45" zeroinitializer, align 16
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z22atomic_swap_relaxed_i1RNSt3__16atomicIbEEb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
+; CHECK-LABEL: _Z22atomic_swap_relaxed_i1RNSt3__16atomicIbEEb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = zext i1 %1 to i8
+  %4 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %0, i64 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw xchg i8* %4, i8 %3 monotonic
+  %6 = and i8 %5, 1
+  %7 = icmp ne i8 %6, 0
+  ret i1 %7
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z22atomic_swap_relaxed_i8RNSt3__16atomicIcEEc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
+; CHECK-LABEL: _Z22atomic_swap_relaxed_i8RNSt3__16atomicIcEEc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i8* %3, i8 %1 monotonic
+  ret i8 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z22atomic_swap_relaxed_u8RNSt3__16atomicIhEEh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
+; CHECK-LABEL: _Z22atomic_swap_relaxed_u8RNSt3__16atomicIhEEh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i8* %3, i8 %1 monotonic
+  ret i8 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z23atomic_swap_relaxed_i16RNSt3__16atomicIsEEs(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
+; CHECK-LABEL: _Z23atomic_swap_relaxed_i16RNSt3__16atomicIsEEs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i16* %3, i16 %1 monotonic
+  ret i16 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z23atomic_swap_relaxed_u16RNSt3__16atomicItEEt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
+; CHECK-LABEL: _Z23atomic_swap_relaxed_u16RNSt3__16atomicItEEt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i16* %3, i16 %1 monotonic
+  ret i16 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z23atomic_swap_relaxed_i32RNSt3__16atomicIiEEi(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
+; CHECK-LABEL: _Z23atomic_swap_relaxed_i32RNSt3__16atomicIiEEi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ts1am.w %s1, (%s0), 15
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i32* %3, i32 %1 monotonic
+  ret i32 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z23atomic_swap_relaxed_u32RNSt3__16atomicIjEEj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
+; CHECK-LABEL: _Z23atomic_swap_relaxed_u32RNSt3__16atomicIjEEj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ts1am.w %s1, (%s0), 15
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i32* %3, i32 %1 monotonic
+  ret i32 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z23atomic_swap_relaxed_i64RNSt3__16atomicIlEEl(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z23atomic_swap_relaxed_i64RNSt3__16atomicIlEEl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s1, (%s0), %s2
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i64* %3, i64 %1 monotonic
+  ret i64 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z23atomic_swap_relaxed_u64RNSt3__16atomicImEEm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z23atomic_swap_relaxed_u64RNSt3__16atomicImEEm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s1, (%s0), %s2
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i64* %3, i64 %1 monotonic
+  ret i64 %4
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z24atomic_swap_relaxed_i128RNSt3__16atomicInEEn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z24atomic_swap_relaxed_i128RNSt3__16atomicInEEn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s1, 0, %s5
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast %"struct.std::__1::atomic.40"* %0 to i8*
+  call void @__atomic_exchange(i64 16, i8* nonnull %7, i8* nonnull %5, i8* nonnull %6, i32 signext 0)
+  %8 = load i128, i128* %4, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  ret i128 %8
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z24atomic_swap_relaxed_u128RNSt3__16atomicIoEEo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z24atomic_swap_relaxed_u128RNSt3__16atomicIoEEo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    or %s1, 0, %s5
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast %"struct.std::__1::atomic.45"* %0 to i8*
+  call void @__atomic_exchange(i64 16, i8* nonnull %7, i8* nonnull %5, i8* nonnull %6, i32 signext 0)
+  %8 = load i128, i128* %4, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  ret i128 %8
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z22atomic_swap_acquire_i1RNSt3__16atomicIbEEb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
+; CHECK-LABEL: _Z22atomic_swap_acquire_i1RNSt3__16atomicIbEEb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = zext i1 %1 to i8
+  %4 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %0, i64 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw xchg i8* %4, i8 %3 acquire
+  %6 = and i8 %5, 1
+  %7 = icmp ne i8 %6, 0
+  ret i1 %7
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z22atomic_swap_acquire_i8RNSt3__16atomicIcEEc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
+; CHECK-LABEL: _Z22atomic_swap_acquire_i8RNSt3__16atomicIcEEc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i8* %3, i8 %1 acquire
+  ret i8 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z22atomic_swap_acquire_u8RNSt3__16atomicIhEEh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
+; CHECK-LABEL: _Z22atomic_swap_acquire_u8RNSt3__16atomicIhEEh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i8* %3, i8 %1 acquire
+  ret i8 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z23atomic_swap_acquire_i16RNSt3__16atomicIsEEs(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
+; CHECK-LABEL: _Z23atomic_swap_acquire_i16RNSt3__16atomicIsEEs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i16* %3, i16 %1 acquire
+  ret i16 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z23atomic_swap_acquire_u16RNSt3__16atomicItEEt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
+; CHECK-LABEL: _Z23atomic_swap_acquire_u16RNSt3__16atomicItEEt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i16* %3, i16 %1 acquire
+  ret i16 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z23atomic_swap_acquire_i32RNSt3__16atomicIiEEi(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
+; CHECK-LABEL: _Z23atomic_swap_acquire_i32RNSt3__16atomicIiEEi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ts1am.w %s1, (%s0), 15
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i32* %3, i32 %1 acquire
+  ret i32 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z23atomic_swap_acquire_u32RNSt3__16atomicIjEEj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
+; CHECK-LABEL: _Z23atomic_swap_acquire_u32RNSt3__16atomicIjEEj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ts1am.w %s1, (%s0), 15
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i32* %3, i32 %1 acquire
+  ret i32 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z23atomic_swap_acquire_i64RNSt3__16atomicIlEEl(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z23atomic_swap_acquire_i64RNSt3__16atomicIlEEl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s1, (%s0), %s2
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i64* %3, i64 %1 acquire
+  ret i64 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z23atomic_swap_acquire_u64RNSt3__16atomicImEEm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z23atomic_swap_acquire_u64RNSt3__16atomicImEEm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s1, (%s0), %s2
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i64* %3, i64 %1 acquire
+  ret i64 %4
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z24atomic_swap_acquire_i128RNSt3__16atomicInEEn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z24atomic_swap_acquire_i128RNSt3__16atomicInEEn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 2, (0)1
+; CHECK-NEXT:    or %s1, 0, %s5
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast %"struct.std::__1::atomic.40"* %0 to i8*
+  call void @__atomic_exchange(i64 16, i8* nonnull %7, i8* nonnull %5, i8* nonnull %6, i32 signext 2)
+  %8 = load i128, i128* %4, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  ret i128 %8
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z24atomic_swap_acquire_u128RNSt3__16atomicIoEEo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z24atomic_swap_acquire_u128RNSt3__16atomicIoEEo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 2, (0)1
+; CHECK-NEXT:    or %s1, 0, %s5
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast %"struct.std::__1::atomic.45"* %0 to i8*
+  call void @__atomic_exchange(i64 16, i8* nonnull %7, i8* nonnull %5, i8* nonnull %6, i32 signext 2)
+  %8 = load i128, i128* %4, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  ret i128 %8
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z22atomic_swap_seq_cst_i1RNSt3__16atomicIbEEb(%"struct.std::__1::atomic"* nocapture nonnull align 1 dereferenceable(1) %0, i1 zeroext %1) {
+; CHECK-LABEL: _Z22atomic_swap_seq_cst_i1RNSt3__16atomicIbEEb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = zext i1 %1 to i8
+  %4 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %0, i64 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw xchg i8* %4, i8 %3 seq_cst
+  %6 = and i8 %5, 1
+  %7 = icmp ne i8 %6, 0
+  ret i1 %7
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z22atomic_swap_seq_cst_i8RNSt3__16atomicIcEEc(%"struct.std::__1::atomic.0"* nocapture nonnull align 1 dereferenceable(1) %0, i8 signext %1) {
+; CHECK-LABEL: _Z22atomic_swap_seq_cst_i8RNSt3__16atomicIcEEc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i8* %3, i8 %1 seq_cst
+  ret i8 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z22atomic_swap_seq_cst_u8RNSt3__16atomicIhEEh(%"struct.std::__1::atomic.5"* nocapture nonnull align 1 dereferenceable(1) %0, i8 zeroext %1) {
+; CHECK-LABEL: _Z22atomic_swap_seq_cst_u8RNSt3__16atomicIhEEh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i8* %3, i8 %1 seq_cst
+  ret i8 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z23atomic_swap_seq_cst_i16RNSt3__16atomicIsEEs(%"struct.std::__1::atomic.10"* nocapture nonnull align 2 dereferenceable(2) %0, i16 signext %1) {
+; CHECK-LABEL: _Z23atomic_swap_seq_cst_i16RNSt3__16atomicIsEEs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i16* %3, i16 %1 seq_cst
+  ret i16 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z23atomic_swap_seq_cst_u16RNSt3__16atomicItEEt(%"struct.std::__1::atomic.15"* nocapture nonnull align 2 dereferenceable(2) %0, i16 zeroext %1) {
+; CHECK-LABEL: _Z23atomic_swap_seq_cst_u16RNSt3__16atomicItEEt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    and %s2, 3, %s0
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s1, %s1, %s3
+; CHECK-NEXT:    and %s0, -4, %s0
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s1, (%s0), %s2
+; CHECK-NEXT:    and %s0, %s1, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i16* %3, i16 %1 seq_cst
+  ret i16 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z23atomic_swap_seq_cst_i32RNSt3__16atomicIiEEi(%"struct.std::__1::atomic.20"* nocapture nonnull align 4 dereferenceable(4) %0, i32 signext %1) {
+; CHECK-LABEL: _Z23atomic_swap_seq_cst_i32RNSt3__16atomicIiEEi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    ts1am.w %s1, (%s0), 15
+; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i32* %3, i32 %1 seq_cst
+  ret i32 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z23atomic_swap_seq_cst_u32RNSt3__16atomicIjEEj(%"struct.std::__1::atomic.25"* nocapture nonnull align 4 dereferenceable(4) %0, i32 zeroext %1) {
+; CHECK-LABEL: _Z23atomic_swap_seq_cst_u32RNSt3__16atomicIjEEj:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    ts1am.w %s1, (%s0), 15
+; CHECK-NEXT:    adds.w.zx %s0, %s1, (0)1
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i32* %3, i32 %1 seq_cst
+  ret i32 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z23atomic_swap_seq_cst_i64RNSt3__16atomicIlEEl(%"struct.std::__1::atomic.30"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z23atomic_swap_seq_cst_i64RNSt3__16atomicIlEEl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s1, (%s0), %s2
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i64* %3, i64 %1 seq_cst
+  ret i64 %4
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z23atomic_swap_seq_cst_u64RNSt3__16atomicImEEm(%"struct.std::__1::atomic.35"* nocapture nonnull align 8 dereferenceable(8) %0, i64 %1) {
+; CHECK-LABEL: _Z23atomic_swap_seq_cst_u64RNSt3__16atomicImEEm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s1, (%s0), %s2
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %4 = atomicrmw xchg i64* %3, i64 %1 seq_cst
+  ret i64 %4
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z24atomic_swap_seq_cst_i128RNSt3__16atomicInEEn(%"struct.std::__1::atomic.40"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z24atomic_swap_seq_cst_i128RNSt3__16atomicInEEn:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 5, (0)1
+; CHECK-NEXT:    or %s1, 0, %s5
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast %"struct.std::__1::atomic.40"* %0 to i8*
+  call void @__atomic_exchange(i64 16, i8* nonnull %7, i8* nonnull %5, i8* nonnull %6, i32 signext 5)
+  %8 = load i128, i128* %4, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  ret i128 %8
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z24atomic_swap_seq_cst_u128RNSt3__16atomicIoEEo(%"struct.std::__1::atomic.45"* nonnull align 16 dereferenceable(16) %0, i128 %1) {
+; CHECK-LABEL: _Z24atomic_swap_seq_cst_u128RNSt3__16atomicIoEEo:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    st %s2, 264(, %s11)
+; CHECK-NEXT:    st %s1, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 5, (0)1
+; CHECK-NEXT:    or %s1, 0, %s5
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %3 = alloca i128, align 16
+  %4 = alloca i128, align 16
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  store i128 %1, i128* %3, align 16, !tbaa !2
+  %7 = bitcast %"struct.std::__1::atomic.45"* %0 to i8*
+  call void @__atomic_exchange(i64 16, i8* nonnull %7, i8* nonnull %5, i8* nonnull %6, i32 signext 5)
+  %8 = load i128, i128* %4, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  ret i128 %8
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i1 @_Z26atomic_swap_relaxed_stk_i1b(i1 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_stk_i1b:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    lea %s2, 8(, %s11)
+; CHECK-NEXT:    ts1am.w %s0, (%s2), %s1
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic", align 1
+  %3 = getelementptr inbounds %"struct.std::__1::atomic", %"struct.std::__1::atomic"* %2, i64 0, i32 0, i32 0, i32 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %3)
+  %4 = zext i1 %0 to i8
+  %5 = atomicrmw volatile xchg i8* %3, i8 %4 monotonic
+  %6 = and i8 %5, 1
+  %7 = icmp ne i8 %6, 0
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %3)
+  ret i1 %7
+}
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: nofree nounwind mustprogress
+define signext i8 @_Z26atomic_swap_relaxed_stk_i8c(i8 signext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_stk_i8c:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    lea %s2, 8(, %s11)
+; CHECK-NEXT:    ts1am.w %s0, (%s2), %s1
+; CHECK-NEXT:    sll %s0, %s0, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.0", align 1
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %3)
+  %4 = atomicrmw volatile xchg i8* %3, i8 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %3)
+  ret i8 %4
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i8 @_Z26atomic_swap_relaxed_stk_u8h(i8 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_stk_u8h:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s1, 1, (0)1
+; CHECK-NEXT:    lea %s2, 8(, %s11)
+; CHECK-NEXT:    ts1am.w %s0, (%s2), %s1
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.5", align 1
+  %3 = getelementptr inbounds %"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %3)
+  %4 = atomicrmw volatile xchg i8* %3, i8 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %3)
+  ret i8 %4
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define signext i16 @_Z27atomic_swap_relaxed_stk_i16s(i16 signext %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_stk_i16s:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s1, 3, (0)1
+; CHECK-NEXT:    lea %s2, 8(, %s11)
+; CHECK-NEXT:    ts1am.w %s0, (%s2), %s1
+; CHECK-NEXT:    sll %s0, %s0, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.10", align 2
+  %3 = bitcast %"struct.std::__1::atomic.10"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %3)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw volatile xchg i16* %4, i16 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %3)
+  ret i16 %5
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i16 @_Z27atomic_swap_relaxed_stk_u16t(i16 zeroext %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_stk_u16t:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s1, 3, (0)1
+; CHECK-NEXT:    lea %s2, 8(, %s11)
+; CHECK-NEXT:    ts1am.w %s0, (%s2), %s1
+; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.15", align 2
+  %3 = bitcast %"struct.std::__1::atomic.15"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %3)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw volatile xchg i16* %4, i16 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %3)
+  ret i16 %5
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define signext i32 @_Z27atomic_swap_relaxed_stk_i32i(i32 signext %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_stk_i32i:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ts1am.w %s0, 8(%s11), 15
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.20", align 4
+  %3 = bitcast %"struct.std::__1::atomic.20"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %3)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw volatile xchg i32* %4, i32 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3)
+  ret i32 %5
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define zeroext i32 @_Z27atomic_swap_relaxed_stk_u32j(i32 zeroext %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_stk_u32j:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ts1am.w %s0, 8(%s11), 15
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.25", align 4
+  %3 = bitcast %"struct.std::__1::atomic.25"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %3)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw volatile xchg i32* %4, i32 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3)
+  ret i32 %5
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define i64 @_Z27atomic_swap_relaxed_stk_i64l(i64 %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_stk_i64l:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s1, 255
+; CHECK-NEXT:    ts1am.l %s0, 8(%s11), %s1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.30", align 8
+  %3 = bitcast %"struct.std::__1::atomic.30"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %3)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw volatile xchg i64* %4, i64 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %3)
+  ret i64 %5
+}
+
+; Function Attrs: nofree nounwind mustprogress
+define i64 @_Z27atomic_swap_relaxed_stk_u64m(i64 %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_stk_u64m:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s1, 255
+; CHECK-NEXT:    ts1am.l %s0, 8(%s11), %s1
+; CHECK-NEXT:    adds.l %s11, 16, %s11
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = alloca %"struct.std::__1::atomic.35", align 8
+  %3 = bitcast %"struct.std::__1::atomic.35"* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %3)
+  %4 = getelementptr inbounds %"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %5 = atomicrmw volatile xchg i64* %4, i64 %0 monotonic
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %3)
+  ret i64 %5
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_swap_relaxed_stk_i128n(i128 %0) {
+; CHECK-LABEL: _Z28atomic_swap_relaxed_stk_i128n:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 280(, %s11)
+; CHECK-NEXT:    st %s0, 272(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s2, 272(, %s11)
+; CHECK-NEXT:    lea %s3, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 264(, %s11)
+; CHECK-NEXT:    ld %s0, 256(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = alloca i128, align 16
+  %4 = alloca %"struct.std::__1::atomic.40", align 16
+  %5 = bitcast %"struct.std::__1::atomic.40"* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  %7 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %7)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_exchange(i64 16, i8* nonnull %5, i8* nonnull %6, i8* nonnull %7, i32 signext 0)
+  %8 = load i128, i128* %3, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %7)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  ret i128 %8
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z28atomic_swap_relaxed_stk_u128o(i128 %0) {
+; CHECK-LABEL: _Z28atomic_swap_relaxed_stk_u128o:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 280(, %s11)
+; CHECK-NEXT:    st %s0, 272(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s1, 240(, %s11)
+; CHECK-NEXT:    lea %s2, 272(, %s11)
+; CHECK-NEXT:    lea %s3, 256(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 264(, %s11)
+; CHECK-NEXT:    ld %s0, 256(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = alloca i128, align 16
+  %4 = alloca %"struct.std::__1::atomic.45", align 16
+  %5 = bitcast %"struct.std::__1::atomic.45"* %4 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  %6 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %6)
+  %7 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %7)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_exchange(i64 16, i8* nonnull %5, i8* nonnull %6, i8* nonnull %7, i32 signext 0)
+  %8 = load i128, i128* %3, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %6)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %7)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  ret i128 %8
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i1 @_Z25atomic_swap_relaxed_gv_i1b(i1 zeroext %0) {
+; CHECK-LABEL: _Z25atomic_swap_relaxed_gv_i1b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i1@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i1@hi(, %s1)
+; CHECK-NEXT:    and %s2, 3, %s1
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s0, %s0, %s3
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s0, (%s1), %s2
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, 1, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = zext i1 %0 to i8
+  %3 = atomicrmw xchg i8* getelementptr inbounds (%"struct.std::__1::atomic", %"struct.std::__1::atomic"* @gv_i1, i64 0, i32 0, i32 0, i32 0, i32 0), i8 %2 monotonic
+  %4 = and i8 %3, 1
+  %5 = icmp ne i8 %4, 0
+  ret i1 %5
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i8 @_Z25atomic_swap_relaxed_gv_i8c(i8 signext %0) {
+; CHECK-LABEL: _Z25atomic_swap_relaxed_gv_i8c:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i8@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i8@hi(, %s1)
+; CHECK-NEXT:    and %s2, 3, %s1
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s0, %s0, %s3
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s0, (%s1), %s2
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 56
+; CHECK-NEXT:    sra.l %s0, %s0, 56
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i8* getelementptr inbounds (%"struct.std::__1::atomic.0", %"struct.std::__1::atomic.0"* @gv_i8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 %0 monotonic
+  ret i8 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i8 @_Z25atomic_swap_relaxed_gv_u8h(i8 zeroext %0) {
+; CHECK-LABEL: _Z25atomic_swap_relaxed_gv_u8h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u8@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u8@hi(, %s1)
+; CHECK-NEXT:    and %s2, 3, %s1
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s0, %s0, %s3
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    sla.w.sx %s2, (63)0, %s2
+; CHECK-NEXT:    ts1am.w %s0, (%s1), %s2
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i8* getelementptr inbounds (%"struct.std::__1::atomic.5", %"struct.std::__1::atomic.5"* @gv_u8, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i8 %0 monotonic
+  ret i8 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i16 @_Z26atomic_swap_relaxed_gv_i16s(i16 signext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_gv_i16s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i16@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i16@hi(, %s1)
+; CHECK-NEXT:    and %s2, 3, %s1
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s0, %s0, %s3
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s0, (%s1), %s2
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    sll %s0, %s0, 48
+; CHECK-NEXT:    sra.l %s0, %s0, 48
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i16* getelementptr inbounds (%"struct.std::__1::atomic.10", %"struct.std::__1::atomic.10"* @gv_i16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i16 %0 monotonic
+  ret i16 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i16 @_Z26atomic_swap_relaxed_gv_u16t(i16 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_gv_u16t:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u16@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u16@hi(, %s1)
+; CHECK-NEXT:    and %s2, 3, %s1
+; CHECK-NEXT:    sla.w.sx %s3, %s2, 3
+; CHECK-NEXT:    sla.w.sx %s0, %s0, %s3
+; CHECK-NEXT:    and %s1, -4, %s1
+; CHECK-NEXT:    sla.w.sx %s2, (62)0, %s2
+; CHECK-NEXT:    ts1am.w %s0, (%s1), %s2
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    srl %s0, %s0, %s3
+; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i16* getelementptr inbounds (%"struct.std::__1::atomic.15", %"struct.std::__1::atomic.15"* @gv_u16, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i16 %0 monotonic
+  ret i16 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define signext i32 @_Z26atomic_swap_relaxed_gv_i32i(i32 signext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_gv_i32i:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i32@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i32@hi(, %s1)
+; CHECK-NEXT:    ts1am.w %s0, (%s1), 15
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i32* getelementptr inbounds (%"struct.std::__1::atomic.20", %"struct.std::__1::atomic.20"* @gv_i32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i32 %0 monotonic
+  ret i32 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define zeroext i32 @_Z26atomic_swap_relaxed_gv_u32j(i32 zeroext %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_gv_u32j:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u32@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u32@hi(, %s1)
+; CHECK-NEXT:    ts1am.w %s0, (%s1), 15
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i32* getelementptr inbounds (%"struct.std::__1::atomic.25", %"struct.std::__1::atomic.25"* @gv_u32, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i32 %0 monotonic
+  ret i32 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z26atomic_swap_relaxed_gv_i64l(i64 %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_gv_i64l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_i64@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i64@hi(, %s1)
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s0, (%s1), %s2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i64* getelementptr inbounds (%"struct.std::__1::atomic.30", %"struct.std::__1::atomic.30"* @gv_i64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 %0 monotonic
+  ret i64 %2
+}
+
+; Function Attrs: nofree norecurse nounwind mustprogress
+define i64 @_Z26atomic_swap_relaxed_gv_u64m(i64 %0) {
+; CHECK-LABEL: _Z26atomic_swap_relaxed_gv_u64m:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, gv_u64@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u64@hi(, %s1)
+; CHECK-NEXT:    lea %s2, 255
+; CHECK-NEXT:    ts1am.l %s0, (%s1), %s2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = atomicrmw xchg i64* getelementptr inbounds (%"struct.std::__1::atomic.35", %"struct.std::__1::atomic.35"* @gv_u64, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0), i64 %0 monotonic
+  ret i64 %2
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z27atomic_swap_relaxed_gv_i128n(i128 %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_gv_i128n:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 264(, %s11)
+; CHECK-NEXT:    st %s0, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_i128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_i128@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = alloca i128, align 16
+  %4 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_exchange(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.40"* @gv_i128 to i8*), i8* nonnull %4, i8* nonnull %5, i32 signext 0)
+  %6 = load i128, i128* %3, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  ret i128 %6
+}
+
+; Function Attrs: nounwind mustprogress
+define i128 @_Z27atomic_swap_relaxed_gv_u128o(i128 %0) {
+; CHECK-LABEL: _Z27atomic_swap_relaxed_gv_u128o:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s1, 264(, %s11)
+; CHECK-NEXT:    st %s0, 256(, %s11)
+; CHECK-NEXT:    lea %s0, __atomic_exchange@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, __atomic_exchange@hi(, %s0)
+; CHECK-NEXT:    lea %s0, gv_u128@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s1, gv_u128@hi(, %s0)
+; CHECK-NEXT:    lea %s2, 256(, %s11)
+; CHECK-NEXT:    lea %s3, 240(, %s11)
+; CHECK-NEXT:    or %s0, 16, (0)1
+; CHECK-NEXT:    or %s4, 0, (0)1
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    ld %s1, 248(, %s11)
+; CHECK-NEXT:    ld %s0, 240(, %s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %2 = alloca i128, align 16
+  %3 = alloca i128, align 16
+  %4 = bitcast i128* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %4)
+  %5 = bitcast i128* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5)
+  store i128 %0, i128* %2, align 16, !tbaa !2
+  call void @__atomic_exchange(i64 16, i8* nonnull bitcast (%"struct.std::__1::atomic.45"* @gv_u128 to i8*), i8* nonnull %4, i8* nonnull %5, i32 signext 0)
+  %6 = load i128, i128* %3, align 16, !tbaa !2
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %4)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5)
+  ret i128 %6
+}
+
+; Function Attrs: nounwind willreturn
+declare void @__atomic_exchange(i64, i8*, i8*, i8*, i32)
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"__int128", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}


        


More information about the llvm-commits mailing list