[llvm] de79233 - [X86] Complete preservation of !pcsections in X86ISelLowering
Marco Elver via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 12 06:09:51 PDT 2023
Author: Marco Elver
Date: 2023-07-12T15:09:31+02:00
New Revision: de79233b2e14a79d6526f4e74f35f88c8fa97296
URL: https://github.com/llvm/llvm-project/commit/de79233b2e14a79d6526f4e74f35f88c8fa97296
DIFF: https://github.com/llvm/llvm-project/commit/de79233b2e14a79d6526f4e74f35f88c8fa97296.diff
LOG: [X86] Complete preservation of !pcsections in X86ISelLowering
https://reviews.llvm.org/D130883 introduced MIMetadata to simplify
metadata propagation (DebugLoc and PCSections).
However, we currently still permit implicit conversion of DebugLoc to
MIMetadata, to allow for a gradual transition and let the old code
work as-is.
This manifests as lost !pcsections metadata in X86-specific lowerings,
such as 128-bit atomics.
Fix the situation for X86ISelLowering by converting all BuildMI() calls
to use an explicitly constructed MIMetadata.
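As a minimal sketch of the conversion pattern applied throughout (the
opcode and operands are illustrative only, mirroring the emitXBegin
hunk below):

  // Before: only the DebugLoc survives; any !pcsections metadata
  // attached to MI is dropped from the newly built instruction.
  const DebugLoc &DL = MI.getDebugLoc();
  BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);

  // After: MIMetadata captures both the DebugLoc and the PCSections
  // metadata from MI, and BuildMI propagates both.
  const MIMetadata MIMD(MI);
  BuildMI(mainMBB, MIMD, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);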
Reviewed By: dvyukov
Differential Revision: https://reviews.llvm.org/D154986
Added:
Modified:
llvm/include/llvm/CodeGen/MachineInstrBuilder.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pcsections-atomics.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index c35bdc0c2b44a1..c6f3a84809fa70 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -9,7 +9,8 @@
// This file exposes a function named BuildMI, which is useful for dramatically
// simplifying how MachineInstr's are created. It allows use of code like this:
//
-// M = BuildMI(MBB, MI, DL, TII.get(X86::ADD8rr), Dst)
+// MIMetadata MIMD(MI); // Propagates DebugLoc and other metadata
+// M = BuildMI(MBB, MI, MIMD, TII.get(X86::ADD8rr), Dst)
// .addReg(argVal1)
// .addReg(argVal2);
//
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a2063dfedc7424..d9e01c4ac2fa87 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2478,10 +2478,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
- DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
- MVT::Other, getControlRoot(),
- DAG.getBasicBlock(Succ0MBB)));
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) {
+ auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(Succ0MBB));
+ setValue(&I, Br);
+ DAG.setRoot(Br);
+ }
return;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 75254d23059b55..e87948d0a54bf2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36263,7 +36263,7 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
@@ -36313,28 +36313,28 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
// xbegin fallMBB
// # fallthrough to mainMBB
// # abortion to fallMBB
- BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
+ BuildMI(thisMBB, MIMD, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(fallMBB);
// mainMBB:
// mainDstReg := -1
- BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
- BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
+ BuildMI(mainMBB, MIMD, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
+ BuildMI(mainMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
mainMBB->addSuccessor(sinkMBB);
// fallMBB:
// ; pseudo instruction to model hardware's definition from XABORT
// EAX := XABORT_DEF
// fallDstReg := EAX
- BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
- BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
+ BuildMI(fallMBB, MIMD, TII->get(X86::XABORT_DEF));
+ BuildMI(fallMBB, MIMD, TII->get(TargetOpcode::COPY), fallDstReg)
.addReg(X86::EAX);
fallMBB->addSuccessor(sinkMBB);
// sinkMBB:
// DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB)
- BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
+ BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(fallDstReg).addMBB(fallMBB);
@@ -36387,7 +36387,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
// struct va_list {
// i32 gp_offset
@@ -36471,7 +36471,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
- BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ BuildMI(thisMBB, MIMD, TII->get(X86::MOV32rm), OffsetReg)
.add(Base)
.add(Scale)
.add(Index)
@@ -36480,13 +36480,13 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.setMemRefs(LoadOnlyMMO);
// Check if there is enough room left to pull this argument.
- BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
- BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
+ BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1))
.addMBB(overflowMBB).addImm(X86::COND_AE);
}
@@ -36497,7 +36497,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Read the reg_save_area address.
Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(
- offsetMBB, DL,
+ offsetMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
RegSaveReg)
.add(Base)
@@ -36510,30 +36510,30 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
if (Subtarget.isTarget64BitLP64()) {
// Zero-extend the offset
Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
- BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
} else {
// Add the offset to the reg_save_area to get the final address.
- BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32rr), OffsetDestReg)
.addReg(OffsetReg)
.addReg(RegSaveReg);
}
// Compute the offset for the next argument
Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
- BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
@@ -36543,7 +36543,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.setMemRefs(StoreOnlyMMO);
// Jump to endMBB
- BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
+ BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
@@ -36553,7 +36553,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Load the overflow_area address into a register.
Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(overflowMBB, DL,
+ BuildMI(overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
OverflowAddrReg)
.add(Base)
@@ -36571,20 +36571,20 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(
- overflowMBB, DL,
+ overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
TmpReg)
.addReg(OverflowAddrReg)
.addImm(Alignment.value() - 1);
BuildMI(
- overflowMBB, DL,
+ overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),
OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Alignment.value() - 1));
} else {
- BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
@@ -36592,14 +36592,14 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// (the overflow address should be kept 8-byte aligned)
Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(
- overflowMBB, DL,
+ overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
- BuildMI(overflowMBB, DL,
+ BuildMI(overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))
.add(Base)
.add(Scale)
@@ -36611,7 +36611,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
- BuildMI(*endMBB, endMBB->begin(), DL,
+ BuildMI(*endMBB, endMBB->begin(), MIMD,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
@@ -36688,7 +36688,7 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
MachineBasicBlock *SinkMBB) {
MachineFunction *MF = TrueMBB->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- const DebugLoc &DL = MIItBegin->getDebugLoc();
+ const MIMetadata MIMD(*MIItBegin);
X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
@@ -36721,11 +36721,12 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
if (RegRewriteTable.contains(Op2Reg))
Op2Reg = RegRewriteTable[Op2Reg].second;
- MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
- .addReg(Op1Reg)
- .addMBB(FalseMBB)
- .addReg(Op2Reg)
- .addMBB(TrueMBB);
+ MIB =
+ BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg)
+ .addReg(Op1Reg)
+ .addMBB(FalseMBB)
+ .addReg(Op2Reg)
+ .addMBB(TrueMBB);
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
@@ -36740,7 +36741,7 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
MachineInstr &SecondCascadedCMOV,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = FirstCMOV.getDebugLoc();
+ const MIMetadata MIMD(FirstCMOV);
// We lower cascaded CMOVs such as
//
@@ -36857,11 +36858,13 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
// Create the conditional branch instructions.
X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
- BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
+ BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
X86::CondCode SecondCC =
X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
- BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);
+ BuildMI(FirstInsertedMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(SinkMBB)
+ .addImm(SecondCC);
// SinkMBB:
// %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
@@ -36869,7 +36872,7 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
Register Op1Reg = FirstCMOV.getOperand(1).getReg();
Register Op2Reg = FirstCMOV.getOperand(2).getReg();
MachineInstrBuilder MIB =
- BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg)
+ BuildMI(*SinkMBB, SinkMBB->begin(), MIMD, TII->get(X86::PHI), DestReg)
.addReg(Op1Reg)
.addMBB(SecondInsertedMBB)
.addReg(Op2Reg)
@@ -36890,7 +36893,7 @@ MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -37008,7 +37011,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
FalseMBB->addSuccessor(SinkMBB);
// Create the conditional branch instruction.
- BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
+ BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
// SinkMBB:
// %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
@@ -37037,7 +37040,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
const unsigned ProbeSize = getStackProbeSize(*MF);
@@ -37061,23 +37064,23 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
Register FinalStackPtr = MRI.createVirtualRegister(
TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
- BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
+ BuildMI(*MBB, {MI}, MIMD, TII->get(TargetOpcode::COPY), TmpStackPtr)
.addReg(physSPReg);
{
const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
- BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
+ BuildMI(*MBB, {MI}, MIMD, TII->get(Opc), FinalStackPtr)
.addReg(TmpStackPtr)
.addReg(sizeVReg);
}
// test rsp size
- BuildMI(testMBB, DL,
+ BuildMI(testMBB, MIMD,
TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
.addReg(FinalStackPtr)
.addReg(physSPReg);
- BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+ BuildMI(testMBB, MIMD, TII->get(X86::JCC_1))
.addMBB(tailMBB)
.addImm(X86::COND_GE);
testMBB->addSuccessor(blockMBB);
@@ -37097,20 +37100,20 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
const unsigned XORMIOpc =
TFI.Uses64BitFramePtr ? X86::XOR64mi32 : X86::XOR32mi;
- addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
+ addRegOffset(BuildMI(blockMBB, MIMD, TII->get(XORMIOpc)), physSPReg, false, 0)
.addImm(0);
- BuildMI(blockMBB, DL,
- TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr)), physSPReg)
+ BuildMI(blockMBB, MIMD, TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr)),
+ physSPReg)
.addReg(physSPReg)
.addImm(ProbeSize);
-
- BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
+ BuildMI(blockMBB, MIMD, TII->get(X86::JMP_1)).addMBB(testMBB);
blockMBB->addSuccessor(testMBB);
// Replace original instruction by the expected stack ptr
- BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
+ BuildMI(tailMBB, MIMD, TII->get(TargetOpcode::COPY),
+ MI.getOperand(0).getReg())
.addReg(FinalStackPtr);
tailMBB->splice(tailMBB->end(), MBB,
@@ -37130,7 +37133,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
@@ -37185,58 +37188,58 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
- BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+ BuildMI(BB, MIMD, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
- BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
- BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
+ BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
- BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
+ BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
- BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
+ BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
- BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
+ BuildMI(bumpMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
- BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
- BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
- BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI)
.addReg(sizeVReg);
- BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EDI, RegState::Implicit)
.addReg(X86::EAX, RegState::ImplicitDefine);
} else {
- BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
- BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
- BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
+ BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
- BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
- BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+ BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(IsLP64 ? X86::RAX : X86::EAX);
- BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
@@ -37245,7 +37248,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
bumpMBB->addSuccessor(continueMBB);
// Take care of the PHI nodes.
- BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
+ BuildMI(*continueMBB, continueMBB->begin(), MIMD, TII->get(X86::PHI),
MI.getOperand(0).getReg())
.addReg(mallocPtrVReg)
.addMBB(mallocMBB)
@@ -37265,7 +37268,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
MachineFunction *MF = BB->getParent();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&
@@ -37290,7 +37293,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
RestoreMBB->setIsEHPad(true);
auto RestoreMBBI = RestoreMBB->begin();
- BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+ BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4)).addMBB(TargetMBB);
return BB;
}
@@ -37303,13 +37306,13 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
// inside MC, therefore without the two markers shrink-wrapping
// may push the prologue/epilogue pass them.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction &MF = *BB->getParent();
// Emit CALLSEQ_START right before the instruction.
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
MachineInstrBuilder CallseqStart =
- BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
+ BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
// Emit CALLSEQ_END right after the instruction.
@@ -37317,7 +37320,7 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
// original instruction around.
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MachineInstrBuilder CallseqEnd =
- BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
+ BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
return BB;
@@ -37332,7 +37335,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
// be in the normal return register.
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
@@ -37346,38 +37349,38 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
@@ -37471,7 +37474,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
@@ -37510,7 +37513,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
MI.getOperand(0).ChangeToES(Symbol);
MI.setDesc(TII->get(Opc));
@@ -37533,7 +37536,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
/// \param [in] MBB The Machine Basic Block that will be modified.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -37548,7 +37551,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
Register ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
- BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
+ BuildMI(*MBB, MI, MIMD, TII->get(XorRROpc))
.addDef(ZReg)
.addReg(ZReg, RegState::Undef)
.addReg(ZReg, RegState::Undef);
@@ -37556,11 +37559,11 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
// Read the current SSP Register value to the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
- BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+ BuildMI(*MBB, MI, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Write the SSP register value to offset 3 in input memory buffer.
unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
- MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
+ MIB = BuildMI(*MBB, MI, MIMD, TII->get(PtrStoreOpc));
const int64_t SSPOffset = 3 * PVT.getStoreSize();
const unsigned MemOpndSlot = 1;
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
@@ -37576,7 +37579,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -37652,7 +37655,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
@@ -37660,7 +37663,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
.addReg(0);
} else {
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
@@ -37670,7 +37673,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
@@ -37688,7 +37691,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
}
// Setup
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -37698,14 +37701,15 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// mainMBB:
// EAX = 0
- BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
+ BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(X86::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(restoreDstReg).addMBB(restoreMBB);
+ BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg)
+ .addMBB(mainMBB)
+ .addReg(restoreDstReg)
+ .addMBB(restoreMBB);
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
@@ -37716,12 +37720,12 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
Register FramePtr = RegInfo->getFrameRegister(*MF);
Register BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
- addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
+ addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.setMIFlag(MachineInstr::FrameSetup);
}
- BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
- BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
+ BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI.eraseFromParent();
@@ -37736,7 +37740,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -37797,11 +37801,11 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Initialize a register with zero.
Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass);
- BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg);
+ BuildMI(checkSspMBB, MIMD, TII->get(X86::MOV32r0), ZReg);
if (PVT == MVT::i64) {
Register TmpZReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg)
+ BuildMI(checkSspMBB, MIMD, TII->get(X86::SUBREG_TO_REG), TmpZReg)
.addImm(0)
.addReg(ZReg)
.addImm(X86::sub_32bit);
@@ -37811,15 +37815,17 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Read the current SSP Register value to the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
- BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+ BuildMI(checkSspMBB, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Check whether the result of the SSP register is zero and jump directly
// to the sink.
unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
- BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
+ BuildMI(checkSspMBB, MIMD, TII->get(TestRROpc))
.addReg(SSPCopyReg)
.addReg(SSPCopyReg);
- BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
+ BuildMI(checkSspMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(sinkMBB)
+ .addImm(X86::COND_E);
checkSspMBB->addSuccessor(sinkMBB);
checkSspMBB->addSuccessor(fallMBB);
@@ -37828,7 +37834,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
const int64_t SPPOffset = 3 * PVT.getStoreSize();
MachineInstrBuilder MIB =
- BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
+ BuildMI(fallMBB, MIMD, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
@@ -37844,12 +37850,14 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Subtract the current SSP from the previous SSP.
Register SspSubReg = MRI.createVirtualRegister(PtrRC);
unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
- BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
+ BuildMI(fallMBB, MIMD, TII->get(SubRROpc), SspSubReg)
.addReg(PrevSSPReg)
.addReg(SSPCopyReg);
// Jump to sink in case PrevSSPReg <= SSPCopyReg.
- BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE);
+ BuildMI(fallMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(sinkMBB)
+ .addImm(X86::COND_BE);
fallMBB->addSuccessor(sinkMBB);
fallMBB->addSuccessor(fixShadowMBB);
@@ -37857,36 +37865,38 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
+ BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspFirstShrReg)
.addReg(SspSubReg)
.addImm(Offset);
// Increase SSP when looking only on the lower 8 bits of the delta.
unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
- BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
+ BuildMI(fixShadowMBB, MIMD, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
// Reset the lower 8 bits.
Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
+ BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspSecondShrReg)
.addReg(SspFirstShrReg)
.addImm(8);
// Jump if the result of the shift is zero.
- BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
+ BuildMI(fixShadowMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(sinkMBB)
+ .addImm(X86::COND_E);
fixShadowMBB->addSuccessor(sinkMBB);
fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
// Do a single shift left.
unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64ri : X86::SHL32ri;
Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
+ BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(ShlR1Opc), SspAfterShlReg)
.addReg(SspSecondShrReg)
.addImm(1);
// Save the value 128 to a register (will be used next with incssp).
Register Value128InReg = MRI.createVirtualRegister(PtrRC);
unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
- BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
+ BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(MovRIOpc), Value128InReg)
.addImm(128);
fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
@@ -37894,21 +37904,23 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// iterations of incssp until we finish fixing the shadow stack.
Register DecReg = MRI.createVirtualRegister(PtrRC);
Register CounterReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::PHI), CounterReg)
.addReg(SspAfterShlReg)
.addMBB(fixShadowLoopPrepareMBB)
.addReg(DecReg)
.addMBB(fixShadowLoopMBB);
// Every iteration we increase the SSP by 128.
- BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(IncsspOpc)).addReg(Value128InReg);
// Every iteration we decrement the counter by 1.
unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
- BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(DecROpc), DecReg).addReg(CounterReg);
// Jump if the counter is not zero yet.
- BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE);
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(fixShadowLoopMBB)
+ .addImm(X86::COND_NE);
fixShadowLoopMBB->addSuccessor(sinkMBB);
fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
@@ -37918,7 +37930,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -37955,7 +37967,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
}
// Reload FP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg()) // Don't add the whole operand, we don't want to
@@ -37967,7 +37979,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Reload IP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
@@ -37981,7 +37993,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Reload SP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
@@ -37992,7 +38004,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Jump
- BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+ BuildMI(*thisMBB, MI, MIMD, TII->get(IJmpOpc)).addReg(Tmp);
MI.eraseFromParent();
return thisMBB;
@@ -38002,7 +38014,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
@@ -38025,14 +38037,14 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
if (Subtarget.is64Bit())
- BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
+ BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addMBB(DispatchBB)
.addReg(0);
else
- BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
+ BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA32r), VR)
.addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
@@ -38040,7 +38052,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
.addReg(0);
}
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MIMD, TII->get(Op));
addFrameReference(MIB, FI, Subtarget.is64Bit() ? 56 : 36);
if (UseImmLabel)
MIB.addMBB(DispatchBB);
@@ -38051,7 +38063,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = BB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
@@ -38106,7 +38118,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
DispatchBB->setIsEHPad(true);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
- BuildMI(TrapBB, DL, TII->get(X86::TRAP));
+ BuildMI(TrapBB, MIMD, TII->get(X86::TRAP));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
@@ -38138,36 +38150,38 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
Register FP = RI.getFrameRegister(*MF);
Register BP = RI.getBaseRegister();
unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
- addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
+ addRegOffset(BuildMI(DispatchBB, MIMD, TII->get(Op), BP), FP, true,
MFI->getRestoreBasePointerOffset())
.addRegMask(RI.getNoPreservedMask());
} else {
- BuildMI(DispatchBB, DL, TII->get(X86::NOOP))
+ BuildMI(DispatchBB, MIMD, TII->get(X86::NOOP))
.addRegMask(RI.getNoPreservedMask());
}
// IReg is used as an index in a memory operand and therefore can't be SP
Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
- addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
+ addFrameReference(BuildMI(DispatchBB, MIMD, TII->get(X86::MOV32rm), IReg), FI,
Subtarget.is64Bit() ? 8 : 4);
- BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
+ BuildMI(DispatchBB, MIMD, TII->get(X86::CMP32ri))
.addReg(IReg)
.addImm(LPadList.size());
- BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);
+ BuildMI(DispatchBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(TrapBB)
+ .addImm(X86::COND_AE);
if (Subtarget.is64Bit()) {
Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
// leaq .LJTI0_0(%rip), BReg
- BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg)
+ BuildMI(DispContBB, MIMD, TII->get(X86::LEA64r), BReg)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addJumpTableIndex(MJTI)
.addReg(0);
// movzx IReg64, IReg
- BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
+ BuildMI(DispContBB, MIMD, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
.addImm(0)
.addReg(IReg)
.addImm(X86::sub_32bit);
@@ -38175,7 +38189,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
switch (JTE) {
case MachineJumpTableInfo::EK_BlockAddress:
// jmpq *(BReg,IReg64,8)
- BuildMI(DispContBB, DL, TII->get(X86::JMP64m))
+ BuildMI(DispContBB, MIMD, TII->get(X86::JMP64m))
.addReg(BReg)
.addImm(8)
.addReg(IReg64)
@@ -38188,20 +38202,21 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass);
// movl (BReg,IReg64,4), OReg
- BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg)
+ BuildMI(DispContBB, MIMD, TII->get(X86::MOV32rm), OReg)
.addReg(BReg)
.addImm(4)
.addReg(IReg64)
.addImm(0)
.addReg(0);
// movsx OReg64, OReg
- BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg);
+ BuildMI(DispContBB, MIMD, TII->get(X86::MOVSX64rr32), OReg64)
+ .addReg(OReg);
// addq BReg, OReg64, TReg
- BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg)
+ BuildMI(DispContBB, MIMD, TII->get(X86::ADD64rr), TReg)
.addReg(OReg64)
.addReg(BReg);
// jmpq *TReg
- BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg);
+ BuildMI(DispContBB, MIMD, TII->get(X86::JMP64r)).addReg(TReg);
break;
}
default:
@@ -38209,7 +38224,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
}
} else {
// jmpl *.LJTI0_0(,IReg,4)
- BuildMI(DispContBB, DL, TII->get(X86::JMP32m))
+ BuildMI(DispContBB, MIMD, TII->get(X86::JMP32m))
.addReg(0)
.addImm(4)
.addReg(IReg)
@@ -38281,7 +38296,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
auto TMMImmToTMMReg = [](unsigned Imm) {
assert (Imm < 8 && "Illegal tmm index");
@@ -38345,46 +38360,46 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// precision when performing the addition.
int OrigCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)),
OrigCWFrameIdx);
// Load the old value of the control word...
Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW),
OrigCWFrameIdx);
// OR 0b11 into bit 8 and 9. 0b11 is the encoding for double extended
// precision.
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)
.addReg(OldCW, RegState::Kill)
.addImm(0x300);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
.addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
NewCWFrameIdx);
// Do the addition.
if (MI.getOpcode() == X86::FP80_ADDr) {
- BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80))
+ BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2));
} else {
- BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80m32))
+ BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80m32))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
@@ -38395,7 +38410,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
// Reload the original control word now.
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
OrigCWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
@@ -38415,34 +38430,34 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// mode when truncating to an integer value.
int OrigCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
- addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)),
+ OrigCWFrameIdx);
// Load the old value of the control word...
Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW),
OrigCWFrameIdx);
// OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero.
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)
.addReg(OldCW, RegState::Kill).addImm(0xC00);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
.addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
- addFrameReference(BuildMI(*BB, MI, DL,
+ addFrameReference(BuildMI(*BB, MI, MIMD,
TII->get(X86::FLDCW16m)), NewCWFrameIdx);
// Get the X86 opcode to use.
@@ -38461,12 +38476,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
- addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ addFullAddress(BuildMI(*BB, MI, MIMD, TII->get(Opc)), AM)
.addReg(MI.getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FLDCW16m)), OrigCWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
+ OrigCWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -38551,7 +38566,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineBasicBlock::iterator MBBI(RMBBI);
addFullAddress(
- BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
+ BuildMI(*BB, *MBBI, MIMD, TII->get(X86::LEA32r), computedAddrVReg), AM);
setDirectAddressInInstr(&MI, 0, computedAddrVReg);
@@ -38567,21 +38582,21 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Save RBX into a virtual register.
Register SaveRBX =
MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)
.addReg(X86::RBX);
Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
+ BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
MIB.add(MI.getOperand(Idx));
MIB.add(MI.getOperand(X86::AddrNumOperands));
MIB.addReg(SaveRBX);
} else {
// Simple case, just copy the virtual register to RBX.
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::RBX)
.add(MI.getOperand(X86::AddrNumOperands));
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B));
+ BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B));
for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
MIB.add(MI.getOperand(Idx));
}
@@ -38595,32 +38610,32 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// If no need to save the base pointer, we generate MWAITXrrr,
// else we generate pseudo MWAITX_SAVE_RBX.
if (!IsRBX || !TRI->hasBasePointer(*MF)) {
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(0).getReg());
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(1).getReg());
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EBX)
.addReg(MI.getOperand(2).getReg());
- BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr));
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITXrrr));
MI.eraseFromParent();
} else {
if (!BB->isLiveIn(BasePtr)) {
BB->addLiveIn(BasePtr);
}
// Parameters can be copied into ECX and EAX but not EBX yet.
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(0).getReg());
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(1).getReg());
assert(Subtarget.is64Bit() && "Expected 64-bit mode!");
// Save RBX into a virtual register.
Register SaveRBX =
MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)
.addReg(X86::RBX);
// Generate mwaitx pseudo.
Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
- BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX))
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITX_SAVE_RBX))
.addDef(Dst) // Destination tied in with SaveRBX.
.addReg(MI.getOperand(2).getReg()) // input value of EBX.
.addUse(SaveRBX); // Save of base pointer.
@@ -38637,7 +38652,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert(StackAdjustment != 0 && "0 stack adjustment");
LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
<< StackAdjustment << "\n");
- BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::SUB32ri), X86::ESP)
.addReg(X86::ESP)
.addImm(StackAdjustment);
MI.eraseFromParent();
@@ -38652,9 +38667,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
// stack pointer + offset
- addRegOffset(
- BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
- X86::ESP, false, ArgOffset);
+ addRegOffset(BuildMI(*BB, MI, MIMD, TII->get(X86::LEA32r),
+ MI.getOperand(0).getReg()),
+ X86::ESP, false, ArgOffset);
MI.eraseFromParent();
return BB;
}
@@ -38675,7 +38690,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
}
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
@@ -38686,7 +38701,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case X86::PTILEZERO: {
unsigned Imm = MI.getOperand(0).getImm();
- BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
+ BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
@@ -38701,7 +38716,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED: Opc = X86::TILESTORED; break;
}
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
if (Opc != X86::TILESTORED)
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
@@ -38722,14 +38737,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case X86::PTCMMIMFP16PS:
case X86::PTCMMRLFP16PS: {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instruction!");
case X86::PTCMMIMFP16PS: Opc = X86::TCMMIMFP16PS; break;
case X86::PTCMMRLFP16PS: Opc = X86::TCMMRLFP16PS; break;
}
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
@@ -59577,12 +59592,12 @@ void X86TargetLowering::insertCopiesSplitCSR(
Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
- BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
+ BuildMI(*Entry, MBBI, MIMetadata(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
- BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+ BuildMI(*Exit, Exit->getFirstTerminator(), MIMetadata(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
@@ -59651,7 +59666,7 @@ X86TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
break;
}
- return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(X86::KCFI_CHECK))
+ return BuildMI(MBB, MBBI, MIMetadata(*MBBI), TII->get(X86::KCFI_CHECK))
.addReg(TargetReg)
.addImm(MBBI->getCFIType())
.getInstr();
diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll
index 8567865cbbe107..e6604c957f1fa1 100644
--- a/llvm/test/CodeGen/X86/pcsections-atomics.ll
+++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll
@@ -5,10 +5,10 @@
; access, and end with another non-atomic access; this is to test that the
; !pcsections propagation doesn't accidentally touch adjacent instructions.
;
-; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=O0
-; RUN: llc -O1 < %s | FileCheck %s --check-prefixes=O1
-; RUN: llc -O2 < %s | FileCheck %s --check-prefixes=O2
-; RUN: llc -O3 < %s | FileCheck %s --check-prefixes=O3
+; RUN: llc -O0 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O0
+; RUN: llc -O1 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O1
+; RUN: llc -O2 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O2
+; RUN: llc -O3 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O3
target triple = "x86_64-unknown-linux-gnu"
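
Note that the new -mattr=cx16 in the RUN lines enables CMPXCHG16B, which the
128-bit atomic tests added below rely on: without the cx16 feature, i128 atomic
operations on x86-64 would typically be expanded to __atomic_* libcalls rather
than inline lock cmpxchg16b loops, e.g. (illustrative only, not checked by this
test):

  ; without cx16, an atomic i128 load would emit something like:
  ;   callq __atomic_load_16

leaving no inline atomic instructions for !pcsections to annotate.
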
@@ -9979,4 +9979,6320 @@ else:
ret i64 2
}
+define i128 @atomic128_load_unordered(ptr %a) {
+; O0-LABEL: atomic128_load_unordered:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection414:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ebx
+; O0-NEXT: movq %rbx, %rax
+; O0-NEXT: movq %rbx, %rdx
+; O0-NEXT: movq %rbx, %rcx
+; O0-NEXT: .Lpcsection415:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo(%rip)
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_load_unordered:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection328:
+; O1-NEXT: xorl %eax, %eax
+; O1-NEXT: .Lpcsection329:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection330:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection331:
+; O1-NEXT: xorl %ebx, %ebx
+; O1-NEXT: .Lpcsection332:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_load_unordered:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection328:
+; O2-NEXT: xorl %eax, %eax
+; O2-NEXT: .Lpcsection329:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection330:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection331:
+; O2-NEXT: xorl %ebx, %ebx
+; O2-NEXT: .Lpcsection332:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_load_unordered:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection328:
+; O3-NEXT: xorl %eax, %eax
+; O3-NEXT: .Lpcsection329:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection330:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection331:
+; O3-NEXT: xorl %ebx, %ebx
+; O3-NEXT: .Lpcsection332:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = load atomic i128, ptr %a unordered, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret i128 %x
+}
+
+define i128 @atomic128_load_monotonic(ptr %a) {
+; O0-LABEL: atomic128_load_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection416:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ebx
+; O0-NEXT: movq %rbx, %rax
+; O0-NEXT: movq %rbx, %rdx
+; O0-NEXT: movq %rbx, %rcx
+; O0-NEXT: .Lpcsection417:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo(%rip)
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_load_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection333:
+; O1-NEXT: xorl %eax, %eax
+; O1-NEXT: .Lpcsection334:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection335:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection336:
+; O1-NEXT: xorl %ebx, %ebx
+; O1-NEXT: .Lpcsection337:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_load_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection333:
+; O2-NEXT: xorl %eax, %eax
+; O2-NEXT: .Lpcsection334:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection335:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection336:
+; O2-NEXT: xorl %ebx, %ebx
+; O2-NEXT: .Lpcsection337:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_load_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection333:
+; O3-NEXT: xorl %eax, %eax
+; O3-NEXT: .Lpcsection334:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection335:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection336:
+; O3-NEXT: xorl %ebx, %ebx
+; O3-NEXT: .Lpcsection337:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = load atomic i128, ptr %a monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret i128 %x
+}
+
+define i128 @atomic128_load_acquire(ptr %a) {
+; O0-LABEL: atomic128_load_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection418:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ebx
+; O0-NEXT: movq %rbx, %rax
+; O0-NEXT: movq %rbx, %rdx
+; O0-NEXT: movq %rbx, %rcx
+; O0-NEXT: .Lpcsection419:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo(%rip)
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_load_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection338:
+; O1-NEXT: xorl %eax, %eax
+; O1-NEXT: .Lpcsection339:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection340:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection341:
+; O1-NEXT: xorl %ebx, %ebx
+; O1-NEXT: .Lpcsection342:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_load_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection338:
+; O2-NEXT: xorl %eax, %eax
+; O2-NEXT: .Lpcsection339:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection340:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection341:
+; O2-NEXT: xorl %ebx, %ebx
+; O2-NEXT: .Lpcsection342:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_load_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection338:
+; O3-NEXT: xorl %eax, %eax
+; O3-NEXT: .Lpcsection339:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection340:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection341:
+; O3-NEXT: xorl %ebx, %ebx
+; O3-NEXT: .Lpcsection342:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = load atomic i128, ptr %a acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret i128 %x
+}
+
+define i128 @atomic128_load_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_load_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection420:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ebx
+; O0-NEXT: movq %rbx, %rax
+; O0-NEXT: movq %rbx, %rdx
+; O0-NEXT: movq %rbx, %rcx
+; O0-NEXT: .Lpcsection421:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo(%rip)
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_load_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection343:
+; O1-NEXT: xorl %eax, %eax
+; O1-NEXT: .Lpcsection344:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection345:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection346:
+; O1-NEXT: xorl %ebx, %ebx
+; O1-NEXT: .Lpcsection347:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_load_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection343:
+; O2-NEXT: xorl %eax, %eax
+; O2-NEXT: .Lpcsection344:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection345:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection346:
+; O2-NEXT: xorl %ebx, %ebx
+; O2-NEXT: .Lpcsection347:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_load_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection343:
+; O3-NEXT: xorl %eax, %eax
+; O3-NEXT: .Lpcsection344:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection345:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection346:
+; O3-NEXT: xorl %ebx, %ebx
+; O3-NEXT: .Lpcsection347:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = load atomic i128, ptr %a seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret i128 %x
+}
+
+define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) {
+; O0-LABEL: atomic128_load_seq_cst_ptr_ty:
+; O0: # %bb.0: # %entry
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection422:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: movq $1, foo
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_load_seq_cst_ptr_ty:
+; O1: # %bb.0: # %entry
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection348:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_load_seq_cst_ptr_ty:
+; O2: # %bb.0: # %entry
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection348:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_load_seq_cst_ptr_ty:
+; O3: # %bb.0: # %entry
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection348:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = load atomic ptr, ptr %a seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret ptr %x
+}
+
+define void @atomic128_store_unordered(ptr %a) {
+; O0-LABEL: atomic128_store_unordered:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection423:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection424:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection425:
+; O0-NEXT: jmp .LBB203_1
+; O0-NEXT: .LBB203_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection426:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection427:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection428:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection429:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection430:
+; O0-NEXT: jne .LBB203_1
+; O0-NEXT: jmp .LBB203_2
+; O0-NEXT: .LBB203_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_store_unordered:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection349:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection350:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection351:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB203_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection352:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection353:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection354:
+; O1-NEXT: jne .LBB203_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_store_unordered:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection349:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection350:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection351:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB203_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection352:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection353:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection354:
+; O2-NEXT: jne .LBB203_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_store_unordered:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection349:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection350:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection351:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB203_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection352:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection353:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection354:
+; O3-NEXT: jne .LBB203_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ store atomic i128 42, ptr %a unordered, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_store_monotonic(ptr %a) {
+; O0-LABEL: atomic128_store_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection431:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection432:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection433:
+; O0-NEXT: jmp .LBB204_1
+; O0-NEXT: .LBB204_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection434:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection435:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection436:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection437:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection438:
+; O0-NEXT: jne .LBB204_1
+; O0-NEXT: jmp .LBB204_2
+; O0-NEXT: .LBB204_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_store_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection355:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection356:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection357:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB204_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection358:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection359:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection360:
+; O1-NEXT: jne .LBB204_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_store_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection355:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection356:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection357:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB204_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection358:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection359:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection360:
+; O2-NEXT: jne .LBB204_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_store_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection355:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection356:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection357:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB204_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection358:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection359:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection360:
+; O3-NEXT: jne .LBB204_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ store atomic i128 42, ptr %a monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_store_release(ptr %a) {
+; O0-LABEL: atomic128_store_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection439:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection440:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection441:
+; O0-NEXT: jmp .LBB205_1
+; O0-NEXT: .LBB205_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection442:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection443:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection444:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection445:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection446:
+; O0-NEXT: jne .LBB205_1
+; O0-NEXT: jmp .LBB205_2
+; O0-NEXT: .LBB205_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_store_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection361:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection362:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection363:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB205_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection364:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection365:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection366:
+; O1-NEXT: jne .LBB205_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_store_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection361:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection362:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection363:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB205_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection364:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection365:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection366:
+; O2-NEXT: jne .LBB205_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_store_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection361:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection362:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection363:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB205_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection364:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection365:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection366:
+; O3-NEXT: jne .LBB205_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ store atomic i128 42, ptr %a release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_store_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_store_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection447:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection448:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection449:
+; O0-NEXT: jmp .LBB206_1
+; O0-NEXT: .LBB206_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection450:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection451:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection452:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection453:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection454:
+; O0-NEXT: jne .LBB206_1
+; O0-NEXT: jmp .LBB206_2
+; O0-NEXT: .LBB206_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_store_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection367:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection368:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection369:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB206_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection370:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection371:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection372:
+; O1-NEXT: jne .LBB206_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_store_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection367:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection368:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection369:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB206_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection370:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection371:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection372:
+; O2-NEXT: jne .LBB206_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_store_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection367:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection368:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection369:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB206_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection370:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection371:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection372:
+; O3-NEXT: jne .LBB206_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ store atomic i128 42, ptr %a seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
+; O0-LABEL: atomic128_store_seq_cst_ptr_ty:
+; O0: # %bb.0: # %entry
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection455:
+; O0-NEXT: xchgq %rsi, (%rdi)
+; O0-NEXT: movq $1, foo
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_store_seq_cst_ptr_ty:
+; O1: # %bb.0: # %entry
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection373:
+; O1-NEXT: xchgq %rsi, (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_store_seq_cst_ptr_ty:
+; O2: # %bb.0: # %entry
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection373:
+; O2-NEXT: xchgq %rsi, (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_store_seq_cst_ptr_ty:
+; O3: # %bb.0: # %entry
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection373:
+; O3-NEXT: xchgq %rsi, (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ store atomic ptr %v, ptr %a seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xchg_monotonic(ptr %a) {
+; O0-LABEL: atomic128_xchg_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection456:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection457:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection458:
+; O0-NEXT: jmp .LBB208_1
+; O0-NEXT: .LBB208_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection459:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection460:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection461:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection462:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection463:
+; O0-NEXT: jne .LBB208_1
+; O0-NEXT: jmp .LBB208_2
+; O0-NEXT: .LBB208_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xchg_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection374:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection375:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection376:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB208_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection377:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection378:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection379:
+; O1-NEXT: jne .LBB208_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xchg_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection374:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection375:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection376:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB208_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection377:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection378:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection379:
+; O2-NEXT: jne .LBB208_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xchg_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection374:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection375:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection376:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB208_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection377:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection378:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection379:
+; O3-NEXT: jne .LBB208_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xchg ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_add_monotonic(ptr %a) {
+; O0-LABEL: atomic128_add_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection464:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection465:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection466:
+; O0-NEXT: jmp .LBB209_1
+; O0-NEXT: .LBB209_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection467:
+; O0-NEXT: addq $42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection468:
+; O0-NEXT: adcq $0, %rcx
+; O0-NEXT: .Lpcsection469:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection470:
+; O0-NEXT: jne .LBB209_1
+; O0-NEXT: jmp .LBB209_2
+; O0-NEXT: .LBB209_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_add_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection380:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection381:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB209_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection382:
+; O1-NEXT: addq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection383:
+; O1-NEXT: adcq $0, %rcx
+; O1-NEXT: .Lpcsection384:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection385:
+; O1-NEXT: jne .LBB209_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_add_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection380:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection381:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB209_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection382:
+; O2-NEXT: addq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection383:
+; O2-NEXT: adcq $0, %rcx
+; O2-NEXT: .Lpcsection384:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection385:
+; O2-NEXT: jne .LBB209_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_add_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection380:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection381:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB209_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection382:
+; O3-NEXT: addq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection383:
+; O3-NEXT: adcq $0, %rcx
+; O3-NEXT: .Lpcsection384:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection385:
+; O3-NEXT: jne .LBB209_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw add ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_sub_monotonic(ptr %a) {
+; O0-LABEL: atomic128_sub_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection471:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection472:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection473:
+; O0-NEXT: jmp .LBB210_1
+; O0-NEXT: .LBB210_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection474:
+; O0-NEXT: addq $-42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection475:
+; O0-NEXT: adcq $-1, %rcx
+; O0-NEXT: .Lpcsection476:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection477:
+; O0-NEXT: jne .LBB210_1
+; O0-NEXT: jmp .LBB210_2
+; O0-NEXT: .LBB210_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_sub_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection386:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection387:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB210_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection388:
+; O1-NEXT: addq $-42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection389:
+; O1-NEXT: adcq $-1, %rcx
+; O1-NEXT: .Lpcsection390:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection391:
+; O1-NEXT: jne .LBB210_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_sub_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection386:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection387:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB210_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection388:
+; O2-NEXT: addq $-42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection389:
+; O2-NEXT: adcq $-1, %rcx
+; O2-NEXT: .Lpcsection390:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection391:
+; O2-NEXT: jne .LBB210_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_sub_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection386:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection387:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB210_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection388:
+; O3-NEXT: addq $-42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection389:
+; O3-NEXT: adcq $-1, %rcx
+; O3-NEXT: .Lpcsection390:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection391:
+; O3-NEXT: jne .LBB210_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw sub ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_and_monotonic(ptr %a) {
+; O0-LABEL: atomic128_and_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection478:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection479:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection480:
+; O0-NEXT: jmp .LBB211_1
+; O0-NEXT: .LBB211_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection481:
+; O0-NEXT: andl $42, %ecx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection482:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection483:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection484:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection485:
+; O0-NEXT: jne .LBB211_1
+; O0-NEXT: jmp .LBB211_2
+; O0-NEXT: .LBB211_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_and_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection392:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection393:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB211_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection394:
+; O1-NEXT: andl $42, %ebx
+; O1-NEXT: .Lpcsection395:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection396:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection397:
+; O1-NEXT: jne .LBB211_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_and_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection392:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection393:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB211_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection394:
+; O2-NEXT: andl $42, %ebx
+; O2-NEXT: .Lpcsection395:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection396:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection397:
+; O2-NEXT: jne .LBB211_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_and_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection392:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection393:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB211_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection394:
+; O3-NEXT: andl $42, %ebx
+; O3-NEXT: .Lpcsection395:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection396:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection397:
+; O3-NEXT: jne .LBB211_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw and ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_or_monotonic(ptr %a) {
+; O0-LABEL: atomic128_or_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection486:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection487:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection488:
+; O0-NEXT: jmp .LBB212_1
+; O0-NEXT: .LBB212_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection489:
+; O0-NEXT: orq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection490:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection491:
+; O0-NEXT: jne .LBB212_1
+; O0-NEXT: jmp .LBB212_2
+; O0-NEXT: .LBB212_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_or_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection398:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection399:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB212_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection400:
+; O1-NEXT: orq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection401:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection402:
+; O1-NEXT: jne .LBB212_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_or_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection398:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection399:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB212_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection400:
+; O2-NEXT: orq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection401:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection402:
+; O2-NEXT: jne .LBB212_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_or_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection398:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection399:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB212_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection400:
+; O3-NEXT: orq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection401:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection402:
+; O3-NEXT: jne .LBB212_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw or ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xor_monotonic(ptr %a) {
+; O0-LABEL: atomic128_xor_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection492:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection493:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection494:
+; O0-NEXT: jmp .LBB213_1
+; O0-NEXT: .LBB213_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection495:
+; O0-NEXT: xorq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection496:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection497:
+; O0-NEXT: jne .LBB213_1
+; O0-NEXT: jmp .LBB213_2
+; O0-NEXT: .LBB213_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xor_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection403:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection404:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB213_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection405:
+; O1-NEXT: xorq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection406:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection407:
+; O1-NEXT: jne .LBB213_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xor_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection403:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection404:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB213_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection405:
+; O2-NEXT: xorq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection406:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection407:
+; O2-NEXT: jne .LBB213_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xor_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection403:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection404:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB213_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection405:
+; O3-NEXT: xorq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection406:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection407:
+; O3-NEXT: jne .LBB213_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xor ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_nand_monotonic(ptr %a) {
+; O0-LABEL: atomic128_nand_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection498:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection499:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection500:
+; O0-NEXT: jmp .LBB214_1
+; O0-NEXT: .LBB214_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection501:
+; O0-NEXT: notl %ecx
+; O0-NEXT: .Lpcsection502:
+; O0-NEXT: # implicit-def: $rbx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection503:
+; O0-NEXT: orq $-43, %rbx
+; O0-NEXT: .Lpcsection504:
+; O0-NEXT: movq $-1, %rcx
+; O0-NEXT: .Lpcsection505:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection506:
+; O0-NEXT: jne .LBB214_1
+; O0-NEXT: jmp .LBB214_2
+; O0-NEXT: .LBB214_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_nand_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection408:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection409:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection410:
+; O1-NEXT: movq $-1, %rcx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB214_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection411:
+; O1-NEXT: notl %ebx
+; O1-NEXT: .Lpcsection412:
+; O1-NEXT: orq $-43, %rbx
+; O1-NEXT: .Lpcsection413:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection414:
+; O1-NEXT: jne .LBB214_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_nand_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection408:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection409:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection410:
+; O2-NEXT: movq $-1, %rcx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB214_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection411:
+; O2-NEXT: notl %ebx
+; O2-NEXT: .Lpcsection412:
+; O2-NEXT: orq $-43, %rbx
+; O2-NEXT: .Lpcsection413:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection414:
+; O2-NEXT: jne .LBB214_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_nand_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection408:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection409:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection410:
+; O3-NEXT: movq $-1, %rcx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB214_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection411:
+; O3-NEXT: notl %ebx
+; O3-NEXT: .Lpcsection412:
+; O3-NEXT: orq $-43, %rbx
+; O3-NEXT: .Lpcsection413:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection414:
+; O3-NEXT: jne .LBB214_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw nand ptr %a, i128 42 monotonic, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xchg_acquire(ptr %a) {
+; O0-LABEL: atomic128_xchg_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection507:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection508:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection509:
+; O0-NEXT: jmp .LBB215_1
+; O0-NEXT: .LBB215_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection510:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection511:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection512:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection513:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection514:
+; O0-NEXT: jne .LBB215_1
+; O0-NEXT: jmp .LBB215_2
+; O0-NEXT: .LBB215_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xchg_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection415:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection416:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection417:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB215_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection418:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection419:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection420:
+; O1-NEXT: jne .LBB215_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xchg_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection415:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection416:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection417:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB215_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection418:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection419:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection420:
+; O2-NEXT: jne .LBB215_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xchg_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection415:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection416:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection417:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB215_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection418:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection419:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection420:
+; O3-NEXT: jne .LBB215_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xchg ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_add_acquire(ptr %a) {
+; O0-LABEL: atomic128_add_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection515:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection516:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection517:
+; O0-NEXT: jmp .LBB216_1
+; O0-NEXT: .LBB216_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection518:
+; O0-NEXT: addq $42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection519:
+; O0-NEXT: adcq $0, %rcx
+; O0-NEXT: .Lpcsection520:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection521:
+; O0-NEXT: jne .LBB216_1
+; O0-NEXT: jmp .LBB216_2
+; O0-NEXT: .LBB216_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_add_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection421:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection422:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB216_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection423:
+; O1-NEXT: addq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection424:
+; O1-NEXT: adcq $0, %rcx
+; O1-NEXT: .Lpcsection425:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection426:
+; O1-NEXT: jne .LBB216_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_add_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection421:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection422:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB216_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection423:
+; O2-NEXT: addq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection424:
+; O2-NEXT: adcq $0, %rcx
+; O2-NEXT: .Lpcsection425:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection426:
+; O2-NEXT: jne .LBB216_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_add_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection421:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection422:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB216_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection423:
+; O3-NEXT: addq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection424:
+; O3-NEXT: adcq $0, %rcx
+; O3-NEXT: .Lpcsection425:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection426:
+; O3-NEXT: jne .LBB216_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw add ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_sub_acquire(ptr %a) {
+; O0-LABEL: atomic128_sub_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection522:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection523:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection524:
+; O0-NEXT: jmp .LBB217_1
+; O0-NEXT: .LBB217_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection525:
+; O0-NEXT: addq $-42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection526:
+; O0-NEXT: adcq $-1, %rcx
+; O0-NEXT: .Lpcsection527:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection528:
+; O0-NEXT: jne .LBB217_1
+; O0-NEXT: jmp .LBB217_2
+; O0-NEXT: .LBB217_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_sub_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection427:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection428:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB217_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection429:
+; O1-NEXT: addq $-42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection430:
+; O1-NEXT: adcq $-1, %rcx
+; O1-NEXT: .Lpcsection431:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection432:
+; O1-NEXT: jne .LBB217_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_sub_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection427:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection428:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB217_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection429:
+; O2-NEXT: addq $-42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection430:
+; O2-NEXT: adcq $-1, %rcx
+; O2-NEXT: .Lpcsection431:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection432:
+; O2-NEXT: jne .LBB217_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_sub_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection427:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection428:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB217_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection429:
+; O3-NEXT: addq $-42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection430:
+; O3-NEXT: adcq $-1, %rcx
+; O3-NEXT: .Lpcsection431:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection432:
+; O3-NEXT: jne .LBB217_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw sub ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_and_acquire(ptr %a) {
+; O0-LABEL: atomic128_and_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection529:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection530:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection531:
+; O0-NEXT: jmp .LBB218_1
+; O0-NEXT: .LBB218_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection532:
+; O0-NEXT: andl $42, %ecx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection533:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection534:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection535:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection536:
+; O0-NEXT: jne .LBB218_1
+; O0-NEXT: jmp .LBB218_2
+; O0-NEXT: .LBB218_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_and_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection433:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection434:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB218_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection435:
+; O1-NEXT: andl $42, %ebx
+; O1-NEXT: .Lpcsection436:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection437:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection438:
+; O1-NEXT: jne .LBB218_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_and_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection433:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection434:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB218_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection435:
+; O2-NEXT: andl $42, %ebx
+; O2-NEXT: .Lpcsection436:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection437:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection438:
+; O2-NEXT: jne .LBB218_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_and_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection433:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection434:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB218_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection435:
+; O3-NEXT: andl $42, %ebx
+; O3-NEXT: .Lpcsection436:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection437:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection438:
+; O3-NEXT: jne .LBB218_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw and ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_or_acquire(ptr %a) {
+; O0-LABEL: atomic128_or_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection537:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection538:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection539:
+; O0-NEXT: jmp .LBB219_1
+; O0-NEXT: .LBB219_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection540:
+; O0-NEXT: orq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection541:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection542:
+; O0-NEXT: jne .LBB219_1
+; O0-NEXT: jmp .LBB219_2
+; O0-NEXT: .LBB219_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_or_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection439:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection440:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB219_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection441:
+; O1-NEXT: orq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection442:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection443:
+; O1-NEXT: jne .LBB219_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_or_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection439:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection440:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB219_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection441:
+; O2-NEXT: orq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection442:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection443:
+; O2-NEXT: jne .LBB219_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_or_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection439:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection440:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB219_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection441:
+; O3-NEXT: orq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection442:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection443:
+; O3-NEXT: jne .LBB219_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw or ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xor_acquire(ptr %a) {
+; O0-LABEL: atomic128_xor_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection543:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection544:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection545:
+; O0-NEXT: jmp .LBB220_1
+; O0-NEXT: .LBB220_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection546:
+; O0-NEXT: xorq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection547:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection548:
+; O0-NEXT: jne .LBB220_1
+; O0-NEXT: jmp .LBB220_2
+; O0-NEXT: .LBB220_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xor_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection444:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection445:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB220_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection446:
+; O1-NEXT: xorq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection447:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection448:
+; O1-NEXT: jne .LBB220_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xor_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection444:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection445:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB220_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection446:
+; O2-NEXT: xorq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection447:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection448:
+; O2-NEXT: jne .LBB220_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xor_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection444:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection445:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB220_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection446:
+; O3-NEXT: xorq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection447:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection448:
+; O3-NEXT: jne .LBB220_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xor ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_nand_acquire(ptr %a) {
+; O0-LABEL: atomic128_nand_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection549:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection550:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection551:
+; O0-NEXT: jmp .LBB221_1
+; O0-NEXT: .LBB221_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection552:
+; O0-NEXT: notl %ecx
+; O0-NEXT: .Lpcsection553:
+; O0-NEXT: # implicit-def: $rbx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection554:
+; O0-NEXT: orq $-43, %rbx
+; O0-NEXT: .Lpcsection555:
+; O0-NEXT: movq $-1, %rcx
+; O0-NEXT: .Lpcsection556:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection557:
+; O0-NEXT: jne .LBB221_1
+; O0-NEXT: jmp .LBB221_2
+; O0-NEXT: .LBB221_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_nand_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection449:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection450:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection451:
+; O1-NEXT: movq $-1, %rcx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB221_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection452:
+; O1-NEXT: notl %ebx
+; O1-NEXT: .Lpcsection453:
+; O1-NEXT: orq $-43, %rbx
+; O1-NEXT: .Lpcsection454:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection455:
+; O1-NEXT: jne .LBB221_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_nand_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection449:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection450:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection451:
+; O2-NEXT: movq $-1, %rcx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB221_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection452:
+; O2-NEXT: notl %ebx
+; O2-NEXT: .Lpcsection453:
+; O2-NEXT: orq $-43, %rbx
+; O2-NEXT: .Lpcsection454:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection455:
+; O2-NEXT: jne .LBB221_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_nand_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection449:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection450:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection451:
+; O3-NEXT: movq $-1, %rcx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB221_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection452:
+; O3-NEXT: notl %ebx
+; O3-NEXT: .Lpcsection453:
+; O3-NEXT: orq $-43, %rbx
+; O3-NEXT: .Lpcsection454:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection455:
+; O3-NEXT: jne .LBB221_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw nand ptr %a, i128 42 acquire, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xchg_release(ptr %a) {
+; O0-LABEL: atomic128_xchg_release:
+; O0: # %bb.0:
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection558:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection559:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection560:
+; O0-NEXT: jmp .LBB222_1
+; O0-NEXT: .LBB222_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection561:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection562:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection563:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection564:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection565:
+; O0-NEXT: jne .LBB222_1
+; O0-NEXT: jmp .LBB222_2
+; O0-NEXT: .LBB222_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xchg_release:
+; O1: # %bb.0:
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection456:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection457:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection458:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB222_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection459:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection460:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection461:
+; O1-NEXT: jne .LBB222_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xchg_release:
+; O2: # %bb.0:
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection456:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection457:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection458:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB222_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection459:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection460:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection461:
+; O2-NEXT: jne .LBB222_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xchg_release:
+; O3: # %bb.0:
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection456:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection457:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection458:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB222_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection459:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection460:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection461:
+; O3-NEXT: jne .LBB222_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xchg ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_add_release(ptr %a) {
+; O0-LABEL: atomic128_add_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection566:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection567:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection568:
+; O0-NEXT: jmp .LBB223_1
+; O0-NEXT: .LBB223_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection569:
+; O0-NEXT: addq $42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection570:
+; O0-NEXT: adcq $0, %rcx
+; O0-NEXT: .Lpcsection571:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection572:
+; O0-NEXT: jne .LBB223_1
+; O0-NEXT: jmp .LBB223_2
+; O0-NEXT: .LBB223_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_add_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection462:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection463:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB223_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection464:
+; O1-NEXT: addq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection465:
+; O1-NEXT: adcq $0, %rcx
+; O1-NEXT: .Lpcsection466:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection467:
+; O1-NEXT: jne .LBB223_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_add_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection462:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection463:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB223_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection464:
+; O2-NEXT: addq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection465:
+; O2-NEXT: adcq $0, %rcx
+; O2-NEXT: .Lpcsection466:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection467:
+; O2-NEXT: jne .LBB223_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_add_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection462:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection463:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB223_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection464:
+; O3-NEXT: addq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection465:
+; O3-NEXT: adcq $0, %rcx
+; O3-NEXT: .Lpcsection466:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection467:
+; O3-NEXT: jne .LBB223_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw add ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_sub_release(ptr %a) {
+; O0-LABEL: atomic128_sub_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection573:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection574:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection575:
+; O0-NEXT: jmp .LBB224_1
+; O0-NEXT: .LBB224_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection576:
+; O0-NEXT: addq $-42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection577:
+; O0-NEXT: adcq $-1, %rcx
+; O0-NEXT: .Lpcsection578:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection579:
+; O0-NEXT: jne .LBB224_1
+; O0-NEXT: jmp .LBB224_2
+; O0-NEXT: .LBB224_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_sub_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection468:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection469:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB224_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection470:
+; O1-NEXT: addq $-42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection471:
+; O1-NEXT: adcq $-1, %rcx
+; O1-NEXT: .Lpcsection472:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection473:
+; O1-NEXT: jne .LBB224_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_sub_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection468:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection469:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB224_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection470:
+; O2-NEXT: addq $-42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection471:
+; O2-NEXT: adcq $-1, %rcx
+; O2-NEXT: .Lpcsection472:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection473:
+; O2-NEXT: jne .LBB224_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_sub_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection468:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection469:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB224_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection470:
+; O3-NEXT: addq $-42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection471:
+; O3-NEXT: adcq $-1, %rcx
+; O3-NEXT: .Lpcsection472:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection473:
+; O3-NEXT: jne .LBB224_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw sub ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_and_release(ptr %a) {
+; O0-LABEL: atomic128_and_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection580:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection581:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection582:
+; O0-NEXT: jmp .LBB225_1
+; O0-NEXT: .LBB225_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection583:
+; O0-NEXT: andl $42, %ecx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection584:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection585:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection586:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection587:
+; O0-NEXT: jne .LBB225_1
+; O0-NEXT: jmp .LBB225_2
+; O0-NEXT: .LBB225_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_and_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection474:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection475:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB225_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection476:
+; O1-NEXT: andl $42, %ebx
+; O1-NEXT: .Lpcsection477:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection478:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection479:
+; O1-NEXT: jne .LBB225_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_and_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection474:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection475:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB225_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection476:
+; O2-NEXT: andl $42, %ebx
+; O2-NEXT: .Lpcsection477:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection478:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection479:
+; O2-NEXT: jne .LBB225_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_and_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection474:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection475:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB225_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection476:
+; O3-NEXT: andl $42, %ebx
+; O3-NEXT: .Lpcsection477:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection478:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection479:
+; O3-NEXT: jne .LBB225_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw and ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_or_release(ptr %a) {
+; O0-LABEL: atomic128_or_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection588:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection589:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection590:
+; O0-NEXT: jmp .LBB226_1
+; O0-NEXT: .LBB226_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection591:
+; O0-NEXT: orq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection592:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection593:
+; O0-NEXT: jne .LBB226_1
+; O0-NEXT: jmp .LBB226_2
+; O0-NEXT: .LBB226_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_or_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection480:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection481:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB226_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection482:
+; O1-NEXT: orq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection483:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection484:
+; O1-NEXT: jne .LBB226_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_or_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection480:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection481:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB226_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection482:
+; O2-NEXT: orq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection483:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection484:
+; O2-NEXT: jne .LBB226_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_or_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection480:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection481:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB226_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection482:
+; O3-NEXT: orq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection483:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection484:
+; O3-NEXT: jne .LBB226_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw or ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xor_release(ptr %a) {
+; O0-LABEL: atomic128_xor_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection594:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection595:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection596:
+; O0-NEXT: jmp .LBB227_1
+; O0-NEXT: .LBB227_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection597:
+; O0-NEXT: xorq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection598:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection599:
+; O0-NEXT: jne .LBB227_1
+; O0-NEXT: jmp .LBB227_2
+; O0-NEXT: .LBB227_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xor_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection485:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection486:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB227_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection487:
+; O1-NEXT: xorq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection488:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection489:
+; O1-NEXT: jne .LBB227_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xor_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection485:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection486:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB227_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection487:
+; O2-NEXT: xorq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection488:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection489:
+; O2-NEXT: jne .LBB227_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xor_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection485:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection486:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB227_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection487:
+; O3-NEXT: xorq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection488:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection489:
+; O3-NEXT: jne .LBB227_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xor ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_nand_release(ptr %a) {
+; O0-LABEL: atomic128_nand_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection600:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection601:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection602:
+; O0-NEXT: jmp .LBB228_1
+; O0-NEXT: .LBB228_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection603:
+; O0-NEXT: notl %ecx
+; O0-NEXT: .Lpcsection604:
+; O0-NEXT: # implicit-def: $rbx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection605:
+; O0-NEXT: orq $-43, %rbx
+; O0-NEXT: .Lpcsection606:
+; O0-NEXT: movq $-1, %rcx
+; O0-NEXT: .Lpcsection607:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection608:
+; O0-NEXT: jne .LBB228_1
+; O0-NEXT: jmp .LBB228_2
+; O0-NEXT: .LBB228_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_nand_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection490:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection491:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection492:
+; O1-NEXT: movq $-1, %rcx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB228_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection493:
+; O1-NEXT: notl %ebx
+; O1-NEXT: .Lpcsection494:
+; O1-NEXT: orq $-43, %rbx
+; O1-NEXT: .Lpcsection495:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection496:
+; O1-NEXT: jne .LBB228_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_nand_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection490:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection491:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection492:
+; O2-NEXT: movq $-1, %rcx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB228_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection493:
+; O2-NEXT: notl %ebx
+; O2-NEXT: .Lpcsection494:
+; O2-NEXT: orq $-43, %rbx
+; O2-NEXT: .Lpcsection495:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection496:
+; O2-NEXT: jne .LBB228_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_nand_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection490:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection491:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection492:
+; O3-NEXT: movq $-1, %rcx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB228_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection493:
+; O3-NEXT: notl %ebx
+; O3-NEXT: .Lpcsection494:
+; O3-NEXT: orq $-43, %rbx
+; O3-NEXT: .Lpcsection495:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection496:
+; O3-NEXT: jne .LBB228_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw nand ptr %a, i128 42 release, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
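+; xchg needs no arithmetic: the desired value is materialized directly into
+; %rcx:%rbx (zero high half, 42 low half) for the cmpxchg16b loop.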
+define void @atomic128_xchg_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_xchg_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection609:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection610:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection611:
+; O0-NEXT: jmp .LBB229_1
+; O0-NEXT: .LBB229_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection612:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection613:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection614:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection615:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection616:
+; O0-NEXT: jne .LBB229_1
+; O0-NEXT: jmp .LBB229_2
+; O0-NEXT: .LBB229_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xchg_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection497:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection498:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection499:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB229_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection500:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection501:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection502:
+; O1-NEXT: jne .LBB229_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xchg_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection497:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection498:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection499:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB229_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection500:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection501:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection502:
+; O2-NEXT: jne .LBB229_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xchg_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection497:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection498:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection499:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB229_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection500:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection501:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection502:
+; O3-NEXT: jne .LBB229_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xchg ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
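+; add splits the 128-bit addition across the register pair: addq $42 on the
+; low half, then adcq $0 to propagate the carry into the high half.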
+define void @atomic128_add_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_add_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection617:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection618:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection619:
+; O0-NEXT: jmp .LBB230_1
+; O0-NEXT: .LBB230_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection620:
+; O0-NEXT: addq $42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection621:
+; O0-NEXT: adcq $0, %rcx
+; O0-NEXT: .Lpcsection622:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection623:
+; O0-NEXT: jne .LBB230_1
+; O0-NEXT: jmp .LBB230_2
+; O0-NEXT: .LBB230_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_add_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection503:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection504:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB230_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection505:
+; O1-NEXT: addq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection506:
+; O1-NEXT: adcq $0, %rcx
+; O1-NEXT: .Lpcsection507:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection508:
+; O1-NEXT: jne .LBB230_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_add_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection503:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection504:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB230_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection505:
+; O2-NEXT: addq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection506:
+; O2-NEXT: adcq $0, %rcx
+; O2-NEXT: .Lpcsection507:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection508:
+; O2-NEXT: jne .LBB230_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_add_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection503:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection504:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB230_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection505:
+; O3-NEXT: addq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection506:
+; O3-NEXT: adcq $0, %rcx
+; O3-NEXT: .Lpcsection507:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection508:
+; O3-NEXT: jne .LBB230_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw add ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
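+; sub reuses the add expansion with the two's complement of 42: addq $-42 on
+; the low half and adcq $-1 on the high half.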
+define void @atomic128_sub_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_sub_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection624:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection625:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection626:
+; O0-NEXT: jmp .LBB231_1
+; O0-NEXT: .LBB231_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection627:
+; O0-NEXT: addq $-42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection628:
+; O0-NEXT: adcq $-1, %rcx
+; O0-NEXT: .Lpcsection629:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection630:
+; O0-NEXT: jne .LBB231_1
+; O0-NEXT: jmp .LBB231_2
+; O0-NEXT: .LBB231_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_sub_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection509:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection510:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB231_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection511:
+; O1-NEXT: addq $-42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection512:
+; O1-NEXT: adcq $-1, %rcx
+; O1-NEXT: .Lpcsection513:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection514:
+; O1-NEXT: jne .LBB231_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_sub_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection509:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection510:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB231_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection511:
+; O2-NEXT: addq $-42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection512:
+; O2-NEXT: adcq $-1, %rcx
+; O2-NEXT: .Lpcsection513:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection514:
+; O2-NEXT: jne .LBB231_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_sub_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection509:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection510:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB231_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection511:
+; O3-NEXT: addq $-42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection512:
+; O3-NEXT: adcq $-1, %rcx
+; O3-NEXT: .Lpcsection513:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection514:
+; O3-NEXT: jne .LBB231_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw sub ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
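+; and with 42 zeroes the high half outright (xorl %ecx, %ecx) and needs only
+; a 32-bit andl on the low half.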
+define void @atomic128_and_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_and_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection631:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection632:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection633:
+; O0-NEXT: jmp .LBB232_1
+; O0-NEXT: .LBB232_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection634:
+; O0-NEXT: andl $42, %ecx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection635:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection636:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection637:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection638:
+; O0-NEXT: jne .LBB232_1
+; O0-NEXT: jmp .LBB232_2
+; O0-NEXT: .LBB232_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_and_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection515:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection516:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB232_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection517:
+; O1-NEXT: andl $42, %ebx
+; O1-NEXT: .Lpcsection518:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection519:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection520:
+; O1-NEXT: jne .LBB232_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_and_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection515:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection516:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB232_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection517:
+; O2-NEXT: andl $42, %ebx
+; O2-NEXT: .Lpcsection518:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection519:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection520:
+; O2-NEXT: jne .LBB232_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_and_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection515:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection516:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB232_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection517:
+; O3-NEXT: andl $42, %ebx
+; O3-NEXT: .Lpcsection518:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection519:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection520:
+; O3-NEXT: jne .LBB232_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw and ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
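+; or with 42 modifies only the low half (orq $42); the high half is copied
+; through unchanged.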
+define void @atomic128_or_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_or_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection639:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection640:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection641:
+; O0-NEXT: jmp .LBB233_1
+; O0-NEXT: .LBB233_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection642:
+; O0-NEXT: orq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection643:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection644:
+; O0-NEXT: jne .LBB233_1
+; O0-NEXT: jmp .LBB233_2
+; O0-NEXT: .LBB233_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_or_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection521:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection522:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB233_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection523:
+; O1-NEXT: orq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection524:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection525:
+; O1-NEXT: jne .LBB233_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_or_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection521:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection522:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB233_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection523:
+; O2-NEXT: orq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection524:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection525:
+; O2-NEXT: jne .LBB233_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_or_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection521:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection522:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB233_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection523:
+; O3-NEXT: orq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection524:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection525:
+; O3-NEXT: jne .LBB233_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw or ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
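+; xor with 42 likewise touches only the low half (xorq $42), passing the high
+; half through unchanged.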
+define void @atomic128_xor_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_xor_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection645:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection646:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection647:
+; O0-NEXT: jmp .LBB234_1
+; O0-NEXT: .LBB234_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection648:
+; O0-NEXT: xorq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection649:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection650:
+; O0-NEXT: jne .LBB234_1
+; O0-NEXT: jmp .LBB234_2
+; O0-NEXT: .LBB234_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xor_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection526:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection527:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB234_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection528:
+; O1-NEXT: xorq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection529:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection530:
+; O1-NEXT: jne .LBB234_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xor_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection526:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection527:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB234_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection528:
+; O2-NEXT: xorq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection529:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection530:
+; O2-NEXT: jne .LBB234_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xor_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection526:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection527:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB234_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection528:
+; O3-NEXT: xorq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection529:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection530:
+; O3-NEXT: jne .LBB234_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xor ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_nand_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_nand_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection651:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection652:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection653:
+; O0-NEXT: jmp .LBB235_1
+; O0-NEXT: .LBB235_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection654:
+; O0-NEXT: notl %ecx
+; O0-NEXT: .Lpcsection655:
+; O0-NEXT: # implicit-def: $rbx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection656:
+; O0-NEXT: orq $-43, %rbx
+; O0-NEXT: .Lpcsection657:
+; O0-NEXT: movq $-1, %rcx
+; O0-NEXT: .Lpcsection658:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection659:
+; O0-NEXT: jne .LBB235_1
+; O0-NEXT: jmp .LBB235_2
+; O0-NEXT: .LBB235_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_nand_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection531:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection532:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection533:
+; O1-NEXT: movq $-1, %rcx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB235_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection534:
+; O1-NEXT: notl %ebx
+; O1-NEXT: .Lpcsection535:
+; O1-NEXT: orq $-43, %rbx
+; O1-NEXT: .Lpcsection536:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection537:
+; O1-NEXT: jne .LBB235_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_nand_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection531:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection532:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection533:
+; O2-NEXT: movq $-1, %rcx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB235_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection534:
+; O2-NEXT: notl %ebx
+; O2-NEXT: .Lpcsection535:
+; O2-NEXT: orq $-43, %rbx
+; O2-NEXT: .Lpcsection536:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection537:
+; O2-NEXT: jne .LBB235_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_nand_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection531:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection532:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection533:
+; O3-NEXT: movq $-1, %rcx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB235_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection534:
+; O3-NEXT: notl %ebx
+; O3-NEXT: .Lpcsection535:
+; O3-NEXT: orq $-43, %rbx
+; O3-NEXT: .Lpcsection536:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection537:
+; O3-NEXT: jne .LBB235_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw nand ptr %a, i128 42 acq_rel, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
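+; The seq_cst variants below emit the same sequences as the weaker orderings:
+; lock cmpxchg16b is already a full barrier on x86, so effectively only the
+; .Lpcsection label numbering differs.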
+define void @atomic128_xchg_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_xchg_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection660:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection661:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection662:
+; O0-NEXT: jmp .LBB236_1
+; O0-NEXT: .LBB236_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: .Lpcsection663:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection664:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection665:
+; O0-NEXT: movl $42, %ebx
+; O0-NEXT: .Lpcsection666:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection667:
+; O0-NEXT: jne .LBB236_1
+; O0-NEXT: jmp .LBB236_2
+; O0-NEXT: .LBB236_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xchg_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection538:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection539:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection540:
+; O1-NEXT: movl $42, %ebx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB236_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: .Lpcsection541:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection542:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection543:
+; O1-NEXT: jne .LBB236_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xchg_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection538:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection539:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection540:
+; O2-NEXT: movl $42, %ebx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB236_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: .Lpcsection541:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection542:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection543:
+; O2-NEXT: jne .LBB236_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xchg_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection538:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection539:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection540:
+; O3-NEXT: movl $42, %ebx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB236_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: .Lpcsection541:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection542:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection543:
+; O3-NEXT: jne .LBB236_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xchg ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_add_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_add_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection668:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection669:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection670:
+; O0-NEXT: jmp .LBB237_1
+; O0-NEXT: .LBB237_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection671:
+; O0-NEXT: addq $42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection672:
+; O0-NEXT: adcq $0, %rcx
+; O0-NEXT: .Lpcsection673:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection674:
+; O0-NEXT: jne .LBB237_1
+; O0-NEXT: jmp .LBB237_2
+; O0-NEXT: .LBB237_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_add_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection544:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection545:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB237_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection546:
+; O1-NEXT: addq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection547:
+; O1-NEXT: adcq $0, %rcx
+; O1-NEXT: .Lpcsection548:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection549:
+; O1-NEXT: jne .LBB237_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_add_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection544:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection545:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB237_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection546:
+; O2-NEXT: addq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection547:
+; O2-NEXT: adcq $0, %rcx
+; O2-NEXT: .Lpcsection548:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection549:
+; O2-NEXT: jne .LBB237_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_add_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection544:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection545:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB237_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection546:
+; O3-NEXT: addq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection547:
+; O3-NEXT: adcq $0, %rcx
+; O3-NEXT: .Lpcsection548:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection549:
+; O3-NEXT: jne .LBB237_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw add ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_sub_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_sub_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection675:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection676:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection677:
+; O0-NEXT: jmp .LBB238_1
+; O0-NEXT: .LBB238_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection678:
+; O0-NEXT: addq $-42, %rbx
+; O0-NEXT: movq %rdx, %rcx
+; O0-NEXT: .Lpcsection679:
+; O0-NEXT: adcq $-1, %rcx
+; O0-NEXT: .Lpcsection680:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection681:
+; O0-NEXT: jne .LBB238_1
+; O0-NEXT: jmp .LBB238_2
+; O0-NEXT: .LBB238_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_sub_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection550:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection551:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB238_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection552:
+; O1-NEXT: addq $-42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection553:
+; O1-NEXT: adcq $-1, %rcx
+; O1-NEXT: .Lpcsection554:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection555:
+; O1-NEXT: jne .LBB238_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_sub_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection550:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection551:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB238_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection552:
+; O2-NEXT: addq $-42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection553:
+; O2-NEXT: adcq $-1, %rcx
+; O2-NEXT: .Lpcsection554:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection555:
+; O2-NEXT: jne .LBB238_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_sub_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection550:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection551:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB238_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection552:
+; O3-NEXT: addq $-42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection553:
+; O3-NEXT: adcq $-1, %rcx
+; O3-NEXT: .Lpcsection554:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection555:
+; O3-NEXT: jne .LBB238_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw sub ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_and_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_and_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection682:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection683:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection684:
+; O0-NEXT: jmp .LBB239_1
+; O0-NEXT: .LBB239_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection685:
+; O0-NEXT: andl $42, %ecx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection686:
+; O0-NEXT: xorl %ecx, %ecx
+; O0-NEXT: .Lpcsection687:
+; O0-NEXT: # kill: def $rcx killed $ecx
+; O0-NEXT: .Lpcsection688:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection689:
+; O0-NEXT: jne .LBB239_1
+; O0-NEXT: jmp .LBB239_2
+; O0-NEXT: .LBB239_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_and_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection556:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection557:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB239_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection558:
+; O1-NEXT: andl $42, %ebx
+; O1-NEXT: .Lpcsection559:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection560:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection561:
+; O1-NEXT: jne .LBB239_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_and_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection556:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection557:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB239_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection558:
+; O2-NEXT: andl $42, %ebx
+; O2-NEXT: .Lpcsection559:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection560:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection561:
+; O2-NEXT: jne .LBB239_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_and_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection556:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection557:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB239_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection558:
+; O3-NEXT: andl $42, %ebx
+; O3-NEXT: .Lpcsection559:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection560:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection561:
+; O3-NEXT: jne .LBB239_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw and ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_or_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_or_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection690:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection691:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection692:
+; O0-NEXT: jmp .LBB240_1
+; O0-NEXT: .LBB240_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection693:
+; O0-NEXT: orq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection694:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection695:
+; O0-NEXT: jne .LBB240_1
+; O0-NEXT: jmp .LBB240_2
+; O0-NEXT: .LBB240_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_or_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection562:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection563:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB240_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection564:
+; O1-NEXT: orq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection565:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection566:
+; O1-NEXT: jne .LBB240_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_or_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection562:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection563:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB240_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection564:
+; O2-NEXT: orq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection565:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection566:
+; O2-NEXT: jne .LBB240_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_or_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection562:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection563:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB240_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection564:
+; O3-NEXT: orq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection565:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection566:
+; O3-NEXT: jne .LBB240_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw or ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_xor_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_xor_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection696:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection697:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection698:
+; O0-NEXT: jmp .LBB241_1
+; O0-NEXT: .LBB241_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movq %rax, %rbx
+; O0-NEXT: .Lpcsection699:
+; O0-NEXT: xorq $42, %rbx
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection700:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection701:
+; O0-NEXT: jne .LBB241_1
+; O0-NEXT: jmp .LBB241_2
+; O0-NEXT: .LBB241_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_xor_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection567:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection568:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB241_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movq %rax, %rbx
+; O1-NEXT: .Lpcsection569:
+; O1-NEXT: xorq $42, %rbx
+; O1-NEXT: movq %rdx, %rcx
+; O1-NEXT: .Lpcsection570:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection571:
+; O1-NEXT: jne .LBB241_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_xor_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection567:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection568:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB241_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movq %rax, %rbx
+; O2-NEXT: .Lpcsection569:
+; O2-NEXT: xorq $42, %rbx
+; O2-NEXT: movq %rdx, %rcx
+; O2-NEXT: .Lpcsection570:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection571:
+; O2-NEXT: jne .LBB241_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_xor_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection567:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection568:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB241_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movq %rax, %rbx
+; O3-NEXT: .Lpcsection569:
+; O3-NEXT: xorq $42, %rbx
+; O3-NEXT: movq %rdx, %rcx
+; O3-NEXT: .Lpcsection570:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection571:
+; O3-NEXT: jne .LBB241_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw xor ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_nand_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_nand_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection702:
+; O0-NEXT: movq (%rdi), %rax
+; O0-NEXT: .Lpcsection703:
+; O0-NEXT: movq 8(%rdi), %rdx
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection704:
+; O0-NEXT: jmp .LBB242_1
+; O0-NEXT: .LBB242_1: # %atomicrmw.start
+; O0-NEXT: # =>This Inner Loop Header: Depth=1
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: .Lpcsection705:
+; O0-NEXT: notl %ecx
+; O0-NEXT: .Lpcsection706:
+; O0-NEXT: # implicit-def: $rbx
+; O0-NEXT: movl %ecx, %ebx
+; O0-NEXT: .Lpcsection707:
+; O0-NEXT: orq $-43, %rbx
+; O0-NEXT: .Lpcsection708:
+; O0-NEXT: movq $-1, %rcx
+; O0-NEXT: .Lpcsection709:
+; O0-NEXT: lock cmpxchg16b (%rsi)
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection710:
+; O0-NEXT: jne .LBB242_1
+; O0-NEXT: jmp .LBB242_2
+; O0-NEXT: .LBB242_2: # %atomicrmw.end
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_nand_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection572:
+; O1-NEXT: movq (%rdi), %rax
+; O1-NEXT: .Lpcsection573:
+; O1-NEXT: movq 8(%rdi), %rdx
+; O1-NEXT: .Lpcsection574:
+; O1-NEXT: movq $-1, %rcx
+; O1-NEXT: .p2align 4, 0x90
+; O1-NEXT: .LBB242_1: # %atomicrmw.start
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: movl %eax, %ebx
+; O1-NEXT: .Lpcsection575:
+; O1-NEXT: notl %ebx
+; O1-NEXT: .Lpcsection576:
+; O1-NEXT: orq $-43, %rbx
+; O1-NEXT: .Lpcsection577:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection578:
+; O1-NEXT: jne .LBB242_1
+; O1-NEXT: # %bb.2: # %atomicrmw.end
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_nand_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection572:
+; O2-NEXT: movq (%rdi), %rax
+; O2-NEXT: .Lpcsection573:
+; O2-NEXT: movq 8(%rdi), %rdx
+; O2-NEXT: .Lpcsection574:
+; O2-NEXT: movq $-1, %rcx
+; O2-NEXT: .p2align 4, 0x90
+; O2-NEXT: .LBB242_1: # %atomicrmw.start
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: movl %eax, %ebx
+; O2-NEXT: .Lpcsection575:
+; O2-NEXT: notl %ebx
+; O2-NEXT: .Lpcsection576:
+; O2-NEXT: orq $-43, %rbx
+; O2-NEXT: .Lpcsection577:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection578:
+; O2-NEXT: jne .LBB242_1
+; O2-NEXT: # %bb.2: # %atomicrmw.end
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_nand_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection572:
+; O3-NEXT: movq (%rdi), %rax
+; O3-NEXT: .Lpcsection573:
+; O3-NEXT: movq 8(%rdi), %rdx
+; O3-NEXT: .Lpcsection574:
+; O3-NEXT: movq $-1, %rcx
+; O3-NEXT: .p2align 4, 0x90
+; O3-NEXT: .LBB242_1: # %atomicrmw.start
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: movl %eax, %ebx
+; O3-NEXT: .Lpcsection575:
+; O3-NEXT: notl %ebx
+; O3-NEXT: .Lpcsection576:
+; O3-NEXT: orq $-43, %rbx
+; O3-NEXT: .Lpcsection577:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection578:
+; O3-NEXT: jne .LBB242_1
+; O3-NEXT: # %bb.2: # %atomicrmw.end
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = atomicrmw nand ptr %a, i128 42 seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_cas_monotonic(ptr %a) {
+; O0-LABEL: atomic128_cas_monotonic:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection711:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection712:
+; O0-NEXT: movl $42, %eax
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection713:
+; O0-NEXT: movl $1, %ebx
+; O0-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection714:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection715:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection716:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection717:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection718:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_cas_monotonic:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection579:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection580:
+; O1-NEXT: movl $1, %ebx
+; O1-NEXT: .Lpcsection581:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection582:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection583:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection584:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection585:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection586:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection587:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection588:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection589:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection590:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection591:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_cas_monotonic:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection579:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection580:
+; O2-NEXT: movl $1, %ebx
+; O2-NEXT: .Lpcsection581:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection582:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection583:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection584:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection585:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection586:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection587:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection588:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection589:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection590:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection591:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_cas_monotonic:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection579:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection580:
+; O3-NEXT: movl $1, %ebx
+; O3-NEXT: .Lpcsection581:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection582:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection583:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection584:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection585:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection586:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection587:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection588:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection589:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection590:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection591:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = cmpxchg ptr %a, i128 42, i128 1 monotonic monotonic, align 16, !pcsections !0
+ %y = cmpxchg ptr %a, i128 42, i128 1 monotonic acquire, align 16, !pcsections !0
+ %z = cmpxchg ptr %a, i128 42, i128 1 monotonic seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_cas_acquire(ptr %a) {
+; O0-LABEL: atomic128_cas_acquire:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection719:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection720:
+; O0-NEXT: movl $42, %eax
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection721:
+; O0-NEXT: movl $1, %ebx
+; O0-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection722:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection723:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection724:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection725:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection726:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_cas_acquire:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection592:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection593:
+; O1-NEXT: movl $1, %ebx
+; O1-NEXT: .Lpcsection594:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection595:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection596:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection597:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection598:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection599:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection600:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection601:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection602:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection603:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection604:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_cas_acquire:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection592:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection593:
+; O2-NEXT: movl $1, %ebx
+; O2-NEXT: .Lpcsection594:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection595:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection596:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection597:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection598:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection599:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection600:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection601:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection602:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection603:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection604:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_cas_acquire:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection592:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection593:
+; O3-NEXT: movl $1, %ebx
+; O3-NEXT: .Lpcsection594:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection595:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection596:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection597:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection598:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection599:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection600:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection601:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection602:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection603:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection604:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = cmpxchg ptr %a, i128 42, i128 1 acquire monotonic, align 16, !pcsections !0
+ %y = cmpxchg ptr %a, i128 42, i128 1 acquire acquire, align 16, !pcsections !0
+ %z = cmpxchg ptr %a, i128 42, i128 1 acquire seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_cas_release(ptr %a) {
+; O0-LABEL: atomic128_cas_release:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection727:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection728:
+; O0-NEXT: movl $42, %eax
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection729:
+; O0-NEXT: movl $1, %ebx
+; O0-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection730:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection731:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection732:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection733:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection734:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_cas_release:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection605:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection606:
+; O1-NEXT: movl $1, %ebx
+; O1-NEXT: .Lpcsection607:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection608:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection609:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection610:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection611:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection612:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection613:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection614:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection615:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection616:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection617:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_cas_release:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection605:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection606:
+; O2-NEXT: movl $1, %ebx
+; O2-NEXT: .Lpcsection607:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection608:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection609:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection610:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection611:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection612:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection613:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection614:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection615:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection616:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection617:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_cas_release:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection605:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection606:
+; O3-NEXT: movl $1, %ebx
+; O3-NEXT: .Lpcsection607:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection608:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection609:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection610:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection611:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection612:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection613:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection614:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection615:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection616:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection617:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = cmpxchg ptr %a, i128 42, i128 1 release monotonic, align 16, !pcsections !0
+ %y = cmpxchg ptr %a, i128 42, i128 1 release acquire, align 16, !pcsections !0
+ %z = cmpxchg ptr %a, i128 42, i128 1 release seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_cas_acq_rel(ptr %a) {
+; O0-LABEL: atomic128_cas_acq_rel:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection735:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection736:
+; O0-NEXT: movl $42, %eax
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection737:
+; O0-NEXT: movl $1, %ebx
+; O0-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection738:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection739:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection740:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection741:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection742:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $1, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_cas_acq_rel:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection618:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection619:
+; O1-NEXT: movl $1, %ebx
+; O1-NEXT: .Lpcsection620:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection621:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection622:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection623:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection624:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection625:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection626:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection627:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection628:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection629:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection630:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $1, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_cas_acq_rel:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection618:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection619:
+; O2-NEXT: movl $1, %ebx
+; O2-NEXT: .Lpcsection620:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection621:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection622:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection623:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection624:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection625:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection626:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection627:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection628:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection629:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection630:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $1, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_cas_acq_rel:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection618:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection619:
+; O3-NEXT: movl $1, %ebx
+; O3-NEXT: .Lpcsection620:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection621:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection622:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection623:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection624:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection625:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection626:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection627:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection628:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection629:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection630:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $1, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = cmpxchg ptr %a, i128 42, i128 1 acq_rel monotonic, align 16, !pcsections !0
+ %y = cmpxchg ptr %a, i128 42, i128 1 acq_rel acquire, align 16, !pcsections !0
+ %z = cmpxchg ptr %a, i128 42, i128 1 acq_rel seq_cst, align 16, !pcsections !0
+ store volatile i64 1, ptr @foo, align 8
+ ret void
+}
+
+define void @atomic128_cas_seq_cst(ptr %a) {
+; O0-LABEL: atomic128_cas_seq_cst:
+; O0: # %bb.0: # %entry
+; O0-NEXT: pushq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 16
+; O0-NEXT: .cfi_offset %rbx, -16
+; O0-NEXT: movq foo(%rip), %rax
+; O0-NEXT: .Lpcsection743:
+; O0-NEXT: xorl %eax, %eax
+; O0-NEXT: movl %eax, %ecx
+; O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection744:
+; O0-NEXT: movl $42, %eax
+; O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: .Lpcsection745:
+; O0-NEXT: movl $1, %ebx
+; O0-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection746:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection747:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection748:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; O0-NEXT: .Lpcsection749:
+; O0-NEXT: # kill: def $rsi killed $rax
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; O0-NEXT: movq %rcx, %rdx
+; O0-NEXT: .Lpcsection750:
+; O0-NEXT: lock cmpxchg16b (%rdi)
+; O0-NEXT: movq $3, foo
+; O0-NEXT: popq %rbx
+; O0-NEXT: .cfi_def_cfa_offset 8
+; O0-NEXT: retq
+;
+; O1-LABEL: atomic128_cas_seq_cst:
+; O1: # %bb.0: # %entry
+; O1-NEXT: pushq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 16
+; O1-NEXT: .cfi_offset %rbx, -16
+; O1-NEXT: movq foo(%rip), %rax
+; O1-NEXT: .Lpcsection631:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection632:
+; O1-NEXT: movl $1, %ebx
+; O1-NEXT: .Lpcsection633:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection634:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection635:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection636:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection637:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection638:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection639:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: .Lpcsection640:
+; O1-NEXT: movl $42, %eax
+; O1-NEXT: .Lpcsection641:
+; O1-NEXT: xorl %edx, %edx
+; O1-NEXT: .Lpcsection642:
+; O1-NEXT: xorl %ecx, %ecx
+; O1-NEXT: .Lpcsection643:
+; O1-NEXT: lock cmpxchg16b (%rdi)
+; O1-NEXT: movq $3, foo(%rip)
+; O1-NEXT: popq %rbx
+; O1-NEXT: .cfi_def_cfa_offset 8
+; O1-NEXT: retq
+;
+; O2-LABEL: atomic128_cas_seq_cst:
+; O2: # %bb.0: # %entry
+; O2-NEXT: pushq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 16
+; O2-NEXT: .cfi_offset %rbx, -16
+; O2-NEXT: movq foo(%rip), %rax
+; O2-NEXT: .Lpcsection631:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection632:
+; O2-NEXT: movl $1, %ebx
+; O2-NEXT: .Lpcsection633:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection634:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection635:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection636:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection637:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection638:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection639:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: .Lpcsection640:
+; O2-NEXT: movl $42, %eax
+; O2-NEXT: .Lpcsection641:
+; O2-NEXT: xorl %edx, %edx
+; O2-NEXT: .Lpcsection642:
+; O2-NEXT: xorl %ecx, %ecx
+; O2-NEXT: .Lpcsection643:
+; O2-NEXT: lock cmpxchg16b (%rdi)
+; O2-NEXT: movq $3, foo(%rip)
+; O2-NEXT: popq %rbx
+; O2-NEXT: .cfi_def_cfa_offset 8
+; O2-NEXT: retq
+;
+; O3-LABEL: atomic128_cas_seq_cst:
+; O3: # %bb.0: # %entry
+; O3-NEXT: pushq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 16
+; O3-NEXT: .cfi_offset %rbx, -16
+; O3-NEXT: movq foo(%rip), %rax
+; O3-NEXT: .Lpcsection631:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection632:
+; O3-NEXT: movl $1, %ebx
+; O3-NEXT: .Lpcsection633:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection634:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection635:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection636:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection637:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection638:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection639:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: .Lpcsection640:
+; O3-NEXT: movl $42, %eax
+; O3-NEXT: .Lpcsection641:
+; O3-NEXT: xorl %edx, %edx
+; O3-NEXT: .Lpcsection642:
+; O3-NEXT: xorl %ecx, %ecx
+; O3-NEXT: .Lpcsection643:
+; O3-NEXT: lock cmpxchg16b (%rdi)
+; O3-NEXT: movq $3, foo(%rip)
+; O3-NEXT: popq %rbx
+; O3-NEXT: .cfi_def_cfa_offset 8
+; O3-NEXT: retq
+entry:
+ load volatile i64, ptr @foo, align 8
+ %x = cmpxchg ptr %a, i128 42, i128 1 seq_cst monotonic, align 16, !pcsections !0
+ %y = cmpxchg ptr %a, i128 42, i128 1 seq_cst acquire, align 16, !pcsections !0
+ %z = cmpxchg ptr %a, i128 42, i128 1 seq_cst seq_cst, align 16, !pcsections !0
+ store volatile i64 3, ptr @foo, align 8
+ ret void
+}
+
!0 = !{!"somesection"}