[llvm] [Xtensa] Implement Xtensa S32C1I Option and atomics lowering. (PR #137134)
Andrei Safronov via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 14:03:19 PDT 2025
https://github.com/andreisfr updated https://github.com/llvm/llvm-project/pull/137134
>From b127324c314288ebf7989facc0b3646cfbd2a261 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Tue, 5 Aug 2025 01:45:48 +0300
Subject: [PATCH] [Xtensa] Implement Xtensa S32C1I Option and atomics lowering.
Implement Xtensa S32C1I Option and use s32c1i instruction to implement atomics
operations.
---
.../Disassembler/XtensaDisassembler.cpp | 67 +-
.../MCTargetDesc/XtensaMCTargetDesc.cpp | 3 +
llvm/lib/Target/Xtensa/XtensaFeatures.td | 16 +
llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 863 +++
llvm/lib/Target/Xtensa/XtensaISelLowering.h | 19 +
llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 158 +
llvm/lib/Target/Xtensa/XtensaOperators.td | 5 +
llvm/lib/Target/Xtensa/XtensaRegisterInfo.td | 10 +-
llvm/lib/Target/Xtensa/XtensaSubtarget.h | 2 +
.../lib/Target/Xtensa/XtensaTargetMachine.cpp | 6 +
llvm/test/CodeGen/Xtensa/atomic-load-store.ll | 498 ++
llvm/test/CodeGen/Xtensa/atomic-rmw.ll | 4728 +++++++++++++++++
llvm/test/CodeGen/Xtensa/forced-atomics.ll | 1288 +++++
13 files changed, 7628 insertions(+), 35 deletions(-)
create mode 100644 llvm/test/CodeGen/Xtensa/atomic-load-store.ll
create mode 100644 llvm/test/CodeGen/Xtensa/atomic-rmw.ll
create mode 100644 llvm/test/CodeGen/Xtensa/forced-atomics.ll
diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
index 2f92f8606fb48..39bec4785c61c 100644
--- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
+++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
@@ -145,39 +145,40 @@ struct DecodeRegister {
};
const DecodeRegister SRDecoderTable[] = {
- {Xtensa::LBEG, 0}, {Xtensa::LEND, 1},
- {Xtensa::LCOUNT, 2}, {Xtensa::SAR, 3},
- {Xtensa::BREG, 4}, {Xtensa::LITBASE, 5},
- {Xtensa::ACCLO, 16}, {Xtensa::ACCHI, 17},
- {Xtensa::M0, 32}, {Xtensa::M1, 33},
- {Xtensa::M2, 34}, {Xtensa::M3, 35},
- {Xtensa::WINDOWBASE, 72}, {Xtensa::WINDOWSTART, 73},
- {Xtensa::IBREAKENABLE, 96}, {Xtensa::MEMCTL, 97},
- {Xtensa::DDR, 104}, {Xtensa::IBREAKA0, 128},
- {Xtensa::IBREAKA1, 129}, {Xtensa::DBREAKA0, 144},
- {Xtensa::DBREAKA1, 145}, {Xtensa::DBREAKC0, 160},
- {Xtensa::DBREAKC1, 161}, {Xtensa::CONFIGID0, 176},
- {Xtensa::EPC1, 177}, {Xtensa::EPC2, 178},
- {Xtensa::EPC3, 179}, {Xtensa::EPC4, 180},
- {Xtensa::EPC5, 181}, {Xtensa::EPC6, 182},
- {Xtensa::EPC7, 183}, {Xtensa::DEPC, 192},
- {Xtensa::EPS2, 194}, {Xtensa::EPS3, 195},
- {Xtensa::EPS4, 196}, {Xtensa::EPS5, 197},
- {Xtensa::EPS6, 198}, {Xtensa::EPS7, 199},
- {Xtensa::CONFIGID1, 208}, {Xtensa::EXCSAVE1, 209},
- {Xtensa::EXCSAVE2, 210}, {Xtensa::EXCSAVE3, 211},
- {Xtensa::EXCSAVE4, 212}, {Xtensa::EXCSAVE5, 213},
- {Xtensa::EXCSAVE6, 214}, {Xtensa::EXCSAVE7, 215},
- {Xtensa::CPENABLE, 224}, {Xtensa::INTERRUPT, 226},
- {Xtensa::INTCLEAR, 227}, {Xtensa::INTENABLE, 228},
- {Xtensa::PS, 230}, {Xtensa::VECBASE, 231},
- {Xtensa::EXCCAUSE, 232}, {Xtensa::DEBUGCAUSE, 233},
- {Xtensa::CCOUNT, 234}, {Xtensa::PRID, 235},
- {Xtensa::ICOUNT, 236}, {Xtensa::ICOUNTLEVEL, 237},
- {Xtensa::EXCVADDR, 238}, {Xtensa::CCOMPARE0, 240},
- {Xtensa::CCOMPARE1, 241}, {Xtensa::CCOMPARE2, 242},
- {Xtensa::MISC0, 244}, {Xtensa::MISC1, 245},
- {Xtensa::MISC2, 246}, {Xtensa::MISC3, 247}};
+ {Xtensa::LBEG, 0}, {Xtensa::LEND, 1},
+ {Xtensa::LCOUNT, 2}, {Xtensa::SAR, 3},
+ {Xtensa::BREG, 4}, {Xtensa::LITBASE, 5},
+ {Xtensa::SCOMPARE1, 12}, {Xtensa::ACCLO, 16},
+ {Xtensa::ACCHI, 17}, {Xtensa::M0, 32},
+ {Xtensa::M1, 33}, {Xtensa::M2, 34},
+ {Xtensa::M3, 35}, {Xtensa::WINDOWBASE, 72},
+ {Xtensa::WINDOWSTART, 73}, {Xtensa::IBREAKENABLE, 96},
+ {Xtensa::MEMCTL, 97}, {Xtensa::ATOMCTL, 99},
+ {Xtensa::DDR, 104}, {Xtensa::IBREAKA0, 128},
+ {Xtensa::IBREAKA1, 129}, {Xtensa::DBREAKA0, 144},
+ {Xtensa::DBREAKA1, 145}, {Xtensa::DBREAKC0, 160},
+ {Xtensa::DBREAKC1, 161}, {Xtensa::CONFIGID0, 176},
+ {Xtensa::EPC1, 177}, {Xtensa::EPC2, 178},
+ {Xtensa::EPC3, 179}, {Xtensa::EPC4, 180},
+ {Xtensa::EPC5, 181}, {Xtensa::EPC6, 182},
+ {Xtensa::EPC7, 183}, {Xtensa::DEPC, 192},
+ {Xtensa::EPS2, 194}, {Xtensa::EPS3, 195},
+ {Xtensa::EPS4, 196}, {Xtensa::EPS5, 197},
+ {Xtensa::EPS6, 198}, {Xtensa::EPS7, 199},
+ {Xtensa::CONFIGID1, 208}, {Xtensa::EXCSAVE1, 209},
+ {Xtensa::EXCSAVE2, 210}, {Xtensa::EXCSAVE3, 211},
+ {Xtensa::EXCSAVE4, 212}, {Xtensa::EXCSAVE5, 213},
+ {Xtensa::EXCSAVE6, 214}, {Xtensa::EXCSAVE7, 215},
+ {Xtensa::CPENABLE, 224}, {Xtensa::INTERRUPT, 226},
+ {Xtensa::INTCLEAR, 227}, {Xtensa::INTENABLE, 228},
+ {Xtensa::PS, 230}, {Xtensa::VECBASE, 231},
+ {Xtensa::EXCCAUSE, 232}, {Xtensa::DEBUGCAUSE, 233},
+ {Xtensa::CCOUNT, 234}, {Xtensa::PRID, 235},
+ {Xtensa::ICOUNT, 236}, {Xtensa::ICOUNTLEVEL, 237},
+ {Xtensa::EXCVADDR, 238}, {Xtensa::CCOMPARE0, 240},
+ {Xtensa::CCOMPARE1, 241}, {Xtensa::CCOMPARE2, 242},
+ {Xtensa::MISC0, 244}, {Xtensa::MISC1, 245},
+ {Xtensa::MISC2, 246}, {Xtensa::MISC3, 247}};
static DecodeStatus DecodeSRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
index 821cba0fc25c2..080a9c0bdd9e0 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
@@ -200,6 +200,9 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits,
case Xtensa::WINDOWBASE:
case Xtensa::WINDOWSTART:
return FeatureBits[Xtensa::FeatureWindowed];
+ case Xtensa::ATOMCTL:
+ case Xtensa::SCOMPARE1:
+ // ATOMCTL and SCOMPARE1 are only present when the S32C1I option is
+ // configured; they are unrelated to the Windowed Register option.
+ return FeatureBits[Xtensa::FeatureS32C1I];
case Xtensa::NoRegister:
return false;
}
diff --git a/llvm/lib/Target/Xtensa/XtensaFeatures.td b/llvm/lib/Target/Xtensa/XtensaFeatures.td
index 97d5472f3e96c..d6f3ef0f15e32 100644
--- a/llvm/lib/Target/Xtensa/XtensaFeatures.td
+++ b/llvm/lib/Target/Xtensa/XtensaFeatures.td
@@ -73,6 +73,22 @@ def FeatureDiv32 : SubtargetFeature<"div32", "HasDiv32", "true",
def HasDiv32 : Predicate<"Subtarget->hasDiv32()">,
AssemblerPredicate<(all_of FeatureDiv32)>;
+def FeatureS32C1I : SubtargetFeature<"s32c1i", "HasS32C1I", "true",
+ "Enable Xtensa S32C1I option">;
+def HasS32C1I : Predicate<"Subtarget->hasS32C1I()">,
+ AssemblerPredicate<(all_of FeatureS32C1I)>;
+
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<"forced-atomics", "HasForcedAtomics", "true",
+ "Assume that lock-free native-width atomics are available">;
+def HasForcedAtomics : Predicate<"Subtarget->hasForcedAtomics()">,
+ AssemblerPredicate<(all_of FeatureForcedAtomics)>;
+def HasAtomicLdSt : Predicate<"Subtarget->hasS32C1I() || Subtarget->hasForcedAtomics()">;
+
def FeatureRegionProtection : SubtargetFeature<"regprotect", "HasRegionProtection", "true",
"Enable Xtensa Region Protection option">;
def HasRegionProtection : Predicate<"Subtarget->hasRegionProtection()">,
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index fd42fd2e010ba..c56327b8c7334 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -250,6 +250,40 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ // Custom-lower atomic fences so that they are emitted as the target's
+ // memory barrier node (XtensaISD::MEMW).
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (!Subtarget.hasS32C1I()) {
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ }
+ }
+ }
+
+ if (Subtarget.hasS32C1I()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ setMinCmpXchgSizeInBits(32);
+ } else if (Subtarget.hasForcedAtomics()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ } else {
+ setMaxAtomicSizeInBitsSupported(0);
+ }
+
// Compute derived properties from the register classes
computeRegisterProperties(STI.getRegisterInfo());
}
@@ -1448,6 +1482,13 @@ bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
+// Lower ISD::ATOMIC_FENCE to an XtensaISD::MEMW node that is chained on the
+// fence's incoming chain (operand 0). The fence's remaining operands are not
+// inspected.
+SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  return DAG.getNode(XtensaISD::MEMW, SDLoc(Op), MVT::Other, Op.getOperand(0));
+}
+
SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1491,6 +1532,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG, false);
+ case ISD::ATOMIC_FENCE:
+ return LowerATOMIC_FENCE(Op, DAG);
default:
report_fatal_error("Unexpected node to lower");
}
@@ -1630,6 +1673,731 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,
return SinkMBB;
}
+// Emit instructions for atomic_cmp_swap node for 8/16 bit operands.
+//
+// MI operands: 0 = result register, 1 = address register, 2 = expected
+// value, 3 = replacement value. isByteOperand is non-zero for the 8-bit form
+// and zero for the 16-bit form.
+//
+// The narrow location is accessed through the aligned 32-bit word that
+// contains it: the expected and replacement values are shifted to the field's
+// position and merged with the word's remaining bits, and S32C1I is retried
+// for as long as those remaining bits differ from the ones that were merged
+// in. The result is the field extracted from the last value S32C1I returned.
+// MI is erased and BBExit, which receives the instructions that followed MI,
+// is returned.
+//
+// NOTE(review): CmpVal and SwpVal are shifted without being masked to the
+// field width, which assumes their bits above the field are zero - confirm
+// how the pseudo's operands are extended.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *thisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &CmpVal = MI.getOperand(2);
+ MachineOperand &SwpVal = MI.getOperand(3);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ // ByteOffs = address & 3: position of the field inside its 32-bit word.
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ // AddrAlign = address of the containing 32-bit word.
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ // BitOffs = ByteOffs * 8.
+ // NOTE(review): using the byte offset directly as the shift amount looks
+ // like it assumes little-endian byte numbering - confirm.
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ // Mask1 = unshifted field mask: 0xff, or (1 << 16) - 1 for 16-bit operands.
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ // Set up the shift amount used by the SLL instructions below.
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ // Mask2 selects the field inside the word; Mask3 = ~Mask2 selects the rest.
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ // R4 = current contents of the word outside the field (initial loop value).
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ // Move the expected and replacement values to the field's bit position.
+ unsigned Cmp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Cmp1).addReg(CmpVal.getReg());
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Swp1).addReg(SwpVal.getReg());
+
+ BB = BBLoop;
+
+ // MaskPhi: the out-of-field bits assumed for this attempt; MaskLoop: the
+ // out-of-field bits actually returned by the attempt.
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop)
+ .addReg(R4)
+ .addMBB(thisBB);
+
+ // Build full-word expected and replacement values.
+ unsigned Cmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Cmp2).addReg(Cmp1).addReg(MaskPhi);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Swp1).addReg(MaskPhi);
+
+ // SCOMPARE1 holds the value S32C1I compares memory against.
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Cmp2);
+
+ unsigned Swp3 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp3)
+ .addReg(Swp2)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp3).addReg(Mask3);
+
+ // Retry only when the bits outside the field changed under us; a mismatch
+ // inside the field is a genuine compare failure and leaves the loop.
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop);
+
+ BB->addSuccessor(BBLoop);
+ BB->addSuccessor(BBExit);
+
+ BB = BBExit;
+ auto St = BBExit->begin();
+
+ // Result = (value returned by S32C1I >> BitOffs) & Mask1.
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R5).addReg(Swp3);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::AND), Res.getReg())
+ .addReg(R5)
+ .addReg(Mask1);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for atomic_swap node for 8/16 bit operands.
+//
+// MI operands: 0 = result register, 1 = address register, 2 = new value.
+// isByteOperand is non-zero for the 8-bit form and zero for the 16-bit form.
+//
+// Blocks created, in layout order:
+//   BBLoop1 - reloads the containing word and extracts the bits outside the
+//             field;
+//   BBLoop2 - builds the expected/replacement words and issues S32C1I;
+//   BBLoop3 - reached when S32C1I did not return the expected word; retries
+//             BBLoop2 if the bits outside the field changed;
+//   BBLoop4 - restarts from BBLoop1 if the field returned by S32C1I differs
+//             from the field that was expected;
+//   BBExit  - receives the instructions that followed MI.
+// The result is the field of the last S32C1I result, shifted down and
+// sign-extended with SEXT. MI is erased and BBExit is returned.
+//
+// NOTE(review): SwpVal is shifted without being masked to the field width,
+// which assumes its bits above the field are zero - confirm how the pseudo's
+// operand is extended.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop3 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop4 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop1);
+ F->insert(It, BBLoop2);
+ F->insert(It, BBLoop3);
+ F->insert(It, BBLoop4);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Control-flow edges between the new blocks (see the function comment).
+ BB->addSuccessor(BBLoop1);
+ BBLoop1->addSuccessor(BBLoop2);
+ BBLoop2->addSuccessor(BBLoop3);
+ BBLoop2->addSuccessor(BBLoop4);
+ BBLoop3->addSuccessor(BBLoop2);
+ BBLoop3->addSuccessor(BBLoop4);
+ BBLoop4->addSuccessor(BBLoop1);
+ BBLoop4->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ // ByteOffs = address & 3: position of the field inside its 32-bit word.
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ // AddrAlign = address of the containing 32-bit word.
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ // BitOffs = ByteOffs * 8.
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ // Mask1 = unshifted field mask: 0xff, or (1 << 16) - 1 for 16-bit operands.
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ // Set up the shift amount used by the SLL instructions below.
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ // Mask2 selects the field inside the word; Mask3 = ~Mask2 selects the rest.
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ // NOTE(review): R3/R4 are not read by any later instruction in this
+ // function (BBLoop1 reloads the word into R6/R7); this load and AND look
+ // redundant.
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ // Move the new value to the field's bit position.
+ unsigned SwpValShifted = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), SwpValShifted)
+ .addReg(SwpVal.getReg());
+
+ // AtomVal = current field contents (still at the field's bit position);
+ // this is the first value the swap expects to replace.
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R5).addReg(AddrAlign).addImm(0);
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), AtomVal).addReg(R5).addReg(Mask2);
+
+ // AtomValPhi: field value expected by the next attempt; AtomValLoop: field
+ // value returned by the last S32C1I (defined in BBLoop4).
+ unsigned AtomValPhi = MRI.createVirtualRegister(RC);
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop1, BBLoop1->begin(), DL, TII.get(Xtensa::PHI), AtomValPhi)
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop4)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+ BB = BBLoop1;
+
+ BuildMI(BB, DL, TII.get(Xtensa::MEMW));
+
+ // R7 = freshly loaded bits outside the field.
+ unsigned R6 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::L32I), R6).addReg(AddrAlign).addImm(0);
+
+ unsigned R7 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::AND), R7).addReg(R6).addReg(Mask3);
+
+ // MaskPhi: out-of-field bits assumed for this attempt; MaskLoop: the
+ // out-of-field bits returned by a failed attempt (defined in BBLoop3).
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop2, BBLoop2->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop3)
+ .addReg(R7)
+ .addMBB(BBLoop1);
+
+ BB = BBLoop2;
+
+ // Swp1 = full replacement word, AtomVal1 = full expected word.
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp1)
+ .addReg(SwpValShifted)
+ .addReg(MaskPhi);
+
+ unsigned AtomVal1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), AtomVal1)
+ .addReg(AtomValPhi)
+ .addReg(MaskPhi);
+
+ // SCOMPARE1 holds the value S32C1I compares memory against.
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(AtomVal1);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp2)
+ .addReg(Swp1)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ // The returned word equals the expected word: skip the out-of-field check.
+ BuildMI(BB, DL, TII.get(Xtensa::BEQ))
+ .addReg(AtomVal1)
+ .addReg(Swp2)
+ .addMBB(BBLoop4);
+
+ BB = BBLoop3;
+
+ // Retry the S32C1I with the new out-of-field bits if those bits changed.
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp2).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop2);
+
+ BB = BBLoop4;
+
+ // Restart from BBLoop1 if the field itself differs from the expected one.
+ BuildMI(BB, DL, TII.get(Xtensa::AND), AtomValLoop).addReg(Swp2).addReg(Mask2);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(AtomValPhi)
+ .addMBB(BBLoop1);
+
+ BB = BBExit;
+
+ auto St = BB->begin();
+
+ unsigned R8 = MRI.createVirtualRegister(RC);
+
+ // Result = sign-extended (AtomValLoop >> BitOffs).
+ // NOTE(review): the 8/16-bit cmp_swap and RMW emitters mask the result with
+ // Mask1 instead of using SEXT - confirm that the difference is intended.
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R8).addReg(AtomValLoop);
+
+ if (isByteOperand) {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(7);
+ } else {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(15);
+ }
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for atomic_swap node for 32 bit operands.
+//
+// MI operands: 0 = result register, 1 = address register, 2 = new value.
+//
+// The memory word is loaded once (after a MEMW). S32C1I is then retried, each
+// time expecting the value returned by the previous attempt, until the value
+// it returns equals the expected one. The result register is the loop PHI,
+// i.e. the value that the successful S32C1I replaced.
+//
+// MI is erased. BBExit, which receives the instructions that followed MI, is
+// returned, matching the other atomic emitters in this file.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
+                                     MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  MachineFunction *MF = BB->getParent();
+  MachineBasicBlock *BBLoop = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBExit = MF->CreateMachineBasicBlock(LLVM_BB);
+
+  MF->insert(It, BBLoop);
+  MF->insert(It, BBExit);
+
+  // Transfer the remainder of BB and its successor edges to BBExit.
+  BBExit->splice(BBExit->begin(), BB,
+                 std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+  BB->addSuccessor(BBLoop);
+  BBLoop->addSuccessor(BBLoop);
+  BBLoop->addSuccessor(BBExit);
+
+  MachineOperand &Res = MI.getOperand(0);
+  MachineOperand &AtomValAddr = MI.getOperand(1);
+  MachineOperand &SwpVal = MI.getOperand(2);
+
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+  // Initial guess for the value currently in memory.
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MEMW));
+
+  unsigned AtomVal = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), AtomVal)
+      .addReg(AtomValAddr.getReg())
+      .addImm(0);
+
+  // AtomValLoop is the value returned by the S32C1I in the loop body.
+  unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), Res.getReg())
+      .addReg(AtomValLoop)
+      .addMBB(BBLoop)
+      .addReg(AtomVal)
+      .addMBB(BB);
+
+  // SCOMPARE1 holds the value S32C1I compares memory against.
+  BuildMI(BBLoop, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1)
+      .addReg(Res.getReg());
+
+  BuildMI(BBLoop, DL, TII.get(Xtensa::S32C1I), AtomValLoop)
+      .addReg(SwpVal.getReg())
+      .addReg(AtomValAddr.getReg())
+      .addImm(0);
+
+  // Memory did not hold the expected value: retry with the value just read.
+  BuildMI(BBLoop, DL, TII.get(Xtensa::BNE))
+      .addReg(AtomValLoop)
+      .addReg(Res.getReg())
+      .addMBB(BBLoop);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BBExit;
+}
+
+// Expand a 32-bit atomic read-modify-write pseudo into an S32C1I retry loop.
+//
+// MI operands: 0 = result register, 1 = address register, 2 = operand value.
+//   Opcode - when minmax is false, a two-source opcode that computes the new
+//            value from (old value, operand); when minmax is true, a
+//            conditional branch opcode: if the branch on (old value, operand)
+//            is taken the old value is written back, otherwise the operand.
+//   inv    - XOR the computed value with -1 before storing (only used on the
+//            non-minmax path).
+//   minmax - select the compare-and-select form described above.
+//
+// The loop repeats until S32C1I returns the value that was expected. The
+// result is the value returned by the last S32C1I. MI is erased and BBExit,
+// which receives the instructions that followed MI, is returned.
+//
+// NOTE(review): with the operand order used below, BGE keeps the larger and
+// BLT the smaller of (old value, operand); confirm that the opcodes passed
+// for MIN/MAX by the caller match this.
+MachineBasicBlock *XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode,
+ bool inv,
+ bool minmax) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *ThisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomicValAddr = MI.getOperand(1);
+ MachineOperand &Val = MI.getOperand(2);
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ // R1 = initial guess for the value currently in memory.
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R1)
+ .addReg(AtomicValAddr.getReg())
+ .addImm(0);
+
+ BB = BBLoop;
+
+ // AtomicValPhi: value expected in memory for this attempt; AtomicValLoop:
+ // value returned by the S32C1I; R2: new value to store.
+ unsigned AtomicValPhi = MRI.createVirtualRegister(RC);
+ unsigned AtomicValLoop = MRI.createVirtualRegister(RC);
+ unsigned R2 = MRI.createVirtualRegister(RC);
+
+ if (minmax) {
+ // Layout after the two inserts below: BBLoop, BBLoop2, BBLoop1. BBLoop
+ // branches to BBLoop1 or falls through to BBLoop2, which falls through to
+ // BBLoop1.
+ MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(++BB->getIterator(), BBLoop1);
+ BB->addSuccessor(BBLoop1);
+ MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(++BB->getIterator(), BBLoop2);
+ BB->addSuccessor(BBLoop2);
+ BBLoop2->addSuccessor(BBLoop1);
+
+ // Branch taken: keep the old value (PHI operand for BBLoop below).
+ BuildMI(BB, DL, TII.get(Opcode))
+ .addReg(AtomicValPhi)
+ .addReg(Val.getReg())
+ .addMBB(BBLoop1);
+
+ // Branch not taken: BBLoop2 selects the operand instead.
+ unsigned R7 = MRI.createVirtualRegister(RC);
+ BuildMI(BBLoop2, DL, TII.get(Xtensa::MOV_N), R7).addReg(Val.getReg());
+
+ BB = BBLoop1;
+ unsigned R8 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), R8)
+ .addReg(R7)
+ .addMBB(BBLoop2)
+ .addReg(AtomicValPhi)
+ .addMBB(BBLoop);
+ BuildMI(BB, DL, TII.get(Xtensa::MOV_N), R2).addReg(R8);
+ } else {
+ BuildMI(BB, DL, TII.get(Opcode), R2)
+ .addReg(AtomicValPhi)
+ .addReg(Val.getReg());
+ if (inv) {
+ // Invert the whole 32-bit result (NAND = ~(old & operand)).
+ unsigned Rtmp1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::MOVI), Rtmp1).addImm(-1);
+ unsigned Rtmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::XOR), Rtmp2).addReg(R2).addReg(Rtmp1);
+ R2 = Rtmp2;
+ }
+ }
+
+ // The back edge comes from BB, which is BBLoop1 on the minmax path and
+ // BBLoop otherwise.
+ BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), AtomicValPhi)
+ .addReg(AtomicValLoop)
+ .addMBB(BB)
+ .addReg(R1)
+ .addMBB(ThisBB);
+
+ // SCOMPARE1 holds the value S32C1I compares memory against.
+ // NOTE(review): the kill flag below is derived from isDead() on a use
+ // operand, and the register is read on every loop iteration - confirm that
+ // a kill flag is wanted here at all.
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(AtomicValPhi);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), R4)
+ .addReg(R2)
+ .addReg(AtomicValAddr.getReg(), getKillRegState(AtomicValAddr.isDead()))
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::MOV_N), AtomicValLoop).addReg(R4);
+
+ // Memory did not hold the expected value: retry with the value just read.
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomicValPhi)
+ .addReg(R4)
+ .addMBB(BBLoop);
+
+ BB->addSuccessor(BBLoop);
+ BB->addSuccessor(BBExit);
+
+ BB = BBExit;
+ auto St = BBExit->begin();
+
+ // Result = value returned by the last S32C1I.
+ BuildMI(*BB, St, DL, TII.get(Xtensa::MOV_N), Res.getReg()).addReg(R4);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Expand an 8/16-bit atomic read-modify-write pseudo into an S32C1I retry
+// loop on the aligned 32-bit word that contains the narrow location.
+//
+// MI operands: 0 = result register, 1 = address register, 2 = operand value.
+//   isByteOperand - true for the 8-bit form, false for the 16-bit form.
+//   Opcode        - when minmax is false, a two-source opcode applied to
+//                   (old field, shifted operand); when minmax is true, a
+//                   conditional branch opcode: if the branch on
+//                   (old value, operand) is taken the old value is kept,
+//                   otherwise the operand is stored. For BLT/BGE both values
+//                   are sign-extended from the field width first.
+//   inv           - invert the result of Opcode within the field before it
+//                   is stored (NAND = ~(old & operand)).
+//   minmax        - select the compare-and-select form described above.
+//
+// The result is the field of the word returned by the last S32C1I, shifted
+// down and masked to the field width. MI is erased and BBExit, which receives
+// the instructions that followed MI, is returned.
+//
+// NOTE(review): on the minmax path the operand is compared without being
+// masked to the field width (only the signed forms re-extend it); this
+// assumes the operand's bits above the field are zero for the unsigned
+// forms - confirm how the pseudo's operand is extended.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI, MachineBasicBlock *BB,
+                                    bool isByteOperand, unsigned Opcode,
+                                    bool inv, bool minmax) const {
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  MachineBasicBlock *ThisBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+  F->insert(It, BBLoop);
+  F->insert(It, BBExit);
+
+  // Transfer the remainder of BB and its successor edges to BBExit.
+  BBExit->splice(BBExit->begin(), BB,
+                 std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+  BB->addSuccessor(BBLoop);
+
+  MachineOperand &Res = MI.getOperand(0);
+  MachineOperand &AtomValAddr = MI.getOperand(1);
+  MachineOperand &Val = MI.getOperand(2);
+
+  MachineRegisterInfo &MRI = F->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+  // ByteOffs = address & 3: position of the field inside its 32-bit word.
+  unsigned R1 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+  unsigned ByteOffs = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+      .addReg(R1)
+      .addReg(AtomValAddr.getReg());
+
+  // AddrAlign = address of the containing 32-bit word.
+  unsigned AddrAlign = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+      .addReg(AtomValAddr.getReg())
+      .addReg(ByteOffs);
+
+  // BitOffs = ByteOffs * 8.
+  unsigned BitOffs = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+      .addReg(ByteOffs)
+      .addImm(3);
+
+  // Mask1 = unshifted field mask: 0xff, or (1 << 16) - 1 for 16-bit operands.
+  unsigned Mask1 = MRI.createVirtualRegister(RC);
+  if (isByteOperand) {
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+  } else {
+    unsigned One = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), One).addImm(1);
+    unsigned Bit16 = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), Bit16).addReg(One).addImm(16);
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1)
+        .addReg(Bit16)
+        .addImm(-1);
+  }
+
+  // Set up the shift amount used by the SLL instructions below.
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+  unsigned AllOnes = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), AllOnes).addImm(-1);
+
+  // Mask2 selects the field inside the word; Mask3 = ~Mask2 selects the rest.
+  unsigned Mask2 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+  unsigned Mask3 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3)
+      .addReg(Mask2)
+      .addReg(AllOnes);
+
+  // InitWord = initial guess for the word currently in memory.
+  unsigned InitWord = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), InitWord)
+      .addReg(AddrAlign)
+      .addImm(0);
+
+  // Val1 = operand moved to the field's bit position.
+  unsigned Val1 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Val1).addReg(Val.getReg());
+
+  BB = BBLoop;
+
+  // AtomicValPhi: word expected in memory for this attempt; AtomicValLoop:
+  // word returned by the S32C1I; Swp2: full word to store.
+  unsigned AtomicValPhi = MRI.createVirtualRegister(RC);
+  unsigned AtomicValLoop = MRI.createVirtualRegister(RC);
+  unsigned Swp2;
+
+  if (minmax) {
+    // Layout after the two inserts below: BBLoop, BBLoop2, BBLoop1. BBLoop
+    // branches to BBLoop1 or falls through to BBLoop2, which falls through
+    // to BBLoop1.
+    MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+    F->insert(++BB->getIterator(), BBLoop1);
+    BB->addSuccessor(BBLoop1);
+    MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+    F->insert(++BB->getIterator(), BBLoop2);
+    BB->addSuccessor(BBLoop2);
+    BBLoop2->addSuccessor(BBLoop1);
+
+    // Bring the old field and the operand down to bit 0 for the comparison.
+    unsigned OldField = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::AND), OldField)
+        .addReg(AtomicValPhi)
+        .addReg(Mask2);
+
+    unsigned OldLow = MRI.createVirtualRegister(RC);
+    unsigned ValLow = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+    BuildMI(BB, DL, TII.get(Xtensa::SRL), OldLow).addReg(OldField);
+    BuildMI(BB, DL, TII.get(Xtensa::SRL), ValLow).addReg(Val1);
+
+    unsigned OldCmp = OldLow;
+    unsigned ValCmp = ValLow;
+    if ((Opcode == Xtensa::BLT) || (Opcode == Xtensa::BGE)) {
+      // Signed comparison: sign-extend both values from the field width.
+      const int SignBit = isByteOperand ? 7 : 15;
+      OldCmp = MRI.createVirtualRegister(RC);
+      ValCmp = MRI.createVirtualRegister(RC);
+      BuildMI(BB, DL, TII.get(Xtensa::SEXT), OldCmp)
+          .addReg(OldLow)
+          .addImm(SignBit);
+      BuildMI(BB, DL, TII.get(Xtensa::SEXT), ValCmp)
+          .addReg(ValLow)
+          .addImm(SignBit);
+    }
+
+    // Branch taken: keep the old word (PHI operand for BBLoop below).
+    BuildMI(BB, DL, TII.get(Opcode))
+        .addReg(OldCmp)
+        .addReg(ValCmp)
+        .addMBB(BBLoop1);
+
+    // Branch not taken: BBLoop2 selects the shifted operand instead.
+    unsigned ValSel = MRI.createVirtualRegister(RC);
+    BuildMI(BBLoop2, DL, TII.get(Xtensa::MOV_N), ValSel).addReg(Val1);
+
+    BB = BBLoop1;
+    unsigned Selected = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), Selected)
+        .addReg(ValSel)
+        .addMBB(BBLoop2)
+        .addReg(AtomicValPhi)
+        .addMBB(BBLoop);
+
+    // Swp2 = (old word outside the field) | (selected value inside it).
+    unsigned Outside = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::AND), Outside)
+        .addReg(AtomicValPhi)
+        .addReg(Mask3);
+
+    unsigned Inside = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::AND), Inside)
+        .addReg(Selected)
+        .addReg(Mask2);
+
+    Swp2 = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Outside).addReg(Inside);
+  } else {
+    unsigned OldField = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::AND), OldField)
+        .addReg(AtomicValPhi)
+        .addReg(Mask2);
+
+    unsigned OpRes = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Opcode), OpRes).addReg(OldField).addReg(Val1);
+
+    if (inv) {
+      // NAND stores ~(old & operand): invert the *result* inside the field.
+      // Inverting the old field instead would store ~old | operand.
+      unsigned Inverted = MRI.createVirtualRegister(RC);
+      BuildMI(BB, DL, TII.get(Xtensa::XOR), Inverted)
+          .addReg(OpRes)
+          .addReg(Mask2);
+      OpRes = Inverted;
+    }
+
+    // Drop any bits the operation produced outside the field.
+    unsigned Inside = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::AND), Inside).addReg(OpRes).addReg(Mask2);
+
+    unsigned Outside = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::AND), Outside)
+        .addReg(AtomicValPhi)
+        .addReg(Mask3);
+
+    Swp2 = MRI.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Inside).addReg(Outside);
+  }
+
+  // The back edge comes from BB, which is BBLoop1 on the minmax path and
+  // BBLoop otherwise.
+  BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), AtomicValPhi)
+      .addReg(AtomicValLoop)
+      .addMBB(BB)
+      .addReg(InitWord)
+      .addMBB(ThisBB);
+
+  // SCOMPARE1 holds the value S32C1I compares memory against.
+  unsigned Swp3 = MRI.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1)
+      .addReg(AtomicValPhi);
+  BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp3)
+      .addReg(Swp2)
+      .addReg(AddrAlign)
+      .addImm(0);
+
+  BuildMI(BB, DL, TII.get(Xtensa::MOV_N), AtomicValLoop).addReg(Swp3);
+
+  // Memory did not hold the expected word: retry with the word just read.
+  BuildMI(BB, DL, TII.get(Xtensa::BNE))
+      .addReg(Swp3)
+      .addReg(AtomicValPhi)
+      .addMBB(BBLoop);
+
+  BB->addSuccessor(BBLoop);
+  BB->addSuccessor(BBExit);
+  BB = BBExit;
+  auto St = BBExit->begin();
+
+  // Result = (word returned by S32C1I >> BitOffs) & Mask1.
+  unsigned Shifted = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+
+  BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), Shifted).addReg(AtomicValLoop);
+
+  BuildMI(*BB, St, DL, TII.get(Xtensa::AND), Res.getReg())
+      .addReg(Shifted)
+      .addReg(Mask1);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+
+  return BB;
+}
+
MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
@@ -1696,6 +2464,101 @@ MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
return MBB;
}
+ case Xtensa::ATOMIC_CMP_SWAP_8_P: { // Sub-word cmpxchg: last argument is isByteOperand (1 = i8).
+ return emitAtomicCmpSwap(MI, MBB, 1);
+ }
+ case Xtensa::ATOMIC_CMP_SWAP_16_P: { // isByteOperand = 0 selects the i16 form.
+ return emitAtomicCmpSwap(MI, MBB, 0);
+ }
+ case Xtensa::ATOMIC_CMP_SWAP_32_P: { // Word-sized cmpxchg is emitted inline as WSR + S32C1I.
+ MachineOperand &R = MI.getOperand(0); // $dst
+ MachineOperand &Addr = MI.getOperand(1); // $ptr
+ MachineOperand &Cmp = MI.getOperand(2); // $cmp
+ MachineOperand &Swap = MI.getOperand(3); // $swap
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1) // SCOMPARE1 <- expected value
+ .addReg(Cmp.getReg());
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::S32C1I), R.getReg()) // S32C1I ties its result to the value operand
+ .addReg(Swap.getReg())
+ .addReg(Addr.getReg())
+ .addImm(0);
+
+ MI.eraseFromParent(); // The pseudo is replaced by the two instructions above.
+ return MBB;
+ }
+ case Xtensa::ATOMIC_SWAP_8_P: { // Sub-word swap: isByteOperand = 1 for i8.
+ return emitAtomicSwap(MI, MBB, 1);
+ }
+ case Xtensa::ATOMIC_SWAP_16_P: { // isByteOperand = 0 for i16.
+ return emitAtomicSwap(MI, MBB, 0);
+ }
+ case Xtensa::ATOMIC_SWAP_32_P: { // 32-bit swap uses the overload without isByteOperand.
+ return emitAtomicSwap(MI, MBB);
+ }
+ case Xtensa::ATOMIC_LOAD_ADD_8_P: // i8 forms: emitAtomicRMW(MI, BB, isByteOperand=true, Opcode, inv, minmax).
+ return emitAtomicRMW(MI, MBB, true, Xtensa::ADD, false, false);
+ case Xtensa::ATOMIC_LOAD_SUB_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::SUB, false, false);
+ case Xtensa::ATOMIC_LOAD_OR_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::OR, false, false);
+ case Xtensa::ATOMIC_LOAD_XOR_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::XOR, false, false);
+ case Xtensa::ATOMIC_LOAD_AND_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::AND, false, false);
+ case Xtensa::ATOMIC_LOAD_NAND_8_P: // NAND is requested as AND with inv = true.
+ return emitAtomicRMW(MI, MBB, true, Xtensa::AND, true, false);
+ case Xtensa::ATOMIC_LOAD_MIN_8_P: // MIN/MAX/UMIN/UMAX pass a branch opcode with minmax = true.
+ return emitAtomicRMW(MI, MBB, true, Xtensa::BGE, false, true);
+ case Xtensa::ATOMIC_LOAD_MAX_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::BLT, false, true);
+ case Xtensa::ATOMIC_LOAD_UMIN_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::BGEU, false, true);
+ case Xtensa::ATOMIC_LOAD_UMAX_8_P:
+ return emitAtomicRMW(MI, MBB, true, Xtensa::BLTU, false, true);
+
+ case Xtensa::ATOMIC_LOAD_ADD_16_P: // i16 forms: same dispatch with isByteOperand = false.
+ return emitAtomicRMW(MI, MBB, false, Xtensa::ADD, false, false);
+ case Xtensa::ATOMIC_LOAD_SUB_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::SUB, false, false);
+ case Xtensa::ATOMIC_LOAD_OR_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::OR, false, false);
+ case Xtensa::ATOMIC_LOAD_XOR_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::XOR, false, false);
+ case Xtensa::ATOMIC_LOAD_AND_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::AND, false, false);
+ case Xtensa::ATOMIC_LOAD_NAND_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::AND, true, false);
+ case Xtensa::ATOMIC_LOAD_MIN_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::BGE, false, true);
+ case Xtensa::ATOMIC_LOAD_MAX_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::BLT, false, true);
+ case Xtensa::ATOMIC_LOAD_UMIN_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::BGEU, false, true);
+ case Xtensa::ATOMIC_LOAD_UMAX_16_P:
+ return emitAtomicRMW(MI, MBB, false, Xtensa::BLTU, false, true);
+
+ case Xtensa::ATOMIC_LOAD_ADD_32_P: // i32 forms use the overload (MI, BB, Opcode, inv, minmax).
+ return emitAtomicRMW(MI, MBB, Xtensa::ADD, false, false);
+ case Xtensa::ATOMIC_LOAD_SUB_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::SUB, false, false);
+ case Xtensa::ATOMIC_LOAD_OR_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::OR, false, false);
+ case Xtensa::ATOMIC_LOAD_XOR_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::XOR, false, false);
+ case Xtensa::ATOMIC_LOAD_AND_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::AND, false, false);
+ case Xtensa::ATOMIC_LOAD_NAND_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::AND, true, false);
+ case Xtensa::ATOMIC_LOAD_MIN_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::BGE, false, true);
+ case Xtensa::ATOMIC_LOAD_MAX_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::BLT, false, true);
+ case Xtensa::ATOMIC_LOAD_UMIN_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::BGEU, false, true);
+ case Xtensa::ATOMIC_LOAD_UMAX_32_P:
+ return emitAtomicRMW(MI, MBB, Xtensa::BLTU, false, true);
+
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index e6ddf9864932a..c3b1f858c8983 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -37,6 +37,7 @@ enum {
// of the field [1..16]
EXTUI,
+ MEMW,
MOVSP,
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
@@ -145,6 +146,10 @@ class XtensaTargetLowering : public TargetLowering {
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override { // TargetLowering override; I is not inspected.
+ return true; // Fences are requested for every atomic instruction, whatever its kind.
+ }
+
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
@@ -195,12 +200,26 @@ class XtensaTargetLowering : public TargetLowering {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+
SDValue getAddrPCRel(SDValue Op, SelectionDAG &DAG) const;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
MachineBasicBlock *emitSelectCC(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB, // Sub-word swap expansion.
+ int isByteOperand) const; // Callers pass 1 for i8, 0 for i16.
+ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, // Sub-word cmpxchg expansion.
+ int isByteOperand) const; // Callers pass 1 for i8, 0 for i16.
+ MachineBasicBlock *emitAtomicSwap(MachineInstr &MI, // 32-bit swap expansion.
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitAtomicRMW(MachineInstr &MI, MachineBasicBlock *BB, // Sub-word read-modify-write expansion.
+ bool isByteOperand, unsigned Opcode, // true for i8, false for i16; Opcode is an ALU or branch opcode.
+ bool inv, bool minmax) const; // Callers set inv for NAND and minmax for MIN/MAX/UMIN/UMAX.
+ MachineBasicBlock *emitAtomicRMW(MachineInstr &MI, MachineBasicBlock *BB, // 32-bit read-modify-write expansion.
+ unsigned Opcode, bool inv,
+ bool minmax) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index 31608f4659365..06a9de1bce7e5 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -496,6 +496,8 @@ def EXTW : RRR_Inst<0x00, 0x00, 0x00, (outs), (ins),
let hasSideEffects = 1;
}
+def : Pat<(Xtensa_mem_barrier), (MEMW)>; // Select the XtensaISD::MEMW barrier node to the MEMW instruction.
+
//===----------------------------------------------------------------------===//
// Illegal instructions
//===----------------------------------------------------------------------===//
@@ -1498,6 +1500,162 @@ def RFI : RRR_Inst<0x00, 0x00, 0x00, (outs), (ins uimm4:$imm),
let t = 0x1;
}
+//===----------------------------------------------------------------------===//
+// S32C1I
+//===----------------------------------------------------------------------===//
+
+let mayStore = 1, mayLoad = 1, Predicates = [HasS32C1I] in { // Marked as both a load and a store; gated on HasS32C1I.
+ def S32C1I : RRI8_Inst<0x02, (outs AR:$a), (ins AR:$t, mem32:$addr),
+ "s32c1i\t$t, $addr", []> {
+ bits<12> addr;
+
+ let r = 0x0e;
+ let Uses = [SCOMPARE1]; // Implicitly reads SCOMPARE1, the expected data value for the operation.
+ let Constraints = "$a = $t"; // The result register is tied to the value operand.
+ let imm8{7-0} = addr{11-4}; // NOTE(review): presumably the offset part of the mem32 operand - confirm against its encoder.
+ let s{3-0} = addr{3-0}; // NOTE(review): presumably the base register part of the mem32 operand - confirm.
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic patterns
+//===----------------------------------------------------------------------===//
+
+// Atomic load/store are available under both +s32c1i and +force-atomics.
+// Fences will be inserted for atomic load/stores according to the logic in
+// XtensaTargetLowering.
+let Predicates = [HasAtomicLdSt] in { // Atomic loads/stores select the plain load/store instructions of matching width.
+ def : Pat<(i32 (atomic_load_8 addr_ish1:$addr)), (L8UI addr_ish1:$addr)>;
+ def : Pat<(i32 (atomic_load_16 addr_ish2:$addr)), (L16UI addr_ish2:$addr)>;
+ def : Pat<(i32 (atomic_load_32 addr_ish4:$addr)), (L32I addr_ish4:$addr)>;
+
+ def : Pat<(atomic_store_8 AR:$t, addr_ish1:$addr), (S8I AR:$t, addr_ish1:$addr)>; // Stores mirror the loads: S8I / S16I / S32I.
+ def : Pat<(atomic_store_16 AR:$t, addr_ish2:$addr), (S16I AR:$t, addr_ish2:$addr)>;
+ def : Pat<(atomic_store_32 AR:$t, addr_ish4:$addr), (S32I AR:$t, addr_ish4:$addr)>;
+}
+
+let usesCustomInserter = 1, Predicates = [HasS32C1I] in { // Pseudos expanded by XtensaTargetLowering::EmitInstrWithCustomInserter.
+ def ATOMIC_CMP_SWAP_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap), // Compare-and-swap: i8 / i16 / i32.
+ "!atomic_cmp_swap_8_p, $dst, $ptr, $cmp, $swap",
+ [(set AR:$dst, (atomic_cmp_swap_i8 AR:$ptr, AR:$cmp, AR:$swap))]>;
+ def ATOMIC_CMP_SWAP_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap),
+ "!atomic_cmp_swap_16_p, $dst, $ptr, $cmp, $swap",
+ [(set AR:$dst, (atomic_cmp_swap_i16 AR:$ptr, AR:$cmp, AR:$swap))]>;
+ def ATOMIC_CMP_SWAP_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap),
+ "!atomic_cmp_swap_32_p, $dst, $ptr, $cmp, $swap",
+ [(set AR:$dst, (atomic_cmp_swap_i32 AR:$ptr, AR:$cmp, AR:$swap))]>;
+
+ def ATOMIC_SWAP_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$swap), // Unconditional swap: i8 / i16 / i32.
+ "!atomic_swap_8_p, $dst, $ptr, $swap",
+ [(set AR:$dst, (atomic_swap_i8 AR:$ptr, AR:$swap))]>;
+ def ATOMIC_SWAP_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$swap),
+ "!atomic_swap_16_p, $dst, $ptr, $swap",
+ [(set AR:$dst, (atomic_swap_i16 AR:$ptr, AR:$swap))]>;
+ def ATOMIC_SWAP_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$swap),
+ "!atomic_swap_32_p, $dst, $ptr, $swap",
+ [(set AR:$dst, (atomic_swap_i32 AR:$ptr, AR:$swap))]>;
+
+ def ATOMIC_LOAD_ADD_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg), // Read-modify-write pseudos follow, three widths per operation.
+ "!atomic_load_add_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_add_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_ADD_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_add_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_add_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_ADD_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_add_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_add_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_SUB_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_sub_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_sub_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_SUB_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_sub_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_sub_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_SUB_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_sub_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_sub_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_AND_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_and_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_and_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_AND_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_and_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_and_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_AND_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_and_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_and_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_OR_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_or_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_or_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_OR_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_or_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_or_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_OR_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_or_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_or_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_XOR_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_xor_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_xor_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_XOR_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_xor_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_xor_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_XOR_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_xor_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_xor_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_NAND_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_nand_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_nand_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_NAND_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_nand_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_nand_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_NAND_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_nand_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_nand_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_MIN_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_min_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_min_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_MIN_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_min_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_min_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_MIN_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_min_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_min_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_MAX_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_max_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_max_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_MAX_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_max_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_max_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_MAX_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_max_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_max_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_UMIN_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_umin_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_umin_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_UMIN_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_umin_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_umin_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_UMIN_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_umin_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_umin_i32 AR:$ptr, AR:$arg))]>;
+
+ def ATOMIC_LOAD_UMAX_8_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_umax_8_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_umax_i8 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_UMAX_16_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_umax_16_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_umax_i16 AR:$ptr, AR:$arg))]>;
+ def ATOMIC_LOAD_UMAX_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$arg),
+ "!atomic_load_umax_32_p, $dst, $ptr, $arg",
+ [(set AR:$dst, (atomic_load_umax_i32 AR:$ptr, AR:$arg))]>;
+}
+
//===----------------------------------------------------------------------===//
// DSP Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td
index 56be3d51505e1..ce882b3ce4ea0 100644
--- a/llvm/lib/Target/Xtensa/XtensaOperators.td
+++ b/llvm/lib/Target/Xtensa/XtensaOperators.td
@@ -39,6 +39,8 @@ def SDT_XtensaEXTUI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCi
def SDT_XtensaMOVSP : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_XtensaMEMBARRIER : SDTypeProfile<0, 0, []>; // Type profile for the memory barrier node: no results, no operands.
+
def SDT_XtensaRUR : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
@@ -79,6 +81,9 @@ def Xtensa_extui: SDNode<"XtensaISD::EXTUI", SDT_XtensaEXTUI>;
def Xtensa_movsp: SDNode<"XtensaISD::MOVSP", SDT_XtensaMOVSP,
[SDNPHasChain, SDNPSideEffect, SDNPInGlue]>;
+def Xtensa_mem_barrier: SDNode<"XtensaISD::MEMW", SDT_XtensaMEMBARRIER, // DAG node for XtensaISD::MEMW.
+ [SDNPHasChain, SDNPSideEffect]>; // Chained and flagged as having side effects.
+
def Xtensa_rur: SDNode<"XtensaISD::RUR", SDT_XtensaRUR,
[SDNPInGlue]>;
diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td
index 596c4105c1118..d1f2c6b8e43a3 100644
--- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td
@@ -84,6 +84,9 @@ def SAR : SRReg<3, "sar", ["SAR","3"]>;
// Boolean Register
def BREG : SRReg<4, "br", ["BR","4"]>;
+// Expected data value for S32C1I operation
+def SCOMPARE1 : SRReg<12, "scompare1", ["SCOMPARE1", "12"]>;
+
// Literal base
def LITBASE : SRReg<5, "litbase", ["LITBASE", "5"]>;
@@ -97,6 +100,9 @@ def IBREAKENABLE : SRReg<96, "ibreakenable", ["IBREAKENABLE", "96"]>;
// Memory Control Register
def MEMCTL : SRReg<97, "memctl", ["MEMCTL", "97"]>;
+// Atomic Operation Control
+def ATOMCTL : SRReg<99, "atomctl", ["ATOMCTL", "99"]>;
+
def DDR : SRReg<104, "ddr", ["DDR", "104"]>;
// Instuction break address register 0
@@ -218,8 +224,8 @@ def MR23 : RegisterClass<"Xtensa", [i32], 32, (add M2, M3)>;
def MR : RegisterClass<"Xtensa", [i32], 32, (add MR01, MR23)>;
def SR : RegisterClass<"Xtensa", [i32], 32, (add
- LBEG, LEND, LCOUNT, SAR, BREG, LITBASE, ACCLO, ACCHI, MR,
- WINDOWBASE, WINDOWSTART, IBREAKENABLE, MEMCTL, DDR, IBREAKA0, IBREAKA1,
+ LBEG, LEND, LCOUNT, SAR, BREG, SCOMPARE1, LITBASE, ACCLO, ACCHI, MR,
+ WINDOWBASE, WINDOWSTART, IBREAKENABLE, MEMCTL, ATOMCTL, DDR, IBREAKA0, IBREAKA1,
DBREAKA0, DBREAKA1, DBREAKC0, DBREAKC1, CONFIGID0, EPC1, EPC2, EPC3, EPC4, EPC5,
EPC6, EPC7, DEPC, EPS2, EPS3, EPS4, EPS5, EPS6, EPS7, CONFIGID1, EXCSAVE1, EXCSAVE2,
EXCSAVE3, EXCSAVE4, EXCSAVE5, EXCSAVE6, EXCSAVE7, CPENABLE, INTERRUPT, INTSET, INTCLEAR, INTENABLE,
diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h b/llvm/lib/Target/Xtensa/XtensaSubtarget.h
index fd677a451f3fd..b406534a0ec77 100644
--- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h
+++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h
@@ -77,6 +77,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo {
bool hasMul32() const { return HasMul32; }
bool hasMul32High() const { return HasMul32High; }
bool hasDiv32() const { return HasDiv32; }
+ bool hasS32C1I() const { return HasS32C1I; } // Accessor for the HasS32C1I subtarget flag.
+ bool hasForcedAtomics() const { return HasForcedAtomics; } // Accessor for the HasForcedAtomics subtarget flag.
bool hasSingleFloat() const { return HasSingleFloat; }
bool hasRegionProtection() const { return HasRegionProtection; }
bool hasRelocatableVector() const { return HasRelocatableVector; }
diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
index 8d2dca6c23721..c9f1ca8b46dab 100644
--- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
@@ -107,6 +107,7 @@ class XtensaPassConfig : public TargetPassConfig {
}
bool addInstSelector() override;
+ void addIRPasses() override;
void addPreEmitPass() override;
};
} // end anonymous namespace
@@ -116,6 +117,11 @@ bool XtensaPassConfig::addInstSelector() {
return false;
}
+void XtensaPassConfig::addIRPasses() { // Adds the AtomicExpand pass ahead of the default IR passes.
+ addPass(createAtomicExpandLegacyPass());
+ TargetPassConfig::addIRPasses(); // Then defer to the base-class IR pass setup.
+}
+
void XtensaPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
TargetPassConfig *XtensaTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/llvm/test/CodeGen/Xtensa/atomic-load-store.ll b/llvm/test/CodeGen/Xtensa/atomic-load-store.ll
new file mode 100644
index 0000000000000..4f7266195db28
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/atomic-load-store.ll
@@ -0,0 +1,498 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
+; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
+
+define i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i8_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI0_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i8_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l8ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i8, ptr %a unordered, align 1
+ ret i8 %1
+}
+
+define i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI1_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l8ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ ret i8 %1
+}
+
+define i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 2
+; XTENSA-NEXT: l32r a8, .LCPI2_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l8ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i8, ptr %a acquire, align 1
+ ret i8 %1
+}
+
+define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 5
+; XTENSA-NEXT: l32r a8, .LCPI3_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l8ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i8, ptr %a seq_cst, align 1
+ ret i8 %1
+}
+
+define i16 @atomic_load_i16_unordered(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i16_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI4_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i16_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l16ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i16, ptr %a unordered, align 2
+ ret i16 %1
+}
+
+define i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI5_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l16ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i16, ptr %a monotonic, align 2
+ ret i16 %1
+}
+
+define i16 @atomic_load_i16_acquire(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 2
+; XTENSA-NEXT: l32r a8, .LCPI6_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l16ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i16, ptr %a acquire, align 2
+ ret i16 %1
+}
+
+define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 5
+; XTENSA-NEXT: l32r a8, .LCPI7_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l16ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i16, ptr %a seq_cst, align 2
+ ret i16 %1
+}
+
+define i32 @atomic_load_i32_unordered(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i32_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI8_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i32_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i32, ptr %a unordered, align 4
+ ret i32 %1
+}
+
+define i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI9_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i32, ptr %a monotonic, align 4
+ ret i32 %1
+}
+
+define i32 @atomic_load_i32_acquire(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 2
+; XTENSA-NEXT: l32r a8, .LCPI10_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i32, ptr %a acquire, align 4
+ ret i32 %1
+}
+
+define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
+; XTENSA-LABEL: atomic_load_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 5
+; XTENSA-NEXT: l32r a8, .LCPI11_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_load_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = load atomic i32, ptr %a seq_cst, align 4
+ ret i32 %1
+}
+
+define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i8_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI12_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i8_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: s8i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i8 %b, ptr %a unordered, align 1
+ ret void
+}
+
+define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI13_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: s8i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i8 %b, ptr %a monotonic, align 1
+ ret void
+}
+
+define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI14_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: s8i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i8 %b, ptr %a release, align 1
+ ret void
+}
+
+define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI15_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: s8i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i8 %b, ptr %a seq_cst, align 1
+ ret void
+}
+
+define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i16_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI16_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i16_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: s16i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i16 %b, ptr %a unordered, align 2
+ ret void
+}
+
+define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI17_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: s16i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i16 %b, ptr %a monotonic, align 2
+ ret void
+}
+
+define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI18_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: s16i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i16 %b, ptr %a release, align 2
+ ret void
+}
+
+define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI19_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: s16i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i16 %b, ptr %a seq_cst, align 2
+ ret void
+}
+
+define void @atomic_store_i32_unordered(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i32_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI20_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i32_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: s32i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 %b, ptr %a unordered, align 4
+ ret void
+}
+
+define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI21_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: s32i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 %b, ptr %a monotonic, align 4
+ ret void
+}
+
+define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI22_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: s32i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 %b, ptr %a release, align 4
+ ret void
+}
+
+define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomic_store_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI23_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomic_store_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: s32i a3, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 %b, ptr %a seq_cst, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/Xtensa/atomic-rmw.ll b/llvm/test/CodeGen/Xtensa/atomic-rmw.ll
new file mode 100644
index 0000000000000..43a9ba247e10d
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/atomic-rmw.ll
@@ -0,0 +1,4728 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
+; RUN: llc -mtriple=xtensa -mattr=+windowed,s32c1i -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
+
+define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI0_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 255
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB0_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB0_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB0_2: # Parent Loop BB0_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB0_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB0_2
+; XTENSA-ATOMIC-NEXT: .LBB0_4: # in Loop: Header=BB0_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB0_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 7
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI1_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 255
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB1_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB1_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB1_2: # Parent Loop BB1_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB1_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB1_2
+; XTENSA-ATOMIC-NEXT: .LBB1_4: # in Loop: Header=BB1_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB1_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 7
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI2_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 255
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB2_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB2_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB2_2: # Parent Loop BB2_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB2_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB2_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB2_2
+; XTENSA-ATOMIC-NEXT: .LBB2_4: # in Loop: Header=BB2_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB2_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 7
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI3_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 255
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB3_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB3_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB3_2: # Parent Loop BB3_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB3_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB3_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB3_2
+; XTENSA-ATOMIC-NEXT: .LBB3_4: # in Loop: Header=BB3_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB3_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 7
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI4_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 255
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB4_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB4_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB4_2: # Parent Loop BB4_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB4_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB4_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB4_2
+; XTENSA-ATOMIC-NEXT: .LBB4_4: # in Loop: Header=BB4_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB4_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 7
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI5_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB5_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI6_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB6_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI7_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB7_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI8_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB8_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI9_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB9_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI10_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB10_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI11_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB11_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI12_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB12_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI13_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB13_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI14_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB14_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI15_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB15_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI16_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB16_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI17_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB17_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI18_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB18_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI19_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB19_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI20_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: l32i a13, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB20_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a13
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI21_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: l32i a13, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB21_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a13
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI22_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: l32i a13, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB22_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a13
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI23_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: l32i a13, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB23_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a13
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI24_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: l32i a13, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB24_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a13
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI25_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB25_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI26_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB26_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI27_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB27_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI28_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB28_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI29_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB29_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i8_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI30_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i8_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB30_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i8_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI31_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i8_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB31_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i8_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI32_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i8_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB32_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i8_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI33_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i8_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB33_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i8_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI34_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i8_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB34_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+;define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i8 %b monotonic
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i8 %b acquire
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i8 %b release
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i8 %b acq_rel
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i8 %b seq_cst
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i8 %b monotonic
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i8 %b acquire
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i8 %b release
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i8 %b acq_rel
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i8 %b seq_cst
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i8 %b monotonic
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i8 %b acquire
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i8 %b release
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i8 %b acq_rel
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i8 %b seq_cst
+; ret i8 %1
+;}
+
+;define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i8 %b monotonic
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i8 %b acquire
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i8 %b release
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i8 %b acq_rel
+; ret i8 %1
+;}
+;
+;define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i8 %b seq_cst
+; ret i8 %1
+;}
+
+define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI35_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB35_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB35_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB35_2: # Parent Loop BB35_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB35_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB35_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB35_2
+; XTENSA-ATOMIC-NEXT: .LBB35_4: # in Loop: Header=BB35_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB35_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 15
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; Atomic i16 exchange, acquire ordering. XTENSA checks expect callx8 with a12 = 2 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_xchg_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI36_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB36_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB36_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB36_2: # Parent Loop BB36_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB36_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB36_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB36_2
+; XTENSA-ATOMIC-NEXT: .LBB36_4: # in Loop: Header=BB36_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB36_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 15
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i16 %b acquire ; XTENSA-ATOMIC checks expect a nested s32c1i retry loop on the aligned 32-bit word containing %a, with no memw at entry and one memw before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and sign-extended from bit 15.
+}
+
+define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; Atomic i16 exchange, release ordering. XTENSA checks expect callx8 with a12 = 3 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_xchg_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI37_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB37_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB37_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB37_2: # Parent Loop BB37_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB37_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB37_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB37_2
+; XTENSA-ATOMIC-NEXT: .LBB37_4: # in Loop: Header=BB37_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB37_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 15
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i16 %b release ; XTENSA-ATOMIC checks expect a nested s32c1i retry loop on the aligned 32-bit word containing %a, with a memw right after entry and none before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and sign-extended from bit 15.
+}
+
+define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; Atomic i16 exchange, acq_rel ordering. XTENSA checks expect callx8 with a12 = 4 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI38_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB38_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB38_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB38_2: # Parent Loop BB38_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB38_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB38_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB38_2
+; XTENSA-ATOMIC-NEXT: .LBB38_4: # in Loop: Header=BB38_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB38_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 15
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel ; XTENSA-ATOMIC checks expect a nested s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and sign-extended from bit 15.
+}
+
+define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; Atomic i16 exchange, seq_cst ordering. XTENSA checks expect callx8 with a12 = 5 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI39_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a11, -1
+; XTENSA-ATOMIC-NEXT: sll a10, a10
+; XTENSA-ATOMIC-NEXT: xor a11, a10, a11
+; XTENSA-ATOMIC-NEXT: l32i a12, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: and a14, a13, a10
+; XTENSA-ATOMIC-NEXT: .LBB39_1: # =>This Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: # Child Loop BB39_2 Depth 2
+; XTENSA-ATOMIC-NEXT: or a13, a14, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: .LBB39_2: # Parent Loop BB39_1 Depth=1
+; XTENSA-ATOMIC-NEXT: # => This Inner Loop Header: Depth=2
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a12, a15
+; XTENSA-ATOMIC-NEXT: or a7, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a7, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: beq a7, a14, .LBB39_4
+; XTENSA-ATOMIC-NEXT: # %bb.3: # in Loop: Header=BB39_2 Depth=2
+; XTENSA-ATOMIC-NEXT: and a7, a14, a11
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB39_2
+; XTENSA-ATOMIC-NEXT: .LBB39_4: # in Loop: Header=BB39_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a14, a14, a10
+; XTENSA-ATOMIC-NEXT: bne a14, a13, .LBB39_1
+; XTENSA-ATOMIC-NEXT: # %bb.5:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: sext a2, a8, 15
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst ; XTENSA-ATOMIC checks expect a nested s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and sign-extended from bit 15.
+}
+
+define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; Atomic i16 add, monotonic ordering. XTENSA checks expect callx8 with a12 = 0 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_add_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI40_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB40_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i16 %b monotonic ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with no memw anywhere in the function.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; Atomic i16 add, acquire ordering. XTENSA checks expect callx8 with a12 = 2 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_add_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI41_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB41_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i16 %b acquire ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with no memw at entry and one memw before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; Atomic i16 add, release ordering. XTENSA checks expect callx8 with a12 = 3 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_add_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI42_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB42_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i16 %b release ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw right after entry and none before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; Atomic i16 add, acq_rel ordering. XTENSA checks expect callx8 with a12 = 4 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_add_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI43_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB43_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i16 %b acq_rel ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; Atomic i16 add, seq_cst ordering. XTENSA checks expect callx8 with a12 = 5 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_add_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI44_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB44_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i16 %b seq_cst ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; Atomic i16 sub, monotonic ordering. XTENSA checks expect callx8 with a12 = 0 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_sub_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI45_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB45_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i16 %b monotonic ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with no memw anywhere in the function.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; Atomic i16 sub, acquire ordering. XTENSA checks expect callx8 with a12 = 2 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_sub_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI46_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB46_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i16 %b acquire ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with no memw at entry and one memw before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; Atomic i16 sub, release ordering. XTENSA checks expect callx8 with a12 = 3 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_sub_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI47_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB47_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i16 %b release ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw right after entry and none before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; Atomic i16 sub, acq_rel ordering. XTENSA checks expect callx8 with a12 = 4 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_sub_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI48_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB48_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i16 %b acq_rel ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; Atomic i16 sub, seq_cst ordering. XTENSA checks expect callx8 with a12 = 5 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_sub_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI49_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: sub a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB49_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i16 %b seq_cst ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; Atomic i16 and, monotonic ordering. XTENSA checks expect callx8 with a12 = 0 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_and_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI50_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB50_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i16 %b monotonic ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with no memw anywhere in the function.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; Atomic i16 and, acquire ordering. XTENSA checks expect callx8 with a12 = 2 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_and_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI51_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB51_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i16 %b acquire ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with no memw at entry and one memw before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; Atomic i16 and, release ordering. XTENSA checks expect callx8 with a12 = 3 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_and_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI52_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB52_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i16 %b release ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw right after entry and none before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; Atomic i16 and, acq_rel ordering. XTENSA checks expect callx8 with a12 = 4 (presumably a libcall's memory-order argument).
+; XTENSA-LABEL: atomicrmw_and_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI53_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB53_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i16 %b acq_rel ; XTENSA-ATOMIC checks expect a single s32c1i retry loop on the aligned 32-bit word containing %a, with a memw both right after entry and before retw.
+ ret i16 %1 ; In the XTENSA-ATOMIC sequence the result is shifted back down and masked with 0xffff.
+}
+
+define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI54_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: and a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB54_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI55_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB55_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a13
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI56_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB56_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a13
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI57_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB57_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a13
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI58_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB58_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a13
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_nand_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI59_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_nand_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: l32i a13, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a12, a3
+; XTENSA-ATOMIC-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a14, a13, a13
+; XTENSA-ATOMIC-NEXT: and a13, a14, a11
+; XTENSA-ATOMIC-NEXT: and a13, a13, a12
+; XTENSA-ATOMIC-NEXT: and a13, a13, a11
+; XTENSA-ATOMIC-NEXT: xor a15, a14, a11
+; XTENSA-ATOMIC-NEXT: or a15, a13, a15
+; XTENSA-ATOMIC-NEXT: wsr a14, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a15, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a13, a15
+; XTENSA-ATOMIC-NEXT: bne a15, a14, .LBB59_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a13
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw nand ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI60_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB60_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI61_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB61_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI62_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB62_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI63_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB63_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI64_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: or a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB64_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i16_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI65_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i16_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB65_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i16_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI66_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i16_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB66_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i16_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI67_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i16_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB67_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i16_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI68_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i16_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB68_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i16_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI69_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i16_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a9, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a10, 1
+; XTENSA-ATOMIC-NEXT: slli a10, a10, 16
+; XTENSA-ATOMIC-NEXT: addi a10, a10, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a10
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a9, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a3
+; XTENSA-ATOMIC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: xor a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a9, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB69_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+;define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i16 %b monotonic
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i16 %b acquire
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i16 %b release
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i16 %b acq_rel
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i16 %b seq_cst
+; ret i16 %1
+;}
+
+;define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i16 %b monotonic
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i16 %b acquire
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i16 %b release
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i16 %b acq_rel
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i16 %b seq_cst
+; ret i16 %1
+;}
+
+;define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i16 %b monotonic
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i16 %b acquire
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i16 %b release
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i16 %b acq_rel
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i16 %b seq_cst
+; ret i16 %1
+;}
+
+;define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i16 %b monotonic
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i16 %b acquire
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i16 %b release
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i16 %b acq_rel
+; ret i16 %1
+;}
+;
+;define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i16 %b seq_cst
+; ret i16 %1
+;}
+
+define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI70_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a9, a9
+; XTENSA-ATOMIC-NEXT: wsr a8, scompare1
+; XTENSA-ATOMIC-NEXT: or a9, a3, a3
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: bne a9, a8, .LBB70_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI71_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a9, a9
+; XTENSA-ATOMIC-NEXT: wsr a8, scompare1
+; XTENSA-ATOMIC-NEXT: or a9, a3, a3
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: bne a9, a8, .LBB71_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI72_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a9, a9
+; XTENSA-ATOMIC-NEXT: wsr a8, scompare1
+; XTENSA-ATOMIC-NEXT: or a9, a3, a3
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: bne a9, a8, .LBB72_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i32_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI73_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i32_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a9, a9
+; XTENSA-ATOMIC-NEXT: wsr a8, scompare1
+; XTENSA-ATOMIC-NEXT: or a9, a3, a3
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: bne a9, a8, .LBB73_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xchg_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI74_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xchg_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a9, a9
+; XTENSA-ATOMIC-NEXT: wsr a8, scompare1
+; XTENSA-ATOMIC-NEXT: or a9, a3, a3
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: bne a9, a8, .LBB74_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI75_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: add a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB75_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI76_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: add a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB76_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI77_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: add a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB77_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i32_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI78_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i32_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: add a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB78_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_add_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI79_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_add_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: add a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB79_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw add ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI80_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: sub a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB80_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI81_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: sub a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB81_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI82_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: sub a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB82_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i32_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI83_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i32_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: sub a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB83_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_sub_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI84_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_sub_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: sub a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB84_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw sub ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI85_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: and a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB85_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI86_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: and a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB86_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI87_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: and a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB87_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i32_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI88_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i32_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: and a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB88_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_and_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI89_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_and_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: and a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB89_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw and ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
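+; FIXME(review): the i32 nand tests below are commented out and have no CHECK lines; the reason is not recorded here. Re-enable and regenerate assertions once confirmed.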
+;define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw nand ptr %a, i32 %b monotonic
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw nand ptr %a, i32 %b acquire
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw nand ptr %a, i32 %b release
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw nand ptr %a, i32 %b acq_rel
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw nand ptr %a, i32 %b seq_cst
+; ret i32 %1
+;}
+
+define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI90_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: or a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB90_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI91_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: or a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB91_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI92_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: or a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB92_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i32_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI93_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i32_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: or a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB93_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_or_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI94_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_or_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: or a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB94_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw or ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI95_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: xor a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB95_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 2
+; XTENSA-NEXT: l32r a8, .LCPI96_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: xor a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB96_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI97_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: xor a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB97_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i32_acq_rel:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 4
+; XTENSA-NEXT: l32r a8, .LCPI98_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i32_acq_rel:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: xor a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB98_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; XTENSA-LABEL: atomicrmw_xor_i32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a11, a3, a3
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI99_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: atomicrmw_xor_i32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: xor a8, a10, a3
+; XTENSA-ATOMIC-NEXT: wsr a10, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a9, a8
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB99_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %1 = atomicrmw xor ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
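+; FIXME(review): the i32 max/min/umax/umin tests below are commented out and have no CHECK lines; the reason is not recorded here. Re-enable and regenerate assertions once confirmed.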
+;define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i32 %b monotonic
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i32 %b acquire
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i32 %b release
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i32 %b acq_rel
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw max ptr %a, i32 %b seq_cst
+; ret i32 %1
+;}
+
+;define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i32 %b monotonic
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i32 %b acquire
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i32 %b release
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i32 %b acq_rel
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw min ptr %a, i32 %b seq_cst
+; ret i32 %1
+;}
+
+;define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i32 %b monotonic
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i32 %b acquire
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i32 %b release
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i32 %b acq_rel
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umax ptr %a, i32 %b seq_cst
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i32 %b monotonic
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i32 %b acquire
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i32 %b release
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i32 %b acq_rel
+; ret i32 %1
+;}
+;
+;define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; %1 = atomicrmw umin ptr %a, i32 %b seq_cst
+; ret i32 %1
+;}
diff --git a/llvm/test/CodeGen/Xtensa/forced-atomics.ll b/llvm/test/CodeGen/Xtensa/forced-atomics.ll
new file mode 100644
index 0000000000000..f803f90d23073
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/forced-atomics.ll
@@ -0,0 +1,1288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=xtensa -mattr=+windowed -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA
+; RUN: llc -mtriple=xtensa -mattr=+windowed,+s32c1i,+forced-atomics -verify-machineinstrs < %s | FileCheck %s --check-prefixes=XTENSA-ATOMIC
+
+define i8 @load8(ptr %p) nounwind {
+; XTENSA-LABEL: load8:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 5
+; XTENSA-NEXT: l32r a8, .LCPI0_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: load8:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l8ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = load atomic i8, ptr %p seq_cst, align 1
+ ret i8 %v
+}
+
+define void @store8(ptr %p) nounwind {
+; XTENSA-LABEL: store8:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI1_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: store8:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 0
+; XTENSA-ATOMIC-NEXT: s8i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i8 0, ptr %p seq_cst, align 1
+ ret void
+}
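+; NOTE(review): the XTENSA-ATOMIC checks below contain mov.n (presumably a Code Density Option instruction) although the RUN line does not enable +density; confirm the atomic pseudo expansion may emit it unconditionally.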
+define i8 @rmw8(ptr %p) nounwind {
+; XTENSA-LABEL: rmw8:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI2_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw8:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a13, 1
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a9, a8, 3
+; XTENSA-ATOMIC-NEXT: movi a8, 255
+; XTENSA-ATOMIC-NEXT: ssl a9
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a8
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a13
+; XTENSA-ATOMIC-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB2_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a9
+; XTENSA-ATOMIC-NEXT: srl a9, a14
+; XTENSA-ATOMIC-NEXT: and a2, a9, a8
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw add ptr %p, i8 1 seq_cst, align 1
+ ret i8 %v
+}
+
+define i8 @cmpxchg8(ptr %p) nounwind {
+; XTENSA-LABEL: cmpxchg8:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a8, 0
+; XTENSA-NEXT: s8i a8, a1, 0
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: movi a12, 1
+; XTENSA-NEXT: movi a13, 5
+; XTENSA-NEXT: l32r a8, .LCPI3_0
+; XTENSA-NEXT: or a14, a13, a13
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: l8ui a2, a1, 0
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: cmpxchg8:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: slli a8, a2, 3
+; XTENSA-ATOMIC-NEXT: movi a9, 24
+; XTENSA-ATOMIC-NEXT: and a8, a8, a9
+; XTENSA-ATOMIC-NEXT: movi a9, 255
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a9, a9
+; XTENSA-ATOMIC-NEXT: movi a10, -1
+; XTENSA-ATOMIC-NEXT: xor a9, a9, a10
+; XTENSA-ATOMIC-NEXT: movi a10, -4
+; XTENSA-ATOMIC-NEXT: and a10, a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a11, a10, 0
+; XTENSA-ATOMIC-NEXT: and a7, a11, a9
+; XTENSA-ATOMIC-NEXT: movi a11, 1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a12, a11
+; XTENSA-ATOMIC-NEXT: movi a13, 0
+; XTENSA-ATOMIC-NEXT: .LBB3_1: # %partword.cmpxchg.loop
+; XTENSA-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a15, a12
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: or a7, a11, a11
+; XTENSA-ATOMIC-NEXT: beq a14, a15, .LBB3_3
+; XTENSA-ATOMIC-NEXT: # %bb.2: # %partword.cmpxchg.loop
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB3_1 Depth=1
+; XTENSA-ATOMIC-NEXT: or a7, a13, a13
+; XTENSA-ATOMIC-NEXT: .LBB3_3: # %partword.cmpxchg.loop
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB3_1 Depth=1
+; XTENSA-ATOMIC-NEXT: bnez a7, .LBB3_5
+; XTENSA-ATOMIC-NEXT: # %bb.4: # %partword.cmpxchg.failure
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB3_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a7, a14, a9
+; XTENSA-ATOMIC-NEXT: bne a15, a7, .LBB3_1
+; XTENSA-ATOMIC-NEXT: .LBB3_5: # %partword.cmpxchg.end
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a2, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %res = cmpxchg ptr %p, i8 0, i8 1 seq_cst seq_cst
+ %res.0 = extractvalue { i8, i1 } %res, 0
+ ret i8 %res.0
+}
+
+define i16 @load16(ptr %p) nounwind {
+; XTENSA-LABEL: load16:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 5
+; XTENSA-NEXT: l32r a8, .LCPI4_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: load16:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l16ui a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = load atomic i16, ptr %p seq_cst, align 2
+ ret i16 %v
+}
+
+define void @store16(ptr %p) nounwind {
+; XTENSA-LABEL: store16:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI5_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: store16:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 0
+; XTENSA-ATOMIC-NEXT: s16i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i16 0, ptr %p seq_cst, align 2
+ ret void
+}
+
+define i16 @rmw16(ptr %p) nounwind {
+; XTENSA-LABEL: rmw16:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI6_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw16:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a13, 1
+; XTENSA-ATOMIC-NEXT: movi a8, 3
+; XTENSA-ATOMIC-NEXT: and a8, a8, a2
+; XTENSA-ATOMIC-NEXT: sub a10, a2, a8
+; XTENSA-ATOMIC-NEXT: slli a8, a8, 3
+; XTENSA-ATOMIC-NEXT: slli a9, a13, 16
+; XTENSA-ATOMIC-NEXT: addi a9, a9, -1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: movi a12, -1
+; XTENSA-ATOMIC-NEXT: sll a11, a9
+; XTENSA-ATOMIC-NEXT: xor a12, a11, a12
+; XTENSA-ATOMIC-NEXT: l32i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: sll a13, a13
+; XTENSA-ATOMIC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a14, a14
+; XTENSA-ATOMIC-NEXT: and a14, a15, a11
+; XTENSA-ATOMIC-NEXT: add a14, a14, a13
+; XTENSA-ATOMIC-NEXT: and a14, a14, a11
+; XTENSA-ATOMIC-NEXT: and a7, a15, a12
+; XTENSA-ATOMIC-NEXT: or a7, a14, a7
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a7, a10, 0
+; XTENSA-ATOMIC-NEXT: mov.n a14, a7
+; XTENSA-ATOMIC-NEXT: bne a7, a15, .LBB6_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a8, a14
+; XTENSA-ATOMIC-NEXT: and a2, a8, a9
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw add ptr %p, i16 1 seq_cst, align 2
+ ret i16 %v
+}
+
+define i16 @cmpxchg16(ptr %p) nounwind {
+; XTENSA-LABEL: cmpxchg16:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a8, 0
+; XTENSA-NEXT: s16i a8, a1, 0
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: movi a12, 1
+; XTENSA-NEXT: movi a13, 5
+; XTENSA-NEXT: l32r a8, .LCPI7_0
+; XTENSA-NEXT: or a14, a13, a13
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: l16ui a2, a1, 0
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: cmpxchg16:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: slli a8, a2, 3
+; XTENSA-ATOMIC-NEXT: movi a9, 24
+; XTENSA-ATOMIC-NEXT: and a8, a8, a9
+; XTENSA-ATOMIC-NEXT: l32r a9, .LCPI7_0
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a9, a9
+; XTENSA-ATOMIC-NEXT: movi a10, -1
+; XTENSA-ATOMIC-NEXT: xor a9, a9, a10
+; XTENSA-ATOMIC-NEXT: movi a10, -4
+; XTENSA-ATOMIC-NEXT: and a10, a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a11, a10, 0
+; XTENSA-ATOMIC-NEXT: and a7, a11, a9
+; XTENSA-ATOMIC-NEXT: movi a11, 1
+; XTENSA-ATOMIC-NEXT: ssl a8
+; XTENSA-ATOMIC-NEXT: sll a12, a11
+; XTENSA-ATOMIC-NEXT: movi a13, 0
+; XTENSA-ATOMIC-NEXT: .LBB7_1: # %partword.cmpxchg.loop
+; XTENSA-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a15, a7, a7
+; XTENSA-ATOMIC-NEXT: or a14, a15, a12
+; XTENSA-ATOMIC-NEXT: wsr a15, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a14, a10, 0
+; XTENSA-ATOMIC-NEXT: or a7, a11, a11
+; XTENSA-ATOMIC-NEXT: beq a14, a15, .LBB7_3
+; XTENSA-ATOMIC-NEXT: # %bb.2: # %partword.cmpxchg.loop
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB7_1 Depth=1
+; XTENSA-ATOMIC-NEXT: or a7, a13, a13
+; XTENSA-ATOMIC-NEXT: .LBB7_3: # %partword.cmpxchg.loop
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB7_1 Depth=1
+; XTENSA-ATOMIC-NEXT: bnez a7, .LBB7_5
+; XTENSA-ATOMIC-NEXT: # %bb.4: # %partword.cmpxchg.failure
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB7_1 Depth=1
+; XTENSA-ATOMIC-NEXT: and a7, a14, a9
+; XTENSA-ATOMIC-NEXT: bne a15, a7, .LBB7_1
+; XTENSA-ATOMIC-NEXT: .LBB7_5: # %partword.cmpxchg.end
+; XTENSA-ATOMIC-NEXT: ssr a8
+; XTENSA-ATOMIC-NEXT: srl a2, a14
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %res = cmpxchg ptr %p, i16 0, i16 1 seq_cst seq_cst
+ %res.0 = extractvalue { i16, i1 } %res, 0
+ ret i16 %res.0
+}
+
+define i32 @load32_unordered(ptr %p) nounwind {
+; XTENSA-LABEL: load32_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI8_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: load32_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %v = load atomic i32, ptr %p unordered, align 4
+ ret i32 %v
+}
+
+define i32 @load32_monotonic(ptr %p) nounwind {
+; XTENSA-LABEL: load32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI9_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: load32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ %v = load atomic i32, ptr %p monotonic, align 4
+ ret i32 %v
+}
+
+define i32 @load32_acquire(ptr %p) nounwind {
+; XTENSA-LABEL: load32_acquire:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 2
+; XTENSA-NEXT: l32r a8, .LCPI10_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: load32_acquire:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = load atomic i32, ptr %p acquire, align 4
+ ret i32 %v
+}
+
+define i32 @load32_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: load32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 5
+; XTENSA-NEXT: l32r a8, .LCPI11_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: load32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: l32i a2, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = load atomic i32, ptr %p seq_cst, align 4
+ ret i32 %v
+}
+
+define void @store32_unordered(ptr %p) nounwind {
+; XTENSA-LABEL: store32_unordered:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI12_0
+; XTENSA-NEXT: or a12, a11, a11
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: store32_unordered:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 0
+; XTENSA-ATOMIC-NEXT: s32i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 0, ptr %p unordered, align 4
+ ret void
+}
+
+define void @store32_monotonic(ptr %p) nounwind {
+; XTENSA-LABEL: store32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: l32r a8, .LCPI13_0
+; XTENSA-NEXT: or a12, a11, a11
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: store32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 0
+; XTENSA-ATOMIC-NEXT: s32i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 0, ptr %p monotonic, align 4
+ ret void
+}
+
+define void @store32_release(ptr %p) nounwind {
+; XTENSA-LABEL: store32_release:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: movi a12, 3
+; XTENSA-NEXT: l32r a8, .LCPI14_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: store32_release:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 0
+; XTENSA-ATOMIC-NEXT: s32i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 0, ptr %p release, align 4
+ ret void
+}
+
+define void @store32_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: store32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 0
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI15_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: store32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 0
+; XTENSA-ATOMIC-NEXT: s32i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ store atomic i32 0, ptr %p seq_cst, align 4
+ ret void
+}
+
+define i32 @rmw32_add_monotonic(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_add_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 0
+; XTENSA-NEXT: l32r a8, .LCPI16_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_add_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a11, a10, a10
+; XTENSA-ATOMIC-NEXT: add a9, a11, a8
+; XTENSA-ATOMIC-NEXT: wsr a11, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a10, a9
+; XTENSA-ATOMIC-NEXT: bne a11, a9, .LBB16_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a9
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw add ptr %p, i32 1 monotonic, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_add_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_add_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI17_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_add_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a11, a10, a10
+; XTENSA-ATOMIC-NEXT: add a9, a11, a8
+; XTENSA-ATOMIC-NEXT: wsr a11, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a10, a9
+; XTENSA-ATOMIC-NEXT: bne a11, a9, .LBB17_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a9
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw add ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_sub_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_sub_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI18_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_sub_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a11, a10, a10
+; XTENSA-ATOMIC-NEXT: sub a9, a11, a8
+; XTENSA-ATOMIC-NEXT: wsr a11, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a10, a9
+; XTENSA-ATOMIC-NEXT: bne a11, a9, .LBB18_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a9
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw sub ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_and_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_and_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI19_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_and_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a11, a10, a10
+; XTENSA-ATOMIC-NEXT: and a9, a11, a8
+; XTENSA-ATOMIC-NEXT: wsr a11, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a10, a9
+; XTENSA-ATOMIC-NEXT: bne a11, a9, .LBB19_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a9
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw and ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_nand_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_nand_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI20_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_nand_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a11, a2, 0
+; XTENSA-ATOMIC-NEXT: movi a9, -1
+; XTENSA-ATOMIC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a12, a11, a11
+; XTENSA-ATOMIC-NEXT: and a10, a12, a8
+; XTENSA-ATOMIC-NEXT: xor a10, a10, a9
+; XTENSA-ATOMIC-NEXT: wsr a12, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a11, a10
+; XTENSA-ATOMIC-NEXT: bne a12, a10, .LBB20_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw nand ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_or_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_or_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI21_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_or_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a11, a10, a10
+; XTENSA-ATOMIC-NEXT: or a9, a11, a8
+; XTENSA-ATOMIC-NEXT: wsr a11, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a10, a9
+; XTENSA-ATOMIC-NEXT: bne a11, a9, .LBB21_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a9
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw or ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_xor_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_xor_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI22_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_xor_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a11, a10, a10
+; XTENSA-ATOMIC-NEXT: xor a9, a11, a8
+; XTENSA-ATOMIC-NEXT: wsr a11, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a9, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a10, a9
+; XTENSA-ATOMIC-NEXT: bne a11, a9, .LBB22_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a9
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw xor ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
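+; NOTE(review): XTENSA-ATOMIC below keeps the loaded value when it is < 1 and stores 1 otherwise, i.e. min(old, 1), while the XTENSA libcall loop stores max(old, 1); verify the branch condition used when lowering atomicrmw max.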
+define i32 @rmw32_max_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_max_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: or a6, a2, a2
+; XTENSA-NEXT: l32i a2, a6, 0
+; XTENSA-NEXT: movi a5, 1
+; XTENSA-NEXT: movi a7, 5
+; XTENSA-NEXT: l32r a4, .LCPI23_0
+; XTENSA-NEXT: j .LBB23_2
+; XTENSA-NEXT: .LBB23_1: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB23_2 Depth=1
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a6, a6
+; XTENSA-NEXT: or a13, a7, a7
+; XTENSA-NEXT: or a14, a7, a7
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: l32i a2, a1, 0
+; XTENSA-NEXT: bnez a10, .LBB23_4
+; XTENSA-NEXT: .LBB23_2: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a2, a1, 0
+; XTENSA-NEXT: or a12, a5, a5
+; XTENSA-NEXT: bge a5, a2, .LBB23_1
+; XTENSA-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB23_2 Depth=1
+; XTENSA-NEXT: or a12, a2, a2
+; XTENSA-NEXT: j .LBB23_1
+; XTENSA-NEXT: .LBB23_4: # %atomicrmw.end
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_max_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a11, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a9, a11, a11
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: blt a9, a8, .LBB23_3
+; XTENSA-ATOMIC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a8
+; XTENSA-ATOMIC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a10
+; XTENSA-ATOMIC-NEXT: wsr a9, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a11, a10
+; XTENSA-ATOMIC-NEXT: bne a9, a10, .LBB23_1
+; XTENSA-ATOMIC-NEXT: # %bb.4:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw max ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
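+; NOTE(review): XTENSA-ATOMIC below keeps the loaded value when it is >= 1 and stores 1 otherwise, i.e. max(old, 1), while the XTENSA libcall loop stores min(old, 1); verify the branch condition used when lowering atomicrmw min.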
+define i32 @rmw32_min_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_min_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: l32i a12, a2, 0
+; XTENSA-NEXT: movi a6, 1
+; XTENSA-NEXT: movi a5, 2
+; XTENSA-NEXT: movi a7, 5
+; XTENSA-NEXT: l32r a4, .LCPI24_0
+; XTENSA-NEXT: j .LBB24_2
+; XTENSA-NEXT: .LBB24_1: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB24_2 Depth=1
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: or a13, a7, a7
+; XTENSA-NEXT: or a14, a7, a7
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: l32i a12, a1, 0
+; XTENSA-NEXT: bnez a10, .LBB24_4
+; XTENSA-NEXT: .LBB24_2: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a12, a1, 0
+; XTENSA-NEXT: blt a12, a5, .LBB24_1
+; XTENSA-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB24_2 Depth=1
+; XTENSA-NEXT: or a12, a6, a6
+; XTENSA-NEXT: j .LBB24_1
+; XTENSA-NEXT: .LBB24_4: # %atomicrmw.end
+; XTENSA-NEXT: or a2, a12, a12
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_min_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a11, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a9, a11, a11
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: bge a9, a8, .LBB24_3
+; XTENSA-ATOMIC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a8
+; XTENSA-ATOMIC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a10
+; XTENSA-ATOMIC-NEXT: wsr a9, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a11, a10
+; XTENSA-ATOMIC-NEXT: bne a9, a10, .LBB24_1
+; XTENSA-ATOMIC-NEXT: # %bb.4:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw min ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
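+; NOTE(review): XTENSA-ATOMIC below keeps the loaded value when it is <u 1 and stores 1 otherwise, i.e. umin(old, 1), while the XTENSA libcall loop stores umax(old, 1); verify the branch condition used when lowering atomicrmw umax.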
+define i32 @rmw32_umax_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_umax_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: or a6, a2, a2
+; XTENSA-NEXT: l32i a2, a6, 0
+; XTENSA-NEXT: movi a5, 1
+; XTENSA-NEXT: movi a7, 5
+; XTENSA-NEXT: l32r a4, .LCPI25_0
+; XTENSA-NEXT: j .LBB25_2
+; XTENSA-NEXT: .LBB25_1: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB25_2 Depth=1
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a6, a6
+; XTENSA-NEXT: or a13, a7, a7
+; XTENSA-NEXT: or a14, a7, a7
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: l32i a2, a1, 0
+; XTENSA-NEXT: bnez a10, .LBB25_4
+; XTENSA-NEXT: .LBB25_2: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a2, a1, 0
+; XTENSA-NEXT: or a12, a5, a5
+; XTENSA-NEXT: bgeu a5, a2, .LBB25_1
+; XTENSA-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB25_2 Depth=1
+; XTENSA-NEXT: or a12, a2, a2
+; XTENSA-NEXT: j .LBB25_1
+; XTENSA-NEXT: .LBB25_4: # %atomicrmw.end
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_umax_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a11, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a9, a11, a11
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: bltu a9, a8, .LBB25_3
+; XTENSA-ATOMIC-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a8
+; XTENSA-ATOMIC-NEXT: .LBB25_3: # in Loop: Header=BB25_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a10
+; XTENSA-ATOMIC-NEXT: wsr a9, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a11, a10
+; XTENSA-ATOMIC-NEXT: bne a9, a10, .LBB25_1
+; XTENSA-ATOMIC-NEXT: # %bb.4:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw umax ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
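+; NOTE(review): XTENSA-ATOMIC below keeps the loaded value when it is >=u 1 and stores 1 otherwise, i.e. umax(old, 1), while the XTENSA libcall loop stores umin(old, 1); verify the branch condition used when lowering atomicrmw umin.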
+define i32 @rmw32_umin_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_umin_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: l32i a12, a2, 0
+; XTENSA-NEXT: movi a6, 1
+; XTENSA-NEXT: movi a5, 2
+; XTENSA-NEXT: movi a7, 5
+; XTENSA-NEXT: l32r a4, .LCPI26_0
+; XTENSA-NEXT: j .LBB26_2
+; XTENSA-NEXT: .LBB26_1: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB26_2 Depth=1
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: or a13, a7, a7
+; XTENSA-NEXT: or a14, a7, a7
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: l32i a12, a1, 0
+; XTENSA-NEXT: bnez a10, .LBB26_4
+; XTENSA-NEXT: .LBB26_2: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a12, a1, 0
+; XTENSA-NEXT: bltu a12, a5, .LBB26_1
+; XTENSA-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-NEXT: # in Loop: Header=BB26_2 Depth=1
+; XTENSA-NEXT: or a12, a6, a6
+; XTENSA-NEXT: j .LBB26_1
+; XTENSA-NEXT: .LBB26_4: # %atomicrmw.end
+; XTENSA-NEXT: or a2, a12, a12
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_umin_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: l32i a11, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a9, a11, a11
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: bgeu a9, a8, .LBB26_3
+; XTENSA-ATOMIC-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a8
+; XTENSA-ATOMIC-NEXT: .LBB26_3: # in Loop: Header=BB26_1 Depth=1
+; XTENSA-ATOMIC-NEXT: mov.n a10, a10
+; XTENSA-ATOMIC-NEXT: wsr a9, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: mov.n a11, a10
+; XTENSA-ATOMIC-NEXT: bne a9, a10, .LBB26_1
+; XTENSA-ATOMIC-NEXT: # %bb.4:
+; XTENSA-ATOMIC-NEXT: mov.n a2, a10
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw umin ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @rmw32_xchg_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_xchg_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a11, 1
+; XTENSA-NEXT: movi a12, 5
+; XTENSA-NEXT: l32r a8, .LCPI27_0
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_xchg_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a9, 1
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a10, a10
+; XTENSA-ATOMIC-NEXT: wsr a8, scompare1
+; XTENSA-ATOMIC-NEXT: or a10, a9, a9
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: bne a10, a8, .LBB27_1
+; XTENSA-ATOMIC-NEXT: # %bb.2:
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw xchg ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+; Test: seq_cst atomicrmw fadd of float 1.0 on the value at %p, returning the
+; atomicrmw result.
+; Both prefixes expect a retry loop (%atomicrmw.start) around an indirect call
+; (callx8 a5) that receives the current value in a10 and a literal-pool
+; constant in a11; the callee is not visible here (presumably the soft-float
+; add helper - confirm against the constant pool).
+; The XTENSA prefix expects the exchange itself to be a second indirect call
+; (callx8 a4): the expected value is spilled to the stack slot at a1+0, its
+; address is passed in a11, both a13 and a14 are 5, and the loop repeats while
+; the call returns zero.
+; The XTENSA-ATOMIC prefix expects the exchange inline via wsr scompare1 +
+; s32c1i, with a memw before the initial load and another at %atomicrmw.end.
+define float @rmw32_fadd_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_fadd_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: l32i a10, a2, 0
+; XTENSA-NEXT: l32r a7, .LCPI28_0
+; XTENSA-NEXT: l32r a5, .LCPI28_1
+; XTENSA-NEXT: movi a6, 5
+; XTENSA-NEXT: l32r a4, .LCPI28_2
+; XTENSA-NEXT: .LBB28_1: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a10, a1, 0
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: callx8 a5
+; XTENSA-NEXT: or a12, a10, a10
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: or a13, a6, a6
+; XTENSA-NEXT: or a14, a6, a6
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: l32i a10, a1, 0
+; XTENSA-NEXT: beqz a8, .LBB28_1
+; XTENSA-NEXT: # %bb.2: # %atomicrmw.end
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_fadd_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a6, a2, 0
+; XTENSA-ATOMIC-NEXT: l32r a7, .LCPI28_0
+; XTENSA-ATOMIC-NEXT: l32r a5, .LCPI28_1
+; XTENSA-ATOMIC-NEXT: movi a4, 0
+; XTENSA-ATOMIC-NEXT: movi a3, 1
+; XTENSA-ATOMIC-NEXT: j .LBB28_2
+; XTENSA-ATOMIC-NEXT: .LBB28_1: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB28_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a6, a10, a10
+; XTENSA-ATOMIC-NEXT: beqi a8, 1, .LBB28_4
+; XTENSA-ATOMIC-NEXT: .LBB28_2: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a6, a6
+; XTENSA-ATOMIC-NEXT: or a11, a7, a7
+; XTENSA-ATOMIC-NEXT: callx8 a5
+; XTENSA-ATOMIC-NEXT: wsr a6, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: or a8, a3, a3
+; XTENSA-ATOMIC-NEXT: beq a10, a6, .LBB28_1
+; XTENSA-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB28_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a4, a4
+; XTENSA-ATOMIC-NEXT: j .LBB28_1
+; XTENSA-ATOMIC-NEXT: .LBB28_4: # %atomicrmw.end
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a10, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; Test: seq_cst atomicrmw fsub of float 1.0 on the value at %p, returning the
+; atomicrmw result.
+; Both prefixes expect a retry loop (%atomicrmw.start) around an indirect call
+; (callx8 a5) that receives the current value in a10 and a literal-pool
+; constant in a11; the callee is not visible here (presumably a soft-float
+; arithmetic helper - confirm against the constant pool).
+; The XTENSA prefix expects the exchange itself to be a second indirect call
+; (callx8 a4): the expected value is spilled to the stack slot at a1+0, its
+; address is passed in a11, both a13 and a14 are 5, and the loop repeats while
+; the call returns zero.
+; The XTENSA-ATOMIC prefix expects the exchange inline via wsr scompare1 +
+; s32c1i, with a memw before the initial load and another at %atomicrmw.end.
+define float @rmw32_fsub_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_fsub_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: l32i a10, a2, 0
+; XTENSA-NEXT: l32r a7, .LCPI29_0
+; XTENSA-NEXT: l32r a5, .LCPI29_1
+; XTENSA-NEXT: movi a6, 5
+; XTENSA-NEXT: l32r a4, .LCPI29_2
+; XTENSA-NEXT: .LBB29_1: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a10, a1, 0
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: callx8 a5
+; XTENSA-NEXT: or a12, a10, a10
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: or a13, a6, a6
+; XTENSA-NEXT: or a14, a6, a6
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: l32i a10, a1, 0
+; XTENSA-NEXT: beqz a8, .LBB29_1
+; XTENSA-NEXT: # %bb.2: # %atomicrmw.end
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_fsub_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a6, a2, 0
+; XTENSA-ATOMIC-NEXT: l32r a7, .LCPI29_0
+; XTENSA-ATOMIC-NEXT: l32r a5, .LCPI29_1
+; XTENSA-ATOMIC-NEXT: movi a4, 0
+; XTENSA-ATOMIC-NEXT: movi a3, 1
+; XTENSA-ATOMIC-NEXT: j .LBB29_2
+; XTENSA-ATOMIC-NEXT: .LBB29_1: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB29_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a6, a10, a10
+; XTENSA-ATOMIC-NEXT: beqi a8, 1, .LBB29_4
+; XTENSA-ATOMIC-NEXT: .LBB29_2: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a6, a6
+; XTENSA-ATOMIC-NEXT: or a11, a7, a7
+; XTENSA-ATOMIC-NEXT: callx8 a5
+; XTENSA-ATOMIC-NEXT: wsr a6, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: or a8, a3, a3
+; XTENSA-ATOMIC-NEXT: beq a10, a6, .LBB29_1
+; XTENSA-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB29_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a4, a4
+; XTENSA-ATOMIC-NEXT: j .LBB29_1
+; XTENSA-ATOMIC-NEXT: .LBB29_4: # %atomicrmw.end
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a10, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; Test: seq_cst atomicrmw fmin of float 1.0 on the value at %p, returning the
+; atomicrmw result.
+; Both prefixes expect a retry loop (%atomicrmw.start) around an indirect call
+; (callx8 a5) that receives the current value in a10 and a literal-pool
+; constant in a11; the callee is not visible here (presumably a float-minimum
+; helper - confirm against the constant pool).
+; The XTENSA prefix expects the exchange itself to be a second indirect call
+; (callx8 a4): the expected value is spilled to the stack slot at a1+0, its
+; address is passed in a11, both a13 and a14 are 5, and the loop repeats while
+; the call returns zero.
+; The XTENSA-ATOMIC prefix expects the exchange inline via wsr scompare1 +
+; s32c1i, with a memw before the initial load and another at %atomicrmw.end.
+define float @rmw32_fmin_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_fmin_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: l32i a10, a2, 0
+; XTENSA-NEXT: l32r a7, .LCPI30_0
+; XTENSA-NEXT: l32r a5, .LCPI30_1
+; XTENSA-NEXT: movi a6, 5
+; XTENSA-NEXT: l32r a4, .LCPI30_2
+; XTENSA-NEXT: .LBB30_1: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a10, a1, 0
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: callx8 a5
+; XTENSA-NEXT: or a12, a10, a10
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: or a13, a6, a6
+; XTENSA-NEXT: or a14, a6, a6
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: l32i a10, a1, 0
+; XTENSA-NEXT: beqz a8, .LBB30_1
+; XTENSA-NEXT: # %bb.2: # %atomicrmw.end
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_fmin_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a6, a2, 0
+; XTENSA-ATOMIC-NEXT: l32r a7, .LCPI30_0
+; XTENSA-ATOMIC-NEXT: l32r a5, .LCPI30_1
+; XTENSA-ATOMIC-NEXT: movi a4, 0
+; XTENSA-ATOMIC-NEXT: movi a3, 1
+; XTENSA-ATOMIC-NEXT: j .LBB30_2
+; XTENSA-ATOMIC-NEXT: .LBB30_1: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB30_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a6, a10, a10
+; XTENSA-ATOMIC-NEXT: beqi a8, 1, .LBB30_4
+; XTENSA-ATOMIC-NEXT: .LBB30_2: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a6, a6
+; XTENSA-ATOMIC-NEXT: or a11, a7, a7
+; XTENSA-ATOMIC-NEXT: callx8 a5
+; XTENSA-ATOMIC-NEXT: wsr a6, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: or a8, a3, a3
+; XTENSA-ATOMIC-NEXT: beq a10, a6, .LBB30_1
+; XTENSA-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB30_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a4, a4
+; XTENSA-ATOMIC-NEXT: j .LBB30_1
+; XTENSA-ATOMIC-NEXT: .LBB30_4: # %atomicrmw.end
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a10, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; Test: seq_cst atomicrmw fmax of float 1.0 on the value at %p, returning the
+; atomicrmw result.
+; Both prefixes expect a retry loop (%atomicrmw.start) around an indirect call
+; (callx8 a5) that receives the current value in a10 and a literal-pool
+; constant in a11; the callee is not visible here (presumably a float-maximum
+; helper - confirm against the constant pool).
+; The XTENSA prefix expects the exchange itself to be a second indirect call
+; (callx8 a4): the expected value is spilled to the stack slot at a1+0, its
+; address is passed in a11, both a13 and a14 are 5, and the loop repeats while
+; the call returns zero.
+; The XTENSA-ATOMIC prefix expects the exchange inline via wsr scompare1 +
+; s32c1i, with a memw before the initial load and another at %atomicrmw.end.
+define float @rmw32_fmax_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: rmw32_fmax_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: l32i a10, a2, 0
+; XTENSA-NEXT: l32r a7, .LCPI31_0
+; XTENSA-NEXT: l32r a5, .LCPI31_1
+; XTENSA-NEXT: movi a6, 5
+; XTENSA-NEXT: l32r a4, .LCPI31_2
+; XTENSA-NEXT: .LBB31_1: # %atomicrmw.start
+; XTENSA-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-NEXT: s32i a10, a1, 0
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: callx8 a5
+; XTENSA-NEXT: or a12, a10, a10
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: or a13, a6, a6
+; XTENSA-NEXT: or a14, a6, a6
+; XTENSA-NEXT: callx8 a4
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: l32i a10, a1, 0
+; XTENSA-NEXT: beqz a8, .LBB31_1
+; XTENSA-NEXT: # %bb.2: # %atomicrmw.end
+; XTENSA-NEXT: or a2, a10, a10
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: rmw32_fmax_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: l32i a6, a2, 0
+; XTENSA-ATOMIC-NEXT: l32r a7, .LCPI31_0
+; XTENSA-ATOMIC-NEXT: l32r a5, .LCPI31_1
+; XTENSA-ATOMIC-NEXT: movi a4, 0
+; XTENSA-ATOMIC-NEXT: movi a3, 1
+; XTENSA-ATOMIC-NEXT: j .LBB31_2
+; XTENSA-ATOMIC-NEXT: .LBB31_1: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB31_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a6, a10, a10
+; XTENSA-ATOMIC-NEXT: beqi a8, 1, .LBB31_4
+; XTENSA-ATOMIC-NEXT: .LBB31_2: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1
+; XTENSA-ATOMIC-NEXT: or a10, a6, a6
+; XTENSA-ATOMIC-NEXT: or a11, a7, a7
+; XTENSA-ATOMIC-NEXT: callx8 a5
+; XTENSA-ATOMIC-NEXT: wsr a6, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a10, a2, 0
+; XTENSA-ATOMIC-NEXT: or a8, a3, a3
+; XTENSA-ATOMIC-NEXT: beq a10, a6, .LBB31_1
+; XTENSA-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start
+; XTENSA-ATOMIC-NEXT: # in Loop: Header=BB31_2 Depth=1
+; XTENSA-ATOMIC-NEXT: or a8, a4, a4
+; XTENSA-ATOMIC-NEXT: j .LBB31_1
+; XTENSA-ATOMIC-NEXT: .LBB31_4: # %atomicrmw.end
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a10, a10
+; XTENSA-ATOMIC-NEXT: retw
+ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4
+ ret float %v
+}
+
+; Test: cmpxchg on the i32 at %p with expected value 0 and new value 1, using
+; monotonic ordering for both success and failure; field 0 of the { i32, i1 }
+; result is returned.
+; The XTENSA prefix expects one indirect call (l32r + callx8) with a10 = %p,
+; a11 = address of a stack slot at a1+0 holding the expected 0, a12 = 1 and
+; a13 = a14 = 0; the return value is reloaded from that stack slot. The callee
+; is not visible here (presumably a compare-exchange libcall - confirm).
+; The XTENSA-ATOMIC prefix expects a single wsr scompare1 + s32c1i sequence
+; with no memw and no retry loop.
+define i32 @cmpxchg32_monotonic(ptr %p) nounwind {
+; XTENSA-LABEL: cmpxchg32_monotonic:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a13, 0
+; XTENSA-NEXT: s32i a13, a1, 0
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: movi a12, 1
+; XTENSA-NEXT: l32r a8, .LCPI32_0
+; XTENSA-NEXT: or a14, a13, a13
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: l32i a2, a1, 0
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: cmpxchg32_monotonic:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: movi a9, 0
+; XTENSA-ATOMIC-NEXT: wsr a9, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %res = cmpxchg ptr %p, i32 0, i32 1 monotonic monotonic
+ %res.0 = extractvalue { i32, i1 } %res, 0
+ ret i32 %res.0
+}
+
+; Test: cmpxchg on the i32 at %p with expected value 0 and new value 1, using
+; seq_cst ordering for both success and failure; field 0 of the { i32, i1 }
+; result is returned.
+; The XTENSA prefix expects one indirect call (l32r + callx8) with a10 = %p,
+; a11 = address of a stack slot at a1+0 holding the expected 0, a12 = 1 and
+; a13 = a14 = 5; the return value is reloaded from that stack slot. The callee
+; is not visible here (presumably a compare-exchange libcall with 5 as the
+; seq_cst ordering arguments - confirm).
+; The XTENSA-ATOMIC prefix expects a single wsr scompare1 + s32c1i sequence
+; with one memw before it and one after it, and no retry loop.
+define i32 @cmpxchg32_seq_cst(ptr %p) nounwind {
+; XTENSA-LABEL: cmpxchg32_seq_cst:
+; XTENSA: # %bb.0:
+; XTENSA-NEXT: entry a1, 48
+; XTENSA-NEXT: or a10, a2, a2
+; XTENSA-NEXT: movi a8, 0
+; XTENSA-NEXT: s32i a8, a1, 0
+; XTENSA-NEXT: addi a11, a1, 0
+; XTENSA-NEXT: movi a12, 1
+; XTENSA-NEXT: movi a13, 5
+; XTENSA-NEXT: l32r a8, .LCPI33_0
+; XTENSA-NEXT: or a14, a13, a13
+; XTENSA-NEXT: callx8 a8
+; XTENSA-NEXT: l32i a2, a1, 0
+; XTENSA-NEXT: retw
+;
+; XTENSA-ATOMIC-LABEL: cmpxchg32_seq_cst:
+; XTENSA-ATOMIC: # %bb.0:
+; XTENSA-ATOMIC-NEXT: entry a1, 32
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: movi a8, 1
+; XTENSA-ATOMIC-NEXT: movi a9, 0
+; XTENSA-ATOMIC-NEXT: wsr a9, scompare1
+; XTENSA-ATOMIC-NEXT: s32c1i a8, a2, 0
+; XTENSA-ATOMIC-NEXT: memw
+; XTENSA-ATOMIC-NEXT: or a2, a8, a8
+; XTENSA-ATOMIC-NEXT: retw
+ %res = cmpxchg ptr %p, i32 0, i32 1 seq_cst seq_cst
+ %res.0 = extractvalue { i32, i1 } %res, 0
+ ret i32 %res.0
+}
More information about the llvm-commits
mailing list