[Mlir-commits] [mlir] 4af249f - Add usub_cond and usub_sat operations to atomicrmw (#105568)
llvmlistbot at llvm.org
Fri Sep 6 08:19:26 PDT 2024
Author: anjenner
Date: 2024-09-06T16:19:20+01:00
New Revision: 4af249fe6e81abd137c95bc25f5060ae305134ca
URL: https://github.com/llvm/llvm-project/commit/4af249fe6e81abd137c95bc25f5060ae305134ca
DIFF: https://github.com/llvm/llvm-project/commit/4af249fe6e81abd137c95bc25f5060ae305134ca.diff
LOG: Add usub_cond and usub_sat operations to atomicrmw (#105568)
These both perform conditional subtraction; when the difference would be
negative, usub_cond stores the minuend back unchanged and usub_sat stores
zero (in both cases the instruction returns the original value).
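For illustration, a minimal IR usage of the two new operations (the syntax
matches the new Assembler and compatibility tests below; the function and
value names here are just placeholders):

define i32 @example(ptr %p, i32 %v) {
  ; usub_cond: stores (*p u>= %v) ? *p - %v : *p, returns the original value
  %old.cond = atomicrmw usub_cond ptr %p, i32 %v seq_cst
  ; usub_sat: stores the unsigned saturating difference, i.e. *p - %v clamped at 0
  %old.sat = atomicrmw usub_sat ptr %p, i32 %v seq_cst
  ret i32 %old.sat
}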
Added:
llvm/test/CodeGen/AArch64/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/Hexagon/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/LoongArch/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/VE/Scalar/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/WebAssembly/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/X86/atomicrmw-cond-sub-clamp.ll
Modified:
llvm/bindings/ocaml/llvm/llvm.ml
llvm/bindings/ocaml/llvm/llvm.mli
llvm/docs/GlobalISel/GenericOpcode.rst
llvm/docs/LangRef.rst
llvm/docs/ReleaseNotes.rst
llvm/include/llvm/AsmParser/LLToken.h
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/include/llvm/IR/Instructions.h
llvm/include/llvm/Support/TargetOpcodes.def
llvm/include/llvm/Target/GenericOpcodes.td
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/AsmParser/LLLexer.cpp
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/IR/Instructions.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Transforms/Utils/LowerAtomic.cpp
llvm/test/Assembler/atomic.ll
llvm/test/Bitcode/compatibility.ll
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
llvm/test/TableGen/GlobalISelEmitter.td
mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
mlir/test/Target/LLVMIR/Import/instructions.ll
mlir/test/Target/LLVMIR/llvmir.mlir
Removed:
################################################################################
diff --git a/llvm/bindings/ocaml/llvm/llvm.ml b/llvm/bindings/ocaml/llvm/llvm.ml
index 8e059ae71613dd..74ba31389b378e 100644
--- a/llvm/bindings/ocaml/llvm/llvm.ml
+++ b/llvm/bindings/ocaml/llvm/llvm.ml
@@ -300,6 +300,8 @@ module AtomicRMWBinOp = struct
| FMin
| UInc_Wrap
| UDec_Wrap
+ | USub_Cond
+ | USub_Sat
end
module ValueKind = struct
diff --git a/llvm/bindings/ocaml/llvm/llvm.mli b/llvm/bindings/ocaml/llvm/llvm.mli
index b8fdac7e38c6a7..076e651ba158fc 100644
--- a/llvm/bindings/ocaml/llvm/llvm.mli
+++ b/llvm/bindings/ocaml/llvm/llvm.mli
@@ -335,6 +335,8 @@ module AtomicRMWBinOp : sig
| FMin
| UInc_Wrap
| UDec_Wrap
+ | USub_Cond
+ | USub_Sat
end
(** The kind of an [llvalue], the result of [classify_value v].
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index d32aeff5a69bb1..bba56d9a5c0ec2 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -863,7 +863,9 @@ operands.
G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD,
G_ATOMICRMW_FSUB, G_ATOMICRMW_FMAX,
- G_ATOMICRMW_FMIN
+ G_ATOMICRMW_FMIN, G_ATOMICRMW_UINC_WRAP,
+ G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_USUB_COND,
+ G_ATOMICRMW_USUB_SAT
Generic atomicrmw. Expects a MachineMemOperand in addition to explicit
operands.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index c75b75edaf2ca0..144b4497ca63ce 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11241,6 +11241,8 @@ operation. The operation must be one of the following keywords:
- fmin
- uinc_wrap
- udec_wrap
+- usub_cond
+- usub_sat
For most of these operations, the type of '<value>' must be an integer
type whose bit width is a power of two greater than or equal to eight
@@ -11291,6 +11293,8 @@ operation argument:
- fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*`` intrinsic)
- uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above input value)
- udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` (decrement with wraparound to input value when decremented below zero).
+- usub_cond: ``*ptr = (*ptr u>= val) ? *ptr - val : *ptr`` (subtract only if no unsigned overflow).
+- usub_sat: ``*ptr = (*ptr u>= val) ? *ptr - val : 0`` (subtract with unsigned clamping to zero).
Example:
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 9982b5f427e4b2..52456896f2fc6c 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -54,6 +54,8 @@ Changes to the LLVM IR
the standard vector type ``<1 x i64>`` in bitcode upgrade.
* Renamed ``llvm.experimental.stepvector`` intrinsic to ``llvm.stepvector``.
+* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``.
+
Changes to LLVM infrastructure
------------------------------
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index db6780b70ca5aa..19029842a572a4 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -268,6 +268,8 @@ enum Kind {
kw_fmin,
kw_uinc_wrap,
kw_udec_wrap,
+ kw_usub_cond,
+ kw_usub_sat,
// Instruction Opcodes (Opcode in UIntVal).
kw_fneg,
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 4beac37a583445..49a48f1c1510c3 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -485,7 +485,9 @@ enum RMWOperations {
RMW_FMAX = 13,
RMW_FMIN = 14,
RMW_UINC_WRAP = 15,
- RMW_UDEC_WRAP = 16
+ RMW_UDEC_WRAP = 16,
+ RMW_USUB_COND = 17,
+ RMW_USUB_SAT = 18
};
/// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 56a77b8596a18b..c0b9d0eac23c3f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1636,6 +1636,41 @@ class MachineIRBuilder {
const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
MachineMemOperand &MMO);
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_USUB_COND Addr, Val, MMO`.
+ ///
+ /// Atomically replace the value at \p Addr with the original value minus \p
+ /// Val if the original value is greater than or equal to \p Val, or leaves it
+ /// unchanged otherwise. Puts the original value from \p Addr in \p OldValRes.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p OldValRes must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the
+ /// same type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAtomicRMWUSubCond(const DstOp &OldValRes,
+ const SrcOp &Addr,
+ const SrcOp &Val,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_USUB_SAT Addr, Val, MMO`.
+ ///
+ /// Atomically replace the value at \p Addr with the original value minus \p
+ /// Val, with clamping to zero if the unsigned subtraction would overflow.
+ /// Puts the original value from \p Addr in \p OldValRes.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p OldValRes must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the
+ /// same type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAtomicRMWUSubSat(const DstOp &OldValRes,
+ const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
/// Build and insert `G_FENCE Ordering, Scope`.
MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 187d624f0a73b9..65514882343dbe 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1345,6 +1345,8 @@ enum NodeType {
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_UINC_WRAP,
ATOMIC_LOAD_UDEC_WRAP,
+ ATOMIC_LOAD_USUB_COND,
+ ATOMIC_LOAD_USUB_SAT,
/// Masked load and store - consecutive vector load and store operations
/// with additional mask operand that prevents memory accesses to the
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 88549d9c9a2858..6067b3b29ea181 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1484,6 +1484,8 @@ class MemSDNode : public SDNode {
case ISD::ATOMIC_LOAD_FMIN:
case ISD::ATOMIC_LOAD_UINC_WRAP:
case ISD::ATOMIC_LOAD_UDEC_WRAP:
+ case ISD::ATOMIC_LOAD_USUB_COND:
+ case ISD::ATOMIC_LOAD_USUB_SAT:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE:
case ISD::MLOAD:
@@ -1550,27 +1552,29 @@ class AtomicSDNode : public MemSDNode {
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
- return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
+ return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
- N->getOpcode() == ISD::ATOMIC_SWAP ||
- N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
- N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
- N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
- N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
- N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
- N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
- N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
- N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
- N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
- N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
- N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
+ N->getOpcode() == ISD::ATOMIC_SWAP ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP ||
- N->getOpcode() == ISD::ATOMIC_LOAD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_USUB_COND ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_USUB_SAT ||
+ N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
};
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 6257d03458cab2..ab3321ee755717 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -751,8 +751,16 @@ class AtomicRMWInst : public Instruction {
/// *p = ((old == 0) || (old u> v)) ? v : (old - 1)
UDecWrap,
+ /// Subtract only if no unsigned overflow.
+ /// *p = (old u>= v) ? old - v : old
+ USubCond,
+
+ /// *p = usub.sat(old, v)
+ /// \p usub.sat matches the behavior of \p llvm.usub.sat.*.
+ USubSat,
+
FIRST_BINOP = Xchg,
- LAST_BINOP = UDecWrap,
+ LAST_BINOP = USubSat,
BAD_BINOP
};
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e1883de0c93b4c..a3692a5fb6ebd7 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -422,12 +422,14 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UINC_WRAP)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UDEC_WRAP)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_USUB_COND)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_USUB_SAT)
// Marker for start of Generic AtomicRMW opcodes
HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_START, G_ATOMICRMW_XCHG)
// Marker for end of Generic AtomicRMW opcodes
-HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_UDEC_WRAP)
+HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_USUB_SAT)
// Generic atomic fence
HANDLE_TARGET_OPCODE(G_FENCE)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 36a0a087ba457c..f4934af4563d83 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1311,6 +1311,8 @@ def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP;
def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP;
def G_ATOMICRMW_UINC_WRAP : G_ATOMICRMW_OP;
def G_ATOMICRMW_UDEC_WRAP : G_ATOMICRMW_OP;
+def G_ATOMICRMW_USUB_COND : G_ATOMICRMW_OP;
+def G_ATOMICRMW_USUB_SAT : G_ATOMICRMW_OP;
def G_FENCE : GenericInstruction {
let OutOperandList = (outs);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 72d155b483cf2b..93444876b12963 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -260,6 +260,8 @@ def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>;
def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>;
def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>;
def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>;
+def : GINodeEquiv<G_ATOMICRMW_USUB_COND, atomic_load_usub_cond>;
+def : GINodeEquiv<G_ATOMICRMW_USUB_SAT, atomic_load_usub_sat>;
def : GINodeEquiv<G_FENCE, atomic_fence>;
def : GINodeEquiv<G_PREFETCH, prefetch>;
def : GINodeEquiv<G_TRAP, trap>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index dd79002dcbdb48..759fd7810b2307 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -758,6 +758,10 @@ def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_usub_cond : SDNode<"ISD::ATOMIC_LOAD_USUB_COND", SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_usub_sat : SDNode<"ISD::ATOMIC_LOAD_USUB_SAT", SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 7c97f7afbe0933..a3e47da77fe776 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -704,6 +704,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
KEYWORD(uinc_wrap);
KEYWORD(udec_wrap);
+ KEYWORD(usub_cond);
+ KEYWORD(usub_sat);
KEYWORD(splat);
KEYWORD(vscale);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 93dc2bd241581b..d8380fa27a2a25 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8357,6 +8357,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
case lltok::kw_udec_wrap:
Operation = AtomicRMWInst::UDecWrap;
break;
+ case lltok::kw_usub_cond:
+ Operation = AtomicRMWInst::USubCond;
+ break;
+ case lltok::kw_usub_sat:
+ Operation = AtomicRMWInst::USubSat;
+ break;
case lltok::kw_fadd:
Operation = AtomicRMWInst::FAdd;
IsFP = true;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1cd9ec6b8fca20..f887c9be73e214 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1349,6 +1349,10 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
return AtomicRMWInst::UIncWrap;
case bitc::RMW_UDEC_WRAP:
return AtomicRMWInst::UDecWrap;
+ case bitc::RMW_USUB_COND:
+ return AtomicRMWInst::USubCond;
+ case bitc::RMW_USUB_SAT:
+ return AtomicRMWInst::USubSat;
}
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 26fd02b3e1a043..bf2eea5bc1582e 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -668,6 +668,10 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
return bitc::RMW_UINC_WRAP;
case AtomicRMWInst::UDecWrap:
return bitc::RMW_UDEC_WRAP;
+ case AtomicRMWInst::USubCond:
+ return bitc::RMW_USUB_COND;
+ case AtomicRMWInst::USubSat:
+ return bitc::RMW_USUB_SAT;
}
}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index f2b58d542b373c..303058416166d3 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -901,7 +901,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::FMin:
case AtomicRMWInst::FMax:
case AtomicRMWInst::UIncWrap:
- case AtomicRMWInst::UDecWrap: {
+ case AtomicRMWInst::UDecWrap:
+ case AtomicRMWInst::USubCond:
+ case AtomicRMWInst::USubSat: {
// Finally, other ops will operate on the full value, so truncate down to
// the original size, and expand out again after doing the
// operation. Bitcasts will be inserted for FP values.
@@ -1816,7 +1818,9 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::FSub:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
- // No atomic libcalls are available for max/min/umax/umin.
+ case AtomicRMWInst::USubCond:
+ case AtomicRMWInst::USubSat:
+ // No atomic libcalls are available for these.
return {};
}
llvm_unreachable("Unexpected AtomicRMW operation.");
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b290d7fb4ce4a1..6c98800b29c26b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3308,6 +3308,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::UDecWrap:
Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP;
break;
+ case AtomicRMWInst::USubCond:
+ Opcode = TargetOpcode::G_ATOMICRMW_USUB_COND;
+ break;
+ case AtomicRMWInst::USubSat:
+ Opcode = TargetOpcode::G_ATOMICRMW_USUB_SAT;
+ break;
}
MIRBuilder.buildAtomicRMW(
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 870dbce3baa86e..29505f444b7650 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8755,24 +8755,18 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
MachineMemOperand *MMO) {
- assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
- Opcode == ISD::ATOMIC_LOAD_SUB ||
- Opcode == ISD::ATOMIC_LOAD_AND ||
- Opcode == ISD::ATOMIC_LOAD_CLR ||
- Opcode == ISD::ATOMIC_LOAD_OR ||
- Opcode == ISD::ATOMIC_LOAD_XOR ||
- Opcode == ISD::ATOMIC_LOAD_NAND ||
- Opcode == ISD::ATOMIC_LOAD_MIN ||
- Opcode == ISD::ATOMIC_LOAD_MAX ||
- Opcode == ISD::ATOMIC_LOAD_UMIN ||
- Opcode == ISD::ATOMIC_LOAD_UMAX ||
- Opcode == ISD::ATOMIC_LOAD_FADD ||
- Opcode == ISD::ATOMIC_LOAD_FSUB ||
- Opcode == ISD::ATOMIC_LOAD_FMAX ||
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND || Opcode == ISD::ATOMIC_LOAD_CLR ||
+ Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND || Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX || Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD ||
+ Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX ||
Opcode == ISD::ATOMIC_LOAD_FMIN ||
Opcode == ISD::ATOMIC_LOAD_UINC_WRAP ||
Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP ||
- Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_LOAD_USUB_COND ||
+ Opcode == ISD::ATOMIC_LOAD_USUB_SAT || Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 382a555aa656f2..1dbcf8fd765101 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5112,6 +5112,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::UDecWrap:
NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
break;
+ case AtomicRMWInst::USubCond:
+ NT = ISD::ATOMIC_LOAD_USUB_COND;
+ break;
+ case AtomicRMWInst::USubSat:
+ NT = ISD::ATOMIC_LOAD_USUB_SAT;
+ break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index a253d1a0e20170..1b663eae1fcfc8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -103,6 +103,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "AtomicLoadUIncWrap";
case ISD::ATOMIC_LOAD_UDEC_WRAP:
return "AtomicLoadUDecWrap";
+ case ISD::ATOMIC_LOAD_USUB_COND:
+ return "AtomicLoadUSubCond";
+ case ISD::ATOMIC_LOAD_USUB_SAT:
+ return "AtomicLoadUSubSat";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 71d51affba6427..515b74cbb75883 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7936,6 +7936,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
case AtomicRMWInst::FMin:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
+ case AtomicRMWInst::USubCond:
+ case AtomicRMWInst::USubSat:
llvm_unreachable("Unsupported atomic update operation");
}
llvm_unreachable("Unsupported atomic update operation");
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 93fa635e9b4e17..19da1f60d424d2 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1431,6 +1431,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) {
return "uinc_wrap";
case AtomicRMWInst::UDecWrap:
return "udec_wrap";
+ case AtomicRMWInst::USubCond:
+ return "usub_cond";
+ case AtomicRMWInst::USubSat:
+ return "usub_sat";
case AtomicRMWInst::BAD_BINOP:
return "<invalid operation>";
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 0e17ce7ea02bb4..2f7714fe636382 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5713,7 +5713,9 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// operations, use CmpXChg to expand.
if (AI->isFloatingPointOperation() ||
AI->getOperation() == AtomicRMWInst::UIncWrap ||
- AI->getOperation() == AtomicRMWInst::UDecWrap)
+ AI->getOperation() == AtomicRMWInst::UDecWrap ||
+ AI->getOperation() == AtomicRMWInst::USubCond ||
+ AI->getOperation() == AtomicRMWInst::USubSat)
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f1bd14d7ee0116..8b794656d5c219 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18858,6 +18858,8 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
switch (AI->getOperation()) {
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
+ case AtomicRMWInst::USubCond:
+ case AtomicRMWInst::USubSat:
return AtomicExpansionKind::CmpXChg;
default:
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index acee6443bc4520..b0c8c95d67cdea 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20453,7 +20453,9 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// forward-progress guarantee.
if (AI->isFloatingPointOperation() ||
AI->getOperation() == AtomicRMWInst::UIncWrap ||
- AI->getOperation() == AtomicRMWInst::UDecWrap)
+ AI->getOperation() == AtomicRMWInst::UDecWrap ||
+ AI->getOperation() == AtomicRMWInst::USubCond ||
+ AI->getOperation() == AtomicRMWInst::USubSat)
return AtomicExpansionKind::CmpXChg;
// Don't expand forced atomics, we want to have __sync libcalls instead.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8aa263c301512..44592afcf7216c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31004,6 +31004,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::FMin:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
+ case AtomicRMWInst::USubCond:
+ case AtomicRMWInst::USubSat:
default:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index f9bf419fb02252..8b3a0ce338e577 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -95,6 +95,14 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
Value *Or = Builder.CreateOr(CmpEq0, CmpOldGtVal);
return Builder.CreateSelect(Or, Val, Dec, "new");
}
+ case AtomicRMWInst::USubCond: {
+ Value *Cmp = Builder.CreateICmpUGE(Loaded, Val);
+ Value *Sub = Builder.CreateSub(Loaded, Val);
+ return Builder.CreateSelect(Cmp, Sub, Loaded, "new");
+ }
+ case AtomicRMWInst::USubSat:
+ return Builder.CreateIntrinsic(Intrinsic::usub_sat, Loaded->getType(),
+ {Loaded, Val}, nullptr, "new");
default:
llvm_unreachable("Unknown atomic op");
}
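(For context: targets that request CmpXChg expansion get a compare-exchange
loop built around the buildAtomicRMWValue logic above. A rough, hand-written
LLVM IR sketch of that loop for usub_cond follows; block and value names are
illustrative and not the exact output of the pass.)

define i32 @usub_cond_expanded(ptr %p, i32 %v) {
entry:
  %init = load i32, ptr %p, align 4
  br label %atomicrmw.start

atomicrmw.start:
  %loaded = phi i32 [ %init, %entry ], [ %old, %atomicrmw.start ]
  ; new = (loaded u>= v) ? loaded - v : loaded
  %cmp = icmp uge i32 %loaded, %v
  %sub = sub i32 %loaded, %v
  %new = select i1 %cmp, i32 %sub, i32 %loaded
  %pair = cmpxchg ptr %p, i32 %loaded, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  %success = extractvalue { i32, i1 } %pair, 1
  br i1 %success, label %atomicrmw.end, label %atomicrmw.start

atomicrmw.end:
  ret i32 %old
}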
diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll
index 32fe82ef2268c8..a44dcccc16bef1 100644
--- a/llvm/test/Assembler/atomic.ll
+++ b/llvm/test/Assembler/atomic.ll
@@ -42,6 +42,16 @@ define void @f(ptr %x) {
; CHECK: atomicrmw volatile udec_wrap ptr %x, i32 10 syncscope("agent") monotonic
atomicrmw volatile udec_wrap ptr %x, i32 10 syncscope("agent") monotonic
+ ; CHECK: atomicrmw volatile usub_cond ptr %x, i32 10 monotonic
+ atomicrmw volatile usub_cond ptr %x, i32 10 monotonic
+ ; CHECK: atomicrmw volatile usub_cond ptr %x, i32 10 syncscope("agent") monotonic
+ atomicrmw volatile usub_cond ptr %x, i32 10 syncscope("agent") monotonic
+
+ ; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 monotonic
+ atomicrmw volatile usub_sat ptr %x, i32 10 monotonic
+ ; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic
+ atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic
+
; CHECK: fence syncscope("singlethread") release
fence syncscope("singlethread") release
; CHECK: fence seq_cst
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index fd60c49a4be39b..e38c9783c9a8fe 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -906,6 +906,34 @@ define void @uinc_udec_wrap_atomics(ptr %word) {
ret void
}
+define void @usub_cond_usub_sat_atomics(ptr %word) {
+; CHECK: %atomicrmw.condsub0 = atomicrmw usub_cond ptr %word, i32 64 monotonic
+ %atomicrmw.condsub0 = atomicrmw usub_cond ptr %word, i32 64 monotonic
+
+; CHECK: %atomicrmw.condsub1 = atomicrmw usub_cond ptr %word, i32 128 seq_cst
+ %atomicrmw.condsub1 = atomicrmw usub_cond ptr %word, i32 128 seq_cst
+
+; CHECK: %atomicrmw.condsub2 = atomicrmw volatile usub_cond ptr %word, i32 128 seq_cst
+ %atomicrmw.condsub2 = atomicrmw volatile usub_cond ptr %word, i32 128 seq_cst
+
+; CHECK: %atomicrmw.condsub0.syncscope = atomicrmw usub_cond ptr %word, i32 27 syncscope("agent") monotonic
+ %atomicrmw.condsub0.syncscope = atomicrmw usub_cond ptr %word, i32 27 syncscope("agent") monotonic
+
+; CHECK: %atomicrmw.subclamp0 = atomicrmw usub_sat ptr %word, i32 99 monotonic
+ %atomicrmw.subclamp0 = atomicrmw usub_sat ptr %word, i32 99 monotonic
+
+; CHECK: %atomicrmw.subclamp1 = atomicrmw usub_sat ptr %word, i32 12 seq_cst
+ %atomicrmw.subclamp1 = atomicrmw usub_sat ptr %word, i32 12 seq_cst
+
+; CHECK: %atomicrmw.subclamp2 = atomicrmw volatile usub_sat ptr %word, i32 12 seq_cst
+ %atomicrmw.subclamp2 = atomicrmw volatile usub_sat ptr %word, i32 12 seq_cst
+
+; CHECK: %atomicrmw.subclamp0.syncscope = atomicrmw usub_sat ptr %word, i32 5 syncscope("system") monotonic
+ %atomicrmw.subclamp0.syncscope = atomicrmw usub_sat ptr %word, i32 5 syncscope("system") monotonic
+
+ ret void
+}
+
define void @pointer_atomics(ptr %word) {
; CHECK: %atomicrmw.xchg = atomicrmw xchg ptr %word, ptr null monotonic
%atomicrmw.xchg = atomicrmw xchg ptr %word, ptr null monotonic
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index b3b85090d11251..62d98a224fa3dd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -266,6 +266,12 @@
# DEBUG-NEXT: G_ATOMICRMW_UDEC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_ATOMICRMW_USUB_COND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_ATOMICRMW_USUB_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FENCE (opcode {{[0-9]+}}): 0 type indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..83fe8664f72b0d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB0_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxrb w8, [x0]
+; CHECK-NEXT: sub w9, w8, w1
+; CHECK-NEXT: cmp w8, w1, uxtb
+; CHECK-NEXT: csel w9, w9, w8, hs
+; CHECK-NEXT: stlxrb w10, w9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB0_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB1_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxrh w8, [x0]
+; CHECK-NEXT: sub w9, w8, w1
+; CHECK-NEXT: cmp w8, w1, uxth
+; CHECK-NEXT: csel w9, w9, w8, hs
+; CHECK-NEXT: stlxrh w10, w9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB1_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB2_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxr w8, [x0]
+; CHECK-NEXT: subs w9, w8, w1
+; CHECK-NEXT: csel w9, w9, w8, hs
+; CHECK-NEXT: stlxr w10, w9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB2_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: .LBB3_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxr x0, [x8]
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x9, x9, x0, hs
+; CHECK-NEXT: stlxr w10, x9, [x8]
+; CHECK-NEXT: cbnz w10, .LBB3_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB4_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxrb w8, [x0]
+; CHECK-NEXT: subs w9, w8, w1, uxtb
+; CHECK-NEXT: csel w9, wzr, w9, lo
+; CHECK-NEXT: stlxrb w10, w9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB4_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB5_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxrh w8, [x0]
+; CHECK-NEXT: subs w9, w8, w1, uxth
+; CHECK-NEXT: csel w9, wzr, w9, lo
+; CHECK-NEXT: stlxrh w10, w9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB5_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB6_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxr w8, [x0]
+; CHECK-NEXT: subs w9, w8, w1
+; CHECK-NEXT: csel w9, wzr, w9, lo
+; CHECK-NEXT: stlxr w10, w9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB6_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: .LBB7_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldaxr x8, [x0]
+; CHECK-NEXT: subs x9, x8, x1
+; CHECK-NEXT: csel x9, xzr, x9, lo
+; CHECK-NEXT: stlxr w10, x9, [x0]
+; CHECK-NEXT: cbnz w10, .LBB7_1
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..62711ee6834898
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 < %s | FileCheck %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB0_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrexb r12, [r0]
+; CHECK-NEXT: uxtb r3, r1
+; CHECK-NEXT: cmp r12, r3
+; CHECK-NEXT: mov r3, r12
+; CHECK-NEXT: subhs r3, r3, r1
+; CHECK-NEXT: strexb r2, r3, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: bne .LBB0_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: bx lr
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB1_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrexh r12, [r0]
+; CHECK-NEXT: uxth r3, r1
+; CHECK-NEXT: cmp r12, r3
+; CHECK-NEXT: mov r3, r12
+; CHECK-NEXT: subhs r3, r3, r1
+; CHECK-NEXT: strexh r2, r3, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: bne .LBB1_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: bx lr
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB2_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrex r12, [r0]
+; CHECK-NEXT: subs r3, r12, r1
+; CHECK-NEXT: movlo r3, r12
+; CHECK-NEXT: strex r2, r3, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: bne .LBB2_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: bx lr
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB3_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrexd r4, r5, [r0]
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: subs r6, r4, r2
+; CHECK-NEXT: sbcs r7, r5, r3
+; CHECK-NEXT: movwhs r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r7, r5
+; CHECK-NEXT: moveq r6, r4
+; CHECK-NEXT: strexd r1, r6, r7, [r0]
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: bne .LBB3_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB4_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrexb r12, [r0]
+; CHECK-NEXT: uqsub8 r3, r12, r1
+; CHECK-NEXT: strexb r2, r3, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: bne .LBB4_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: bx lr
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB5_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrexh r12, [r0]
+; CHECK-NEXT: uqsub16 r3, r12, r1
+; CHECK-NEXT: strexh r2, r3, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: bne .LBB5_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: bx lr
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB6_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrex r12, [r0]
+; CHECK-NEXT: subs r3, r12, r1
+; CHECK-NEXT: movlo r3, #0
+; CHECK-NEXT: strex r2, r3, [r0]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: bne .LBB6_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: bx lr
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: .LBB7_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrexd r4, r5, [r0]
+; CHECK-NEXT: subs r6, r4, r2
+; CHECK-NEXT: sbcs r7, r5, r3
+; CHECK-NEXT: adc r1, r12, #0
+; CHECK-NEXT: eors r1, r1, #1
+; CHECK-NEXT: movwne r7, #0
+; CHECK-NEXT: movwne r6, #0
+; CHECK-NEXT: strexd r1, r6, r7, [r0]
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: bne .LBB7_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/Hexagon/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/Hexagon/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..92a3da1793b909
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = and(#24,asl(r0,#3))
+; CHECK-NEXT: r3 = and(r0,#-4)
+; CHECK-NEXT: r2 = #255
+; CHECK-NEXT: r4 = and(r1,#255)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = asl(r2,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = sub(#-1,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 = memw_locked(r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = lsr(r6,r0)
+; CHECK-NEXT: r6 = and(r6,r5)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7 = and(r2,#255)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.gtu(r4,r7)
+; CHECK-NEXT: if (p0.new) r7 = add(r2,#0)
+; CHECK-NEXT: if (!p0.new) r7 = sub(r2,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7 = and(r7,#255)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 |= asl(r7,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memw_locked(r3,p0) = r6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB0_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r2
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = and(#24,asl(r0,#3))
+; CHECK-NEXT: r3 = and(r0,#-4)
+; CHECK-NEXT: r2 = ##65535
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = asl(r2,r0)
+; CHECK-NEXT: r4 = zxth(r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = sub(#-1,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 = memw_locked(r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = lsr(r6,r0)
+; CHECK-NEXT: r6 = and(r6,r5)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7 = zxth(r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.gtu(r4,r7)
+; CHECK-NEXT: if (p0.new) r7 = add(r2,#0)
+; CHECK-NEXT: if (!p0.new) r7 = sub(r2,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7 = zxth(r7)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 |= asl(r7,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memw_locked(r3,p0) = r6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB1_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r2
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB2_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = memw_locked(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.gtu(r1,r2)
+; CHECK-NEXT: if (p0.new) r3 = add(r2,#0)
+; CHECK-NEXT: if (!p0.new) r3 = sub(r2,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memw_locked(r0,p0) = r3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB2_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r2
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB3_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = memd_locked(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = sub(r5:4,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.gtu(r3:2,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r8 = mux(p0,r4,r6)
+; CHECK-NEXT: r9 = mux(p0,r5,r7)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memd_locked(r0,p0) = r9:8
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB3_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r5,r4)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = and(#24,asl(r0,#3))
+; CHECK-NEXT: r2 = and(r0,#-4)
+; CHECK-NEXT: r3 = #255
+; CHECK-NEXT: r1 = and(r1,#255)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = asl(r3,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = sub(#-1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB4_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = #255
+; CHECK-NEXT: r4 = memw_locked(r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 &= lsr(r4,r0)
+; CHECK-NEXT: r6 = and(r4,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = maxu(r5,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = sub(r5,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 |= asl(r5,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memw_locked(r2,p0) = r6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB4_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = lsr(r4,r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = and(#24,asl(r0,#3))
+; CHECK-NEXT: r2 = and(r0,#-4)
+; CHECK-NEXT: r3 = ##65535
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = asl(r3,r0)
+; CHECK-NEXT: r1 = zxth(r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = sub(#-1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB5_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = ##65535
+; CHECK-NEXT: r4 = memw_locked(r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 &= lsr(r4,r0)
+; CHECK-NEXT: r6 = and(r4,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = maxu(r5,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = sub(r5,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 |= asl(r5,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memw_locked(r2,p0) = r6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB5_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = lsr(r4,r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB6_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = memw_locked(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = maxu(r2,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = sub(r3,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memw_locked(r0,p0) = r3
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB6_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r2
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB7_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = memd_locked(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = maxu(r5:4,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = sub(r7:6,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memd_locked(r0,p0) = r7:6
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) jump:nt .LBB7_1
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r5,r4)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..95bb25c41dabcb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; LA64-LABEL: atomicrmw_usub_cond_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a2, $a3, 24
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: ld.w $a5, $a0, 0
+; LA64-NEXT: sll.w $a3, $a4, $a3
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: andi $a4, $a1, 255
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB0_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB0_3 Depth 2
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: srl.w $a5, $a5, $a2
+; LA64-NEXT: andi $a7, $a5, 255
+; LA64-NEXT: sltu $a7, $a7, $a4
+; LA64-NEXT: xori $a7, $a7, 1
+; LA64-NEXT: sub.d $t0, $a5, $a1
+; LA64-NEXT: masknez $a5, $a5, $a7
+; LA64-NEXT: maskeqz $a7, $t0, $a7
+; LA64-NEXT: or $a5, $a7, $a5
+; LA64-NEXT: andi $a5, $a5, 255
+; LA64-NEXT: sll.w $a5, $a5, $a2
+; LA64-NEXT: and $a7, $a6, $a3
+; LA64-NEXT: or $a7, $a7, $a5
+; LA64-NEXT: .LBB0_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB0_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: bne $a5, $a6, .LBB0_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
+; LA64-NEXT: move $t0, $a7
+; LA64-NEXT: sc.w $t0, $a0, 0
+; LA64-NEXT: beqz $t0, .LBB0_3
+; LA64-NEXT: b .LBB0_6
+; LA64-NEXT: .LBB0_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB0_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT: bne $a5, $a6, .LBB0_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; LA64-LABEL: atomicrmw_usub_cond_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a2, $a3, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: ld.w $a5, $a0, 0
+; LA64-NEXT: sll.w $a3, $a4, $a3
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: bstrpick.d $a4, $a1, 15, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB1_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB1_3 Depth 2
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: srl.w $a5, $a5, $a2
+; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
+; LA64-NEXT: sltu $a7, $a7, $a4
+; LA64-NEXT: xori $a7, $a7, 1
+; LA64-NEXT: sub.d $t0, $a5, $a1
+; LA64-NEXT: masknez $a5, $a5, $a7
+; LA64-NEXT: maskeqz $a7, $t0, $a7
+; LA64-NEXT: or $a5, $a7, $a5
+; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
+; LA64-NEXT: sll.w $a5, $a5, $a2
+; LA64-NEXT: and $a7, $a6, $a3
+; LA64-NEXT: or $a7, $a7, $a5
+; LA64-NEXT: .LBB1_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB1_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: bne $a5, $a6, .LBB1_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
+; LA64-NEXT: move $t0, $a7
+; LA64-NEXT: sc.w $t0, $a0, 0
+; LA64-NEXT: beqz $t0, .LBB1_3
+; LA64-NEXT: b .LBB1_6
+; LA64-NEXT: .LBB1_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB1_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT: bne $a5, $a6, .LBB1_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: srl.w $a0, $a5, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; LA64-LABEL: atomicrmw_usub_cond_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a2, $a0, 0
+; LA64-NEXT: addi.w $a3, $a1, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB2_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB2_3 Depth 2
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sltu $a2, $a2, $a3
+; LA64-NEXT: xori $a2, $a2, 1
+; LA64-NEXT: sub.w $a5, $a4, $a1
+; LA64-NEXT: maskeqz $a5, $a5, $a2
+; LA64-NEXT: masknez $a2, $a4, $a2
+; LA64-NEXT: or $a5, $a5, $a2
+; LA64-NEXT: .LBB2_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB2_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a4, .LBB2_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB2_3
+; LA64-NEXT: b .LBB2_6
+; LA64-NEXT: .LBB2_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB2_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
+; LA64-NEXT: bne $a2, $a4, .LBB2_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; LA64-LABEL: atomicrmw_usub_cond_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a2, $a0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB3_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB3_3 Depth 2
+; LA64-NEXT: move $a3, $a2
+; LA64-NEXT: sltu $a2, $a2, $a1
+; LA64-NEXT: xori $a2, $a2, 1
+; LA64-NEXT: sub.d $a4, $a3, $a1
+; LA64-NEXT: maskeqz $a4, $a4, $a2
+; LA64-NEXT: masknez $a2, $a3, $a2
+; LA64-NEXT: or $a4, $a4, $a2
+; LA64-NEXT: .LBB3_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB3_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a3, .LBB3_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: sc.d $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB3_3
+; LA64-NEXT: b .LBB3_6
+; LA64-NEXT: .LBB3_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB3_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; LA64-NEXT: bne $a2, $a3, .LBB3_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; LA64-LABEL: atomicrmw_usub_sat_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a2, $a3, 24
+; LA64-NEXT: ori $a5, $zero, 255
+; LA64-NEXT: ld.w $a4, $a0, 0
+; LA64-NEXT: sll.w $a3, $a5, $a3
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB4_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB4_3 Depth 2
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: srl.w $a4, $a4, $a2
+; LA64-NEXT: andi $a4, $a4, 255
+; LA64-NEXT: sub.d $a6, $a4, $a1
+; LA64-NEXT: sltu $a4, $a4, $a6
+; LA64-NEXT: masknez $a4, $a6, $a4
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: and $a6, $a5, $a3
+; LA64-NEXT: or $a6, $a6, $a4
+; LA64-NEXT: .LBB4_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB4_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: bne $a4, $a5, .LBB4_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
+; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: sc.w $a7, $a0, 0
+; LA64-NEXT: beqz $a7, .LBB4_3
+; LA64-NEXT: b .LBB4_6
+; LA64-NEXT: .LBB4_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB4_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT: bne $a4, $a5, .LBB4_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; LA64-LABEL: atomicrmw_usub_sat_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a2, $a3, 24
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a5, $a4, 4095
+; LA64-NEXT: ld.w $a4, $a0, 0
+; LA64-NEXT: sll.w $a3, $a5, $a3
+; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB5_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB5_3 Depth 2
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: srl.w $a4, $a4, $a2
+; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
+; LA64-NEXT: sub.d $a6, $a4, $a1
+; LA64-NEXT: sltu $a4, $a4, $a6
+; LA64-NEXT: masknez $a4, $a6, $a4
+; LA64-NEXT: sll.w $a4, $a4, $a2
+; LA64-NEXT: and $a6, $a5, $a3
+; LA64-NEXT: or $a6, $a6, $a4
+; LA64-NEXT: .LBB5_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB5_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: bne $a4, $a5, .LBB5_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
+; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: sc.w $a7, $a0, 0
+; LA64-NEXT: beqz $a7, .LBB5_3
+; LA64-NEXT: b .LBB5_6
+; LA64-NEXT: .LBB5_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB5_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT: bne $a4, $a5, .LBB5_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: srl.w $a0, $a4, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; LA64-LABEL: atomicrmw_usub_sat_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a2, $a0, 0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB6_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB6_3 Depth 2
+; LA64-NEXT: move $a3, $a2
+; LA64-NEXT: sub.d $a2, $a2, $a1
+; LA64-NEXT: sltu $a4, $a3, $a2
+; LA64-NEXT: masknez $a4, $a2, $a4
+; LA64-NEXT: .LBB6_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB6_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a3, .LBB6_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB6_3
+; LA64-NEXT: b .LBB6_6
+; LA64-NEXT: .LBB6_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB6_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
+; LA64-NEXT: bne $a2, $a3, .LBB6_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; LA64-LABEL: atomicrmw_usub_sat_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a2, $a0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB7_1: # %atomicrmw.start
+; LA64-NEXT: # =>This Loop Header: Depth=1
+; LA64-NEXT: # Child Loop BB7_3 Depth 2
+; LA64-NEXT: move $a3, $a2
+; LA64-NEXT: sub.d $a2, $a2, $a1
+; LA64-NEXT: sltu $a4, $a3, $a2
+; LA64-NEXT: masknez $a4, $a2, $a4
+; LA64-NEXT: .LBB7_3: # %atomicrmw.start
+; LA64-NEXT: # Parent Loop BB7_1 Depth=1
+; LA64-NEXT: # => This Inner Loop Header: Depth=2
+; LA64-NEXT: ll.d $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a3, .LBB7_5
+; LA64-NEXT: # %bb.4: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: sc.d $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB7_3
+; LA64-NEXT: b .LBB7_6
+; LA64-NEXT: .LBB7_5: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
+; LA64-NEXT: dbar 20
+; LA64-NEXT: .LBB7_6: # %atomicrmw.start
+; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
+; LA64-NEXT: bne $a2, $a3, .LBB7_1
+; LA64-NEXT: # %bb.2: # %atomicrmw.end
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..933311140465f5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,384 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: mr 5, 3
+; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28
+; CHECK-NEXT: lbz 3, 0(3)
+; CHECK-NEXT: xori 7, 7, 24
+; CHECK-NEXT: li 8, 255
+; CHECK-NEXT: clrlwi 6, 4, 24
+; CHECK-NEXT: rldicr 5, 5, 0, 61
+; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: b .LBB0_2
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: srw 3, 11, 7
+; CHECK-NEXT: cmplw 3, 9
+; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_5 Depth 2
+; CHECK-NEXT: clrlwi 9, 3, 24
+; CHECK-NEXT: cmplw 9, 6
+; CHECK-NEXT: blt 0, .LBB0_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: sub 3, 3, 4
+; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: slw 3, 3, 7
+; CHECK-NEXT: slw 10, 9, 7
+; CHECK-NEXT: and 3, 3, 8
+; CHECK-NEXT: and 10, 10, 8
+; CHECK-NEXT: .LBB0_5: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 12, 0, 5
+; CHECK-NEXT: and 11, 12, 8
+; CHECK-NEXT: cmpw 11, 10
+; CHECK-NEXT: bne 0, .LBB0_1
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: andc 12, 12, 8
+; CHECK-NEXT: or 12, 12, 3
+; CHECK-NEXT: stwcx. 12, 0, 5
+; CHECK-NEXT: bne 0, .LBB0_5
+; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: .LBB0_7: # %atomicrmw.end
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: mr 5, 3
+; CHECK-NEXT: li 8, 0
+; CHECK-NEXT: lhz 3, 0(3)
+; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27
+; CHECK-NEXT: xori 7, 7, 16
+; CHECK-NEXT: ori 8, 8, 65535
+; CHECK-NEXT: clrlwi 6, 4, 16
+; CHECK-NEXT: rldicr 5, 5, 0, 61
+; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: b .LBB1_2
+; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: srw 3, 11, 7
+; CHECK-NEXT: cmplw 3, 9
+; CHECK-NEXT: beq 0, .LBB1_7
+; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_5 Depth 2
+; CHECK-NEXT: clrlwi 9, 3, 16
+; CHECK-NEXT: cmplw 9, 6
+; CHECK-NEXT: blt 0, .LBB1_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: sub 3, 3, 4
+; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: slw 3, 3, 7
+; CHECK-NEXT: slw 10, 9, 7
+; CHECK-NEXT: and 3, 3, 8
+; CHECK-NEXT: and 10, 10, 8
+; CHECK-NEXT: .LBB1_5: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 12, 0, 5
+; CHECK-NEXT: and 11, 12, 8
+; CHECK-NEXT: cmpw 11, 10
+; CHECK-NEXT: bne 0, .LBB1_1
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: andc 12, 12, 8
+; CHECK-NEXT: or 12, 12, 3
+; CHECK-NEXT: stwcx. 12, 0, 5
+; CHECK-NEXT: bne 0, .LBB1_5
+; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: .LBB1_7: # %atomicrmw.end
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: lwz 6, 0(3)
+; CHECK-NEXT: b .LBB2_2
+; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: beq 0, .LBB2_7
+; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB2_5 Depth 2
+; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: bge 0, .LBB2_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 6
+; CHECK-NEXT: b .LBB2_5
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: sub 7, 6, 4
+; CHECK-NEXT: .LBB2_5: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 5, 0, 3
+; CHECK-NEXT: cmpw 5, 6
+; CHECK-NEXT: bne 0, .LBB2_1
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: stwcx. 7, 0, 3
+; CHECK-NEXT: bne 0, .LBB2_5
+; CHECK-NEXT: b .LBB2_1
+; CHECK-NEXT: .LBB2_7: # %atomicrmw.end
+; CHECK-NEXT: mr 3, 5
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: ld 6, 0(3)
+; CHECK-NEXT: b .LBB3_2
+; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: beq 0, .LBB3_7
+; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB3_5 Depth 2
+; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: bge 0, .LBB3_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 6
+; CHECK-NEXT: b .LBB3_5
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: sub 7, 6, 4
+; CHECK-NEXT: .LBB3_5: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ldarx 5, 0, 3
+; CHECK-NEXT: cmpd 5, 6
+; CHECK-NEXT: bne 0, .LBB3_1
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: stdcx. 7, 0, 3
+; CHECK-NEXT: bne 0, .LBB3_5
+; CHECK-NEXT: b .LBB3_1
+; CHECK-NEXT: .LBB3_7: # %atomicrmw.end
+; CHECK-NEXT: mr 3, 5
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: mr 5, 3
+; CHECK-NEXT: rlwinm 6, 5, 3, 27, 28
+; CHECK-NEXT: lbz 3, 0(3)
+; CHECK-NEXT: xori 6, 6, 24
+; CHECK-NEXT: li 7, 255
+; CHECK-NEXT: clrlwi 4, 4, 24
+; CHECK-NEXT: rldicr 5, 5, 0, 61
+; CHECK-NEXT: slw 7, 7, 6
+; CHECK-NEXT: b .LBB4_2
+; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: srw 3, 10, 6
+; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: beq 0, .LBB4_7
+; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB4_5 Depth 2
+; CHECK-NEXT: clrlwi 8, 3, 24
+; CHECK-NEXT: sub 3, 8, 4
+; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: li 9, 0
+; CHECK-NEXT: bgt 0, .LBB4_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 9, 3
+; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: slw 3, 9, 6
+; CHECK-NEXT: slw 9, 8, 6
+; CHECK-NEXT: and 3, 3, 7
+; CHECK-NEXT: and 9, 9, 7
+; CHECK-NEXT: .LBB4_5: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 11, 0, 5
+; CHECK-NEXT: and 10, 11, 7
+; CHECK-NEXT: cmpw 10, 9
+; CHECK-NEXT: bne 0, .LBB4_1
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: andc 11, 11, 7
+; CHECK-NEXT: or 11, 11, 3
+; CHECK-NEXT: stwcx. 11, 0, 5
+; CHECK-NEXT: bne 0, .LBB4_5
+; CHECK-NEXT: b .LBB4_1
+; CHECK-NEXT: .LBB4_7: # %atomicrmw.end
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: mr 5, 3
+; CHECK-NEXT: li 7, 0
+; CHECK-NEXT: lhz 3, 0(3)
+; CHECK-NEXT: rlwinm 6, 5, 3, 27, 27
+; CHECK-NEXT: xori 6, 6, 16
+; CHECK-NEXT: ori 7, 7, 65535
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: rldicr 5, 5, 0, 61
+; CHECK-NEXT: slw 7, 7, 6
+; CHECK-NEXT: b .LBB5_2
+; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: srw 3, 10, 6
+; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: beq 0, .LBB5_7
+; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB5_5 Depth 2
+; CHECK-NEXT: clrlwi 8, 3, 16
+; CHECK-NEXT: sub 3, 8, 4
+; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: li 9, 0
+; CHECK-NEXT: bgt 0, .LBB5_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 9, 3
+; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: slw 3, 9, 6
+; CHECK-NEXT: slw 9, 8, 6
+; CHECK-NEXT: and 3, 3, 7
+; CHECK-NEXT: and 9, 9, 7
+; CHECK-NEXT: .LBB5_5: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 11, 0, 5
+; CHECK-NEXT: and 10, 11, 7
+; CHECK-NEXT: cmpw 10, 9
+; CHECK-NEXT: bne 0, .LBB5_1
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: andc 11, 11, 7
+; CHECK-NEXT: or 11, 11, 3
+; CHECK-NEXT: stwcx. 11, 0, 5
+; CHECK-NEXT: bne 0, .LBB5_5
+; CHECK-NEXT: b .LBB5_1
+; CHECK-NEXT: .LBB5_7: # %atomicrmw.end
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: lwz 6, 0(3)
+; CHECK-NEXT: b .LBB6_2
+; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: beq 0, .LBB6_6
+; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB6_4 Depth 2
+; CHECK-NEXT: sub 5, 6, 4
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: li 7, 0
+; CHECK-NEXT: bgt 0, .LBB6_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: .LBB6_4: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 5, 0, 3
+; CHECK-NEXT: cmpw 5, 6
+; CHECK-NEXT: bne 0, .LBB6_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: stwcx. 7, 0, 3
+; CHECK-NEXT: bne 0, .LBB6_4
+; CHECK-NEXT: b .LBB6_1
+; CHECK-NEXT: .LBB6_6: # %atomicrmw.end
+; CHECK-NEXT: mr 3, 5
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sync
+; CHECK-NEXT: ld 6, 0(3)
+; CHECK-NEXT: b .LBB7_2
+; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: beq 0, .LBB7_6
+; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB7_4 Depth 2
+; CHECK-NEXT: sub 5, 6, 4
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: li 7, 0
+; CHECK-NEXT: bgt 0, .LBB7_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: .LBB7_4: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ldarx 5, 0, 3
+; CHECK-NEXT: cmpd 5, 6
+; CHECK-NEXT: bne 0, .LBB7_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: stdcx. 7, 0, 3
+; CHECK-NEXT: bne 0, .LBB7_4
+; CHECK-NEXT: b .LBB7_1
+; CHECK-NEXT: .LBB7_6: # %atomicrmw.end
+; CHECK-NEXT: mr 3, 5
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: blr
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..a9c8a4be7d2b47
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,1265 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32IA %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32IA %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64IA %s
+
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; RV32I-LABEL: atomicrmw_usub_cond_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lbu a3, 0(a0)
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: andi s2, a1, 255
+; RV32I-NEXT: .LBB0_1: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: andi a0, a3, 255
+; RV32I-NEXT: sltu a0, a0, s2
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sub a2, a3, a0
+; RV32I-NEXT: sb a3, 15(sp)
+; RV32I-NEXT: addi a1, sp, 15
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a3, 15(sp)
+; RV32I-NEXT: beqz a0, .LBB0_1
+; RV32I-NEXT: # %bb.2: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_cond_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a2, a0, -4
+; RV32IA-NEXT: slli a3, a0, 3
+; RV32IA-NEXT: andi a0, a3, 24
+; RV32IA-NEXT: li a4, 255
+; RV32IA-NEXT: lw a5, 0(a2)
+; RV32IA-NEXT: sll a3, a4, a3
+; RV32IA-NEXT: not a3, a3
+; RV32IA-NEXT: andi a4, a1, 255
+; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB0_3 Depth 2
+; RV32IA-NEXT: mv a6, a5
+; RV32IA-NEXT: srl a5, a5, a0
+; RV32IA-NEXT: andi a7, a5, 255
+; RV32IA-NEXT: sltu a7, a7, a4
+; RV32IA-NEXT: addi a7, a7, -1
+; RV32IA-NEXT: and a7, a7, a1
+; RV32IA-NEXT: sub a5, a5, a7
+; RV32IA-NEXT: andi a5, a5, 255
+; RV32IA-NEXT: sll a5, a5, a0
+; RV32IA-NEXT: and a7, a6, a3
+; RV32IA-NEXT: or a7, a7, a5
+; RV32IA-NEXT: .LBB0_3: # %atomicrmw.start
+; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NEXT: bne a5, a6, .LBB0_1
+; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2
+; RV32IA-NEXT: sc.w.rl t0, a7, (a2)
+; RV32IA-NEXT: bnez t0, .LBB0_3
+; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-NEXT: srl a0, a5, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_cond_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: andi s2, a1, 255
+; RV64I-NEXT: .LBB0_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: andi a0, a3, 255
+; RV64I-NEXT: sltu a0, a0, s2
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: sub a2, a3, a0
+; RV64I-NEXT: sb a3, 15(sp)
+; RV64I-NEXT: addi a1, sp, 15
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a3, 15(sp)
+; RV64I-NEXT: beqz a0, .LBB0_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_cond_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a2, a0, -4
+; RV64IA-NEXT: slli a4, a0, 3
+; RV64IA-NEXT: andi a0, a4, 24
+; RV64IA-NEXT: li a5, 255
+; RV64IA-NEXT: lw a3, 0(a2)
+; RV64IA-NEXT: sllw a4, a5, a4
+; RV64IA-NEXT: not a4, a4
+; RV64IA-NEXT: andi a5, a1, 255
+; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB0_3 Depth 2
+; RV64IA-NEXT: srlw a6, a3, a0
+; RV64IA-NEXT: sext.w a7, a3
+; RV64IA-NEXT: andi t0, a6, 255
+; RV64IA-NEXT: sltu t0, t0, a5
+; RV64IA-NEXT: addi t0, t0, -1
+; RV64IA-NEXT: and t0, t0, a1
+; RV64IA-NEXT: subw a6, a6, t0
+; RV64IA-NEXT: andi a6, a6, 255
+; RV64IA-NEXT: sllw a6, a6, a0
+; RV64IA-NEXT: and a3, a3, a4
+; RV64IA-NEXT: or a6, a3, a6
+; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a3, (a2)
+; RV64IA-NEXT: bne a3, a7, .LBB0_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2
+; RV64IA-NEXT: sc.w.rl t0, a6, (a2)
+; RV64IA-NEXT: bnez t0, .LBB0_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: srlw a0, a3, a0
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; RV32I-LABEL: atomicrmw_usub_cond_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: .cfi_offset s3, -20
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lhu a1, 0(a0)
+; RV32I-NEXT: lui s2, 16
+; RV32I-NEXT: addi s2, s2, -1
+; RV32I-NEXT: and s3, s0, s2
+; RV32I-NEXT: .LBB1_1: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: and a0, a1, s2
+; RV32I-NEXT: sltu a0, a0, s3
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: sub a2, a1, a0
+; RV32I-NEXT: sh a1, 10(sp)
+; RV32I-NEXT: addi a1, sp, 10
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a1, 10(sp)
+; RV32I-NEXT: beqz a0, .LBB1_1
+; RV32I-NEXT: # %bb.2: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_cond_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a2, a0, -4
+; RV32IA-NEXT: slli a4, a0, 3
+; RV32IA-NEXT: andi a0, a4, 24
+; RV32IA-NEXT: lui a3, 16
+; RV32IA-NEXT: addi a3, a3, -1
+; RV32IA-NEXT: lw a6, 0(a2)
+; RV32IA-NEXT: sll a4, a3, a4
+; RV32IA-NEXT: not a4, a4
+; RV32IA-NEXT: and a5, a1, a3
+; RV32IA-NEXT: .LBB1_1: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB1_3 Depth 2
+; RV32IA-NEXT: mv a7, a6
+; RV32IA-NEXT: srl a6, a6, a0
+; RV32IA-NEXT: and t0, a6, a3
+; RV32IA-NEXT: sltu t0, t0, a5
+; RV32IA-NEXT: addi t0, t0, -1
+; RV32IA-NEXT: and t0, t0, a1
+; RV32IA-NEXT: sub a6, a6, t0
+; RV32IA-NEXT: and a6, a6, a3
+; RV32IA-NEXT: sll a6, a6, a0
+; RV32IA-NEXT: and t0, a7, a4
+; RV32IA-NEXT: or t0, t0, a6
+; RV32IA-NEXT: .LBB1_3: # %atomicrmw.start
+; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a6, (a2)
+; RV32IA-NEXT: bne a6, a7, .LBB1_1
+; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2
+; RV32IA-NEXT: sc.w.rl t1, t0, (a2)
+; RV32IA-NEXT: bnez t1, .LBB1_3
+; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-NEXT: srl a0, a6, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_cond_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: .cfi_offset s3, -40
+; RV64I-NEXT: mv s0, a1
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: lhu a1, 0(a0)
+; RV64I-NEXT: lui s2, 16
+; RV64I-NEXT: addiw s2, s2, -1
+; RV64I-NEXT: and s3, s0, s2
+; RV64I-NEXT: .LBB1_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: and a0, a1, s2
+; RV64I-NEXT: sltu a0, a0, s3
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s0
+; RV64I-NEXT: sub a2, a1, a0
+; RV64I-NEXT: sh a1, 6(sp)
+; RV64I-NEXT: addi a1, sp, 6
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a1, 6(sp)
+; RV64I-NEXT: beqz a0, .LBB1_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_cond_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a2, a0, -4
+; RV64IA-NEXT: slli a5, a0, 3
+; RV64IA-NEXT: andi a0, a5, 24
+; RV64IA-NEXT: lui a3, 16
+; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: lw a4, 0(a2)
+; RV64IA-NEXT: sllw a5, a3, a5
+; RV64IA-NEXT: not a5, a5
+; RV64IA-NEXT: and a6, a1, a3
+; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB1_3 Depth 2
+; RV64IA-NEXT: srlw a7, a4, a0
+; RV64IA-NEXT: sext.w t0, a4
+; RV64IA-NEXT: and t1, a7, a3
+; RV64IA-NEXT: sltu t1, t1, a6
+; RV64IA-NEXT: addi t1, t1, -1
+; RV64IA-NEXT: and t1, t1, a1
+; RV64IA-NEXT: subw a7, a7, t1
+; RV64IA-NEXT: and a7, a7, a3
+; RV64IA-NEXT: sllw a7, a7, a0
+; RV64IA-NEXT: and a4, a4, a5
+; RV64IA-NEXT: or a7, a4, a7
+; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a4, (a2)
+; RV64IA-NEXT: bne a4, t0, .LBB1_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2
+; RV64IA-NEXT: sc.w.rl t1, a7, (a2)
+; RV64IA-NEXT: bnez t1, .LBB1_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; RV32I-LABEL: atomicrmw_usub_cond_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: .LBB2_1: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sltu a0, a3, s1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sub a2, a3, a0
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a3, 0(sp)
+; RV32I-NEXT: beqz a0, .LBB2_1
+; RV32I-NEXT: # %bb.2: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_cond_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a2, 0(a0)
+; RV32IA-NEXT: .LBB2_1: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
+; RV32IA-NEXT: mv a3, a2
+; RV32IA-NEXT: sltu a2, a2, a1
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: and a2, a2, a1
+; RV32IA-NEXT: sub a4, a3, a2
+; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
+; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a2, (a0)
+; RV32IA-NEXT: bne a2, a3, .LBB2_1
+; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB2_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a5, a4, (a0)
+; RV32IA-NEXT: bnez a5, .LBB2_3
+; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-NEXT: mv a0, a2
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_cond_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: sext.w s2, a1
+; RV64I-NEXT: .LBB2_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sltu a0, a3, s2
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: subw a2, a3, a0
+; RV64I-NEXT: sw a3, 12(sp)
+; RV64I-NEXT: addi a1, sp, 12
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: lw a3, 12(sp)
+; RV64I-NEXT: beqz a0, .LBB2_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_cond_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a2, 0(a0)
+; RV64IA-NEXT: sext.w a3, a1
+; RV64IA-NEXT: .LBB2_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB2_3 Depth 2
+; RV64IA-NEXT: sext.w a4, a2
+; RV64IA-NEXT: sltu a5, a4, a3
+; RV64IA-NEXT: addi a5, a5, -1
+; RV64IA-NEXT: and a5, a5, a1
+; RV64IA-NEXT: subw a5, a2, a5
+; RV64IA-NEXT: .LBB2_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB2_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a2, (a0)
+; RV64IA-NEXT: bne a2, a4, .LBB2_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a6, a5, (a0)
+; RV64IA-NEXT: bnez a6, .LBB2_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: mv a0, a2
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; RV32I-LABEL: atomicrmw_usub_cond_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lw a5, 4(a0)
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: j .LBB3_3
+; RV32I-NEXT: .LBB3_1: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
+; RV32I-NEXT: sltu a0, a5, s1
+; RV32I-NEXT: .LBB3_2: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a1, a0, s2
+; RV32I-NEXT: sltu a2, a4, a1
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: sub a3, a5, a0
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: sub a2, a4, a1
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: li a5, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw a5, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: bnez a0, .LBB3_5
+; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: bne a5, s1, .LBB3_1
+; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
+; RV32I-NEXT: sltu a0, a4, s2
+; RV32I-NEXT: j .LBB3_2
+; RV32I-NEXT: .LBB3_5: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_cond_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: .cfi_def_cfa_offset 32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: .cfi_offset ra, -4
+; RV32IA-NEXT: .cfi_offset s0, -8
+; RV32IA-NEXT: .cfi_offset s1, -12
+; RV32IA-NEXT: .cfi_offset s2, -16
+; RV32IA-NEXT: mv s0, a0
+; RV32IA-NEXT: lw a5, 4(a0)
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s2, a1
+; RV32IA-NEXT: j .LBB3_3
+; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
+; RV32IA-NEXT: sltu a0, a5, s1
+; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
+; RV32IA-NEXT: xori a0, a0, 1
+; RV32IA-NEXT: neg a0, a0
+; RV32IA-NEXT: and a1, a0, s2
+; RV32IA-NEXT: sltu a2, a4, a1
+; RV32IA-NEXT: and a0, a0, s1
+; RV32IA-NEXT: sub a3, a5, a0
+; RV32IA-NEXT: sub a3, a3, a2
+; RV32IA-NEXT: sub a2, a4, a1
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: li a5, 5
+; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw a5, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: bnez a0, .LBB3_5
+; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: bne a5, s1, .LBB3_1
+; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
+; RV32IA-NEXT: sltu a0, a4, s2
+; RV32IA-NEXT: j .LBB3_2
+; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_cond_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: .LBB3_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sltu a0, a3, s1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, s1
+; RV64I-NEXT: sub a2, a3, a0
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB3_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_cond_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a2, 0(a0)
+; RV64IA-NEXT: .LBB3_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
+; RV64IA-NEXT: mv a3, a2
+; RV64IA-NEXT: sltu a2, a2, a1
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: and a2, a2, a1
+; RV64IA-NEXT: sub a4, a3, a2
+; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.d.aqrl a2, (a0)
+; RV64IA-NEXT: bne a2, a3, .LBB3_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB3_3 Depth=2
+; RV64IA-NEXT: sc.d.rl a5, a4, (a0)
+; RV64IA-NEXT: bnez a5, .LBB3_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: mv a0, a2
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; RV32I-LABEL: atomicrmw_usub_sat_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lbu a3, 0(a0)
+; RV32I-NEXT: andi s1, a1, 255
+; RV32I-NEXT: .LBB4_1: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: andi a0, a3, 255
+; RV32I-NEXT: sub a1, a0, s1
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: sb a3, 3(sp)
+; RV32I-NEXT: addi a1, sp, 3
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_1
+; RV32I-NEXT: lbu a3, 3(sp)
+; RV32I-NEXT: beqz a0, .LBB4_1
+; RV32I-NEXT: # %bb.2: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_sat_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a2, a0, -4
+; RV32IA-NEXT: slli a3, a0, 3
+; RV32IA-NEXT: andi a0, a3, 24
+; RV32IA-NEXT: li a5, 255
+; RV32IA-NEXT: lw a4, 0(a2)
+; RV32IA-NEXT: sll a3, a5, a3
+; RV32IA-NEXT: not a3, a3
+; RV32IA-NEXT: andi a1, a1, 255
+; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB4_3 Depth 2
+; RV32IA-NEXT: mv a5, a4
+; RV32IA-NEXT: srl a4, a4, a0
+; RV32IA-NEXT: andi a4, a4, 255
+; RV32IA-NEXT: sub a6, a4, a1
+; RV32IA-NEXT: sltu a4, a4, a6
+; RV32IA-NEXT: addi a4, a4, -1
+; RV32IA-NEXT: and a4, a4, a6
+; RV32IA-NEXT: sll a4, a4, a0
+; RV32IA-NEXT: and a6, a5, a3
+; RV32IA-NEXT: or a6, a6, a4
+; RV32IA-NEXT: .LBB4_3: # %atomicrmw.start
+; RV32IA-NEXT: # Parent Loop BB4_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NEXT: bne a4, a5, .LBB4_1
+; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB4_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a7, a6, (a2)
+; RV32IA-NEXT: bnez a7, .LBB4_3
+; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_sat_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lbu a3, 0(a0)
+; RV64I-NEXT: andi s1, a1, 255
+; RV64I-NEXT: .LBB4_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: andi a0, a3, 255
+; RV64I-NEXT: sub a1, a0, s1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: sb a3, 7(sp)
+; RV64I-NEXT: addi a1, sp, 7
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_1
+; RV64I-NEXT: lbu a3, 7(sp)
+; RV64I-NEXT: beqz a0, .LBB4_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_sat_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a2, a0, -4
+; RV64IA-NEXT: slli a4, a0, 3
+; RV64IA-NEXT: andi a0, a4, 24
+; RV64IA-NEXT: li a5, 255
+; RV64IA-NEXT: lw a3, 0(a2)
+; RV64IA-NEXT: sllw a4, a5, a4
+; RV64IA-NEXT: not a4, a4
+; RV64IA-NEXT: andi a1, a1, 255
+; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB4_3 Depth 2
+; RV64IA-NEXT: srlw a5, a3, a0
+; RV64IA-NEXT: sext.w a6, a3
+; RV64IA-NEXT: andi a5, a5, 255
+; RV64IA-NEXT: sub a7, a5, a1
+; RV64IA-NEXT: sltu a5, a5, a7
+; RV64IA-NEXT: addi a5, a5, -1
+; RV64IA-NEXT: and a5, a5, a7
+; RV64IA-NEXT: sllw a5, a5, a0
+; RV64IA-NEXT: and a3, a3, a4
+; RV64IA-NEXT: or a5, a3, a5
+; RV64IA-NEXT: .LBB4_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a3, (a2)
+; RV64IA-NEXT: bne a3, a6, .LBB4_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a7, a5, (a2)
+; RV64IA-NEXT: bnez a7, .LBB4_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: srlw a0, a3, a0
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; RV32I-LABEL: atomicrmw_usub_sat_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lhu a3, 0(a0)
+; RV32I-NEXT: lui s1, 16
+; RV32I-NEXT: addi s1, s1, -1
+; RV32I-NEXT: and s2, a1, s1
+; RV32I-NEXT: .LBB5_1: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: and a0, a3, s1
+; RV32I-NEXT: sub a1, a0, s2
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: sh a3, 14(sp)
+; RV32I-NEXT: addi a1, sp, 14
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_2
+; RV32I-NEXT: lh a3, 14(sp)
+; RV32I-NEXT: beqz a0, .LBB5_1
+; RV32I-NEXT: # %bb.2: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_sat_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a2, a0, -4
+; RV32IA-NEXT: slli a4, a0, 3
+; RV32IA-NEXT: andi a0, a4, 24
+; RV32IA-NEXT: lui a3, 16
+; RV32IA-NEXT: addi a3, a3, -1
+; RV32IA-NEXT: lw a5, 0(a2)
+; RV32IA-NEXT: sll a4, a3, a4
+; RV32IA-NEXT: not a4, a4
+; RV32IA-NEXT: and a1, a1, a3
+; RV32IA-NEXT: .LBB5_1: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB5_3 Depth 2
+; RV32IA-NEXT: mv a6, a5
+; RV32IA-NEXT: srl a5, a5, a0
+; RV32IA-NEXT: and a5, a5, a3
+; RV32IA-NEXT: sub a7, a5, a1
+; RV32IA-NEXT: sltu a5, a5, a7
+; RV32IA-NEXT: addi a5, a5, -1
+; RV32IA-NEXT: and a5, a5, a7
+; RV32IA-NEXT: sll a5, a5, a0
+; RV32IA-NEXT: and a7, a6, a4
+; RV32IA-NEXT: or a7, a7, a5
+; RV32IA-NEXT: .LBB5_3: # %atomicrmw.start
+; RV32IA-NEXT: # Parent Loop BB5_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NEXT: bne a5, a6, .LBB5_1
+; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB5_3 Depth=2
+; RV32IA-NEXT: sc.w.rl t0, a7, (a2)
+; RV32IA-NEXT: bnez t0, .LBB5_3
+; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-NEXT: srl a0, a5, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_sat_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: .cfi_offset s2, -32
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lhu a3, 0(a0)
+; RV64I-NEXT: lui s1, 16
+; RV64I-NEXT: addiw s1, s1, -1
+; RV64I-NEXT: and s2, a1, s1
+; RV64I-NEXT: .LBB5_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: and a0, a3, s1
+; RV64I-NEXT: sub a1, a0, s2
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: sh a3, 14(sp)
+; RV64I-NEXT: addi a1, sp, 14
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_2
+; RV64I-NEXT: lh a3, 14(sp)
+; RV64I-NEXT: beqz a0, .LBB5_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_sat_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a2, a0, -4
+; RV64IA-NEXT: slli a5, a0, 3
+; RV64IA-NEXT: andi a0, a5, 24
+; RV64IA-NEXT: lui a3, 16
+; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: lw a4, 0(a2)
+; RV64IA-NEXT: sllw a5, a3, a5
+; RV64IA-NEXT: not a5, a5
+; RV64IA-NEXT: and a1, a1, a3
+; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB5_3 Depth 2
+; RV64IA-NEXT: srlw a6, a4, a0
+; RV64IA-NEXT: sext.w a7, a4
+; RV64IA-NEXT: and a6, a6, a3
+; RV64IA-NEXT: sub t0, a6, a1
+; RV64IA-NEXT: sltu a6, a6, t0
+; RV64IA-NEXT: addi a6, a6, -1
+; RV64IA-NEXT: and a6, a6, t0
+; RV64IA-NEXT: sllw a6, a6, a0
+; RV64IA-NEXT: and a4, a4, a5
+; RV64IA-NEXT: or a6, a4, a6
+; RV64IA-NEXT: .LBB5_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB5_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a4, (a2)
+; RV64IA-NEXT: bne a4, a7, .LBB5_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB5_3 Depth=2
+; RV64IA-NEXT: sc.w.rl t0, a6, (a2)
+; RV64IA-NEXT: bnez t0, .LBB5_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; RV32I-LABEL: atomicrmw_usub_sat_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: .LBB6_1: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sub a0, a3, s1
+; RV32I-NEXT: sltu a1, a3, a0
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a2, a1, a0
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: mv a1, sp
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_4
+; RV32I-NEXT: lw a3, 0(sp)
+; RV32I-NEXT: beqz a0, .LBB6_1
+; RV32I-NEXT: # %bb.2: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_sat_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a2, 0(a0)
+; RV32IA-NEXT: .LBB6_1: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB6_3 Depth 2
+; RV32IA-NEXT: mv a3, a2
+; RV32IA-NEXT: sub a2, a2, a1
+; RV32IA-NEXT: sltu a4, a3, a2
+; RV32IA-NEXT: addi a4, a4, -1
+; RV32IA-NEXT: and a4, a4, a2
+; RV32IA-NEXT: .LBB6_3: # %atomicrmw.start
+; RV32IA-NEXT: # Parent Loop BB6_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a2, (a0)
+; RV32IA-NEXT: bne a2, a3, .LBB6_1
+; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB6_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a5, a4, (a0)
+; RV32IA-NEXT: bnez a5, .LBB6_3
+; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-NEXT: mv a0, a2
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_sat_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: .LBB6_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: subw a0, a3, s1
+; RV64I-NEXT: sltu a1, a3, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a2, a1, a0
+; RV64I-NEXT: sw a3, 4(sp)
+; RV64I-NEXT: addi a1, sp, 4
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_4
+; RV64I-NEXT: lw a3, 4(sp)
+; RV64I-NEXT: beqz a0, .LBB6_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_sat_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a2, 0(a0)
+; RV64IA-NEXT: .LBB6_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB6_3 Depth 2
+; RV64IA-NEXT: subw a3, a2, a1
+; RV64IA-NEXT: sext.w a4, a2
+; RV64IA-NEXT: sltu a2, a4, a3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: and a3, a2, a3
+; RV64IA-NEXT: .LBB6_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB6_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a2, (a0)
+; RV64IA-NEXT: bne a2, a4, .LBB6_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB6_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a5, a3, (a0)
+; RV64IA-NEXT: bnez a5, .LBB6_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: mv a0, a2
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; RV32I-LABEL: atomicrmw_usub_sat_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset ra, -4
+; RV32I-NEXT: .cfi_offset s0, -8
+; RV32I-NEXT: .cfi_offset s1, -12
+; RV32I-NEXT: .cfi_offset s2, -16
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lw a5, 4(a0)
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
+; RV32I-NEXT: sltu a2, a5, a0
+; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
+; RV32I-NEXT: addi a3, a2, -1
+; RV32I-NEXT: and a2, a3, a1
+; RV32I-NEXT: and a3, a3, a0
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: addi a1, sp, 8
+; RV32I-NEXT: li a4, 5
+; RV32I-NEXT: li a5, 5
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __atomic_compare_exchange_8
+; RV32I-NEXT: lw a5, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: bnez a0, .LBB7_5
+; RV32I-NEXT: .LBB7_3: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: sltu a0, a4, s2
+; RV32I-NEXT: sub a1, a5, s1
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: sub a1, a4, s2
+; RV32I-NEXT: bne a0, a5, .LBB7_1
+; RV32I-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
+; RV32I-NEXT: sltu a2, a4, a1
+; RV32I-NEXT: j .LBB7_2
+; RV32I-NEXT: .LBB7_5: # %atomicrmw.end
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_usub_sat_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -32
+; RV32IA-NEXT: .cfi_def_cfa_offset 32
+; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: .cfi_offset ra, -4
+; RV32IA-NEXT: .cfi_offset s0, -8
+; RV32IA-NEXT: .cfi_offset s1, -12
+; RV32IA-NEXT: .cfi_offset s2, -16
+; RV32IA-NEXT: mv s0, a0
+; RV32IA-NEXT: lw a5, 4(a0)
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s2, a1
+; RV32IA-NEXT: j .LBB7_3
+; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
+; RV32IA-NEXT: sltu a2, a5, a0
+; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
+; RV32IA-NEXT: addi a3, a2, -1
+; RV32IA-NEXT: and a2, a3, a1
+; RV32IA-NEXT: and a3, a3, a0
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: addi a1, sp, 8
+; RV32IA-NEXT: li a4, 5
+; RV32IA-NEXT: li a5, 5
+; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: call __atomic_compare_exchange_8
+; RV32IA-NEXT: lw a5, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: bnez a0, .LBB7_5
+; RV32IA-NEXT: .LBB7_3: # %atomicrmw.start
+; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: sltu a0, a4, s2
+; RV32IA-NEXT: sub a1, a5, s1
+; RV32IA-NEXT: sub a0, a1, a0
+; RV32IA-NEXT: sub a1, a4, s2
+; RV32IA-NEXT: bne a0, a5, .LBB7_1
+; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
+; RV32IA-NEXT: sltu a2, a4, a1
+; RV32IA-NEXT: j .LBB7_2
+; RV32IA-NEXT: .LBB7_5: # %atomicrmw.end
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 32
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_usub_sat_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: .cfi_offset s0, -16
+; RV64I-NEXT: .cfi_offset s1, -24
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: .LBB7_1: # %atomicrmw.start
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: sub a0, a3, s1
+; RV64I-NEXT: sltu a1, a3, a0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a2, a1, a0
+; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: li a3, 5
+; RV64I-NEXT: li a4, 5
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __atomic_compare_exchange_8
+; RV64I-NEXT: ld a3, 0(sp)
+; RV64I-NEXT: beqz a0, .LBB7_1
+; RV64I-NEXT: # %bb.2: # %atomicrmw.end
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_usub_sat_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a2, 0(a0)
+; RV64IA-NEXT: .LBB7_1: # %atomicrmw.start
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB7_3 Depth 2
+; RV64IA-NEXT: mv a3, a2
+; RV64IA-NEXT: sub a2, a2, a1
+; RV64IA-NEXT: sltu a4, a3, a2
+; RV64IA-NEXT: addi a4, a4, -1
+; RV64IA-NEXT: and a4, a4, a2
+; RV64IA-NEXT: .LBB7_3: # %atomicrmw.start
+; RV64IA-NEXT: # Parent Loop BB7_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.d.aqrl a2, (a0)
+; RV64IA-NEXT: bne a2, a3, .LBB7_1
+; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
+; RV64IA-NEXT: # in Loop: Header=BB7_3 Depth=2
+; RV64IA-NEXT: sc.d.rl a5, a4, (a0)
+; RV64IA-NEXT: bnez a5, .LBB7_3
+; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
+; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
+; RV64IA-NEXT: mv a0, a2
+; RV64IA-NEXT: ret
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/VE/Scalar/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/VE/Scalar/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..860c4004658dba
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=ve-unknown-unknown < %s | FileCheck %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: and %s2, -4, %s0
+; CHECK-NEXT: and %s0, 3, %s0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 3
+; CHECK-NEXT: sla.w.sx %s3, (56)0, %s0
+; CHECK-NEXT: ldl.sx %s5, (, %s2)
+; CHECK-NEXT: xor %s3, -1, %s3
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: and %s4, %s1, (56)0
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s6, 0, %s5
+; CHECK-NEXT: and %s5, %s6, (32)0
+; CHECK-NEXT: srl %s5, %s5, %s0
+; CHECK-NEXT: and %s7, %s5, (56)0
+; CHECK-NEXT: subs.w.sx %s34, %s5, %s1
+; CHECK-NEXT: cmpu.w %s7, %s7, %s4
+; CHECK-NEXT: cmov.w.ge %s5, %s34, %s7
+; CHECK-NEXT: and %s5, %s5, (56)0
+; CHECK-NEXT: sla.w.sx %s5, %s5, %s0
+; CHECK-NEXT: and %s7, %s6, %s3
+; CHECK-NEXT: or %s5, %s7, %s5
+; CHECK-NEXT: cas.w %s5, (%s2), %s6
+; CHECK-NEXT: brne.w %s5, %s6, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: and %s1, %s5, (32)0
+; CHECK-NEXT: srl %s0, %s1, %s0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: and %s2, -4, %s0
+; CHECK-NEXT: and %s0, 3, %s0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 3
+; CHECK-NEXT: sla.w.sx %s3, (48)0, %s0
+; CHECK-NEXT: ldl.sx %s5, (, %s2)
+; CHECK-NEXT: xor %s3, -1, %s3
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: and %s4, %s1, (48)0
+; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s6, 0, %s5
+; CHECK-NEXT: and %s5, %s6, (32)0
+; CHECK-NEXT: srl %s5, %s5, %s0
+; CHECK-NEXT: and %s7, %s5, (48)0
+; CHECK-NEXT: subs.w.sx %s34, %s5, %s1
+; CHECK-NEXT: cmpu.w %s7, %s7, %s4
+; CHECK-NEXT: cmov.w.ge %s5, %s34, %s7
+; CHECK-NEXT: and %s5, %s5, (48)0
+; CHECK-NEXT: sla.w.sx %s5, %s5, %s0
+; CHECK-NEXT: and %s7, %s6, %s3
+; CHECK-NEXT: or %s5, %s7, %s5
+; CHECK-NEXT: cas.w %s5, (%s2), %s6
+; CHECK-NEXT: brne.w %s5, %s6, .LBB1_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: and %s1, %s5, (32)0
+; CHECK-NEXT: srl %s0, %s1, %s0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: ldl.sx %s2, (, %s0)
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s3, 0, %s2
+; CHECK-NEXT: subs.w.sx %s4, %s2, %s1
+; CHECK-NEXT: cmpu.w %s5, %s2, %s1
+; CHECK-NEXT: or %s2, 0, %s3
+; CHECK-NEXT: cmov.w.ge %s2, %s4, %s5
+; CHECK-NEXT: cas.w %s2, (%s0), %s3
+; CHECK-NEXT: brne.w %s2, %s3, .LBB2_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_sub_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_cond_sub_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: or %s2, 0, %s0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: ld %s0, (, %s0)
+; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s3, 0, %s0
+; CHECK-NEXT: subs.l %s4, %s0, %s1
+; CHECK-NEXT: cmpu.l %s5, %s0, %s1
+; CHECK-NEXT: cmov.l.ge %s0, %s4, %s5
+; CHECK-NEXT: cas.l %s0, (%s2), %s3
+; CHECK-NEXT: brne.l %s0, %s3, .LBB3_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s3, %s1, (32)0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: and %s1, -4, %s0
+; CHECK-NEXT: and %s0, 3, %s0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 3
+; CHECK-NEXT: sla.w.sx %s2, (56)0, %s0
+; CHECK-NEXT: ldl.sx %s4, (, %s1)
+; CHECK-NEXT: xor %s2, -1, %s2
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: and %s3, %s3, (56)0
+; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s5, 0, %s4
+; CHECK-NEXT: and %s4, %s5, (32)0
+; CHECK-NEXT: srl %s4, %s4, %s0
+; CHECK-NEXT: and %s4, %s4, (56)0
+; CHECK-NEXT: subs.w.sx %s6, %s4, %s3
+; CHECK-NEXT: cmpu.w %s4, %s6, %s4
+; CHECK-NEXT: cmov.w.gt %s6, (0)1, %s4
+; CHECK-NEXT: sla.w.sx %s4, %s6, %s0
+; CHECK-NEXT: and %s6, %s5, %s2
+; CHECK-NEXT: or %s4, %s6, %s4
+; CHECK-NEXT: cas.w %s4, (%s1), %s5
+; CHECK-NEXT: brne.w %s4, %s5, .LBB4_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: and %s1, %s4, (32)0
+; CHECK-NEXT: srl %s0, %s1, %s0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s3, %s1, (32)0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: and %s1, -4, %s0
+; CHECK-NEXT: and %s0, 3, %s0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 3
+; CHECK-NEXT: sla.w.sx %s2, (48)0, %s0
+; CHECK-NEXT: ldl.sx %s4, (, %s1)
+; CHECK-NEXT: xor %s2, -1, %s2
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: and %s3, %s3, (48)0
+; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s5, 0, %s4
+; CHECK-NEXT: and %s4, %s5, (32)0
+; CHECK-NEXT: srl %s4, %s4, %s0
+; CHECK-NEXT: and %s4, %s4, (48)0
+; CHECK-NEXT: subs.w.sx %s6, %s4, %s3
+; CHECK-NEXT: cmpu.w %s4, %s6, %s4
+; CHECK-NEXT: cmov.w.gt %s6, (0)1, %s4
+; CHECK-NEXT: sla.w.sx %s4, %s6, %s0
+; CHECK-NEXT: and %s6, %s5, %s2
+; CHECK-NEXT: or %s4, %s6, %s4
+; CHECK-NEXT: cas.w %s4, (%s1), %s5
+; CHECK-NEXT: brne.w %s4, %s5, .LBB5_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: and %s1, %s4, (32)0
+; CHECK-NEXT: srl %s0, %s1, %s0
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: ldl.sx %s2, (, %s0)
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s3, 0, %s2
+; CHECK-NEXT: subs.w.sx %s2, %s2, %s1
+; CHECK-NEXT: cmpu.w %s4, %s2, %s3
+; CHECK-NEXT: cmov.w.gt %s2, (0)1, %s4
+; CHECK-NEXT: cas.w %s2, (%s0), %s3
+; CHECK-NEXT: brne.w %s2, %s3, .LBB6_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; CHECK-LABEL: atomicrmw_usub_sat_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: ld %s2, (, %s0)
+; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or %s3, 0, %s2
+; CHECK-NEXT: subs.l %s2, %s2, %s1
+; CHECK-NEXT: cmpu.l %s4, %s2, %s3
+; CHECK-NEXT: cmov.l.gt %s2, (0)1, %s4
+; CHECK-NEXT: cas.l %s2, (%s0), %s3
+; CHECK-NEXT: brne.l %s2, %s3, .LBB7_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: b.l.t (, %s10)
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/WebAssembly/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/WebAssembly/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..3355237425b424
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=wasm32-unknown-unknown < %s | FileCheck -check-prefix=WASM32 %s
+; RUN: llc -mtriple=wasm64-unknown-unknown < %s | FileCheck -check-prefix=WASM64 %s
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; WASM32-LABEL: atomicrmw_usub_cond_i8:
+; WASM32: .functype atomicrmw_usub_cond_i8 (i32, i32) -> (i32)
+; WASM32-NEXT: .local i32
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.load8_u 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.sub
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.const 255
+; WASM32-NEXT: i32.and
+; WASM32-NEXT: i32.ge_u
+; WASM32-NEXT: i32.select
+; WASM32-NEXT: i32.store8 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_cond_i8:
+; WASM64: .functype atomicrmw_usub_cond_i8 (i64, i32) -> (i32)
+; WASM64-NEXT: .local i32
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.load8_u 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.sub
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.const 255
+; WASM64-NEXT: i32.and
+; WASM64-NEXT: i32.ge_u
+; WASM64-NEXT: i32.select
+; WASM64-NEXT: i32.store8 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; WASM32-LABEL: atomicrmw_usub_cond_i16:
+; WASM32: .functype atomicrmw_usub_cond_i16 (i32, i32) -> (i32)
+; WASM32-NEXT: .local i32
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.load16_u 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.sub
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.const 65535
+; WASM32-NEXT: i32.and
+; WASM32-NEXT: i32.ge_u
+; WASM32-NEXT: i32.select
+; WASM32-NEXT: i32.store16 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_cond_i16:
+; WASM64: .functype atomicrmw_usub_cond_i16 (i64, i32) -> (i32)
+; WASM64-NEXT: .local i32
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.load16_u 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.sub
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.const 65535
+; WASM64-NEXT: i32.and
+; WASM64-NEXT: i32.ge_u
+; WASM64-NEXT: i32.select
+; WASM64-NEXT: i32.store16 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; WASM32-LABEL: atomicrmw_usub_cond_i32:
+; WASM32: .functype atomicrmw_usub_cond_i32 (i32, i32) -> (i32)
+; WASM32-NEXT: .local i32
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.load 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.sub
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.ge_u
+; WASM32-NEXT: i32.select
+; WASM32-NEXT: i32.store 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_cond_i32:
+; WASM64: .functype atomicrmw_usub_cond_i32 (i64, i32) -> (i32)
+; WASM64-NEXT: .local i32
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.load 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.sub
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.ge_u
+; WASM64-NEXT: i32.select
+; WASM64-NEXT: i32.store 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; WASM32-LABEL: atomicrmw_usub_cond_i64:
+; WASM32: .functype atomicrmw_usub_cond_i64 (i32, i64) -> (i64)
+; WASM32-NEXT: .local i64
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i64.load 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i64.sub
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i64.ge_u
+; WASM32-NEXT: i64.select
+; WASM32-NEXT: i64.store 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_cond_i64:
+; WASM64: .functype atomicrmw_usub_cond_i64 (i64, i64) -> (i64)
+; WASM64-NEXT: .local i64
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i64.load 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i64.sub
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i64.ge_u
+; WASM64-NEXT: i64.select
+; WASM64-NEXT: i64.store 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; WASM32-LABEL: atomicrmw_usub_sat_i8:
+; WASM32: .functype atomicrmw_usub_sat_i8 (i32, i32) -> (i32)
+; WASM32-NEXT: .local i32
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.const 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.load8_u 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.const 255
+; WASM32-NEXT: i32.and
+; WASM32-NEXT: i32.sub
+; WASM32-NEXT: local.tee 1
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: i32.gt_u
+; WASM32-NEXT: i32.select
+; WASM32-NEXT: i32.store8 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_sat_i8:
+; WASM64: .functype atomicrmw_usub_sat_i8 (i64, i32) -> (i32)
+; WASM64-NEXT: .local i32
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.const 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.load8_u 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.const 255
+; WASM64-NEXT: i32.and
+; WASM64-NEXT: i32.sub
+; WASM64-NEXT: local.tee 1
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: i32.gt_u
+; WASM64-NEXT: i32.select
+; WASM64-NEXT: i32.store8 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; WASM32-LABEL: atomicrmw_usub_sat_i16:
+; WASM32: .functype atomicrmw_usub_sat_i16 (i32, i32) -> (i32)
+; WASM32-NEXT: .local i32
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.const 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.load16_u 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.const 65535
+; WASM32-NEXT: i32.and
+; WASM32-NEXT: i32.sub
+; WASM32-NEXT: local.tee 1
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: i32.gt_u
+; WASM32-NEXT: i32.select
+; WASM32-NEXT: i32.store16 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_sat_i16:
+; WASM64: .functype atomicrmw_usub_sat_i16 (i64, i32) -> (i32)
+; WASM64-NEXT: .local i32
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.const 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.load16_u 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.const 65535
+; WASM64-NEXT: i32.and
+; WASM64-NEXT: i32.sub
+; WASM64-NEXT: local.tee 1
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: i32.gt_u
+; WASM64-NEXT: i32.select
+; WASM64-NEXT: i32.store16 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; WASM32-LABEL: atomicrmw_usub_sat_i32:
+; WASM32: .functype atomicrmw_usub_sat_i32 (i32, i32) -> (i32)
+; WASM32-NEXT: .local i32
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.const 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32.load 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32.sub
+; WASM32-NEXT: local.tee 1
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: i32.gt_u
+; WASM32-NEXT: i32.select
+; WASM32-NEXT: i32.store 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_sat_i32:
+; WASM64: .functype atomicrmw_usub_sat_i32 (i64, i32) -> (i32)
+; WASM64-NEXT: .local i32
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.const 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32.load 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32.sub
+; WASM64-NEXT: local.tee 1
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: i32.gt_u
+; WASM64-NEXT: i32.select
+; WASM64-NEXT: i32.store 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; WASM32-LABEL: atomicrmw_usub_sat_i64:
+; WASM32: .functype atomicrmw_usub_sat_i64 (i32, i64) -> (i64)
+; WASM32-NEXT: .local i64
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i64.const 0
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i64.load 0
+; WASM32-NEXT: local.tee 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i64.sub
+; WASM32-NEXT: local.tee 1
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: i64.gt_u
+; WASM32-NEXT: i64.select
+; WASM32-NEXT: i64.store 0
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: atomicrmw_usub_sat_i64:
+; WASM64: .functype atomicrmw_usub_sat_i64 (i64, i64) -> (i64)
+; WASM64-NEXT: .local i64
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i64.const 0
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i64.load 0
+; WASM64-NEXT: local.tee 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i64.sub
+; WASM64-NEXT: local.tee 1
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: i64.gt_u
+; WASM64-NEXT: i64.select
+; WASM64-NEXT: i64.store 0
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: # fallthrough-return
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/X86/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/X86/atomicrmw-cond-sub-clamp.ll
new file mode 100644
index 00000000000000..04bfb4e367b9d7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomicrmw-cond-sub-clamp.ll
@@ -0,0 +1,436 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple i686-pc-linux < %s | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck %s --check-prefix=CHECK-64
+
+define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_cond_i8:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movzbl (%edx), %eax
+; CHECK-32-NEXT: jmp .LBB0_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-32-NEXT: lock cmpxchgb %ah, (%edx)
+; CHECK-32-NEXT: je .LBB0_4
+; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: movb %al, %ah
+; CHECK-32-NEXT: subb %cl, %ah
+; CHECK-32-NEXT: jae .LBB0_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-32-NEXT: movb %al, %ah
+; CHECK-32-NEXT: jmp .LBB0_3
+; CHECK-32-NEXT: .LBB0_4: # %atomicrmw.end
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_cond_i8:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movzbl (%rdi), %eax
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movzbl %al, %ecx
+; CHECK-64-NEXT: subb %sil, %al
+; CHECK-64-NEXT: movzbl %al, %edx
+; CHECK-64-NEXT: cmovbl %ecx, %edx
+; CHECK-64-NEXT: movl %ecx, %eax
+; CHECK-64-NEXT: lock cmpxchgb %dl, (%rdi)
+; CHECK-64-NEXT: jne .LBB0_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_cond_i16:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: .cfi_offset %esi, -8
+; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movzwl (%edx), %eax
+; CHECK-32-NEXT: jmp .LBB1_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB1_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB1_1 Depth=1
+; CHECK-32-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-32-NEXT: lock cmpxchgw %si, (%edx)
+; CHECK-32-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-32-NEXT: je .LBB1_4
+; CHECK-32-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: movl %eax, %esi
+; CHECK-32-NEXT: subw %cx, %si
+; CHECK-32-NEXT: jae .LBB1_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB1_1 Depth=1
+; CHECK-32-NEXT: movl %eax, %esi
+; CHECK-32-NEXT: jmp .LBB1_3
+; CHECK-32-NEXT: .LBB1_4: # %atomicrmw.end
+; CHECK-32-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_cond_i16:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movzwl (%rdi), %eax
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movl %eax, %ecx
+; CHECK-64-NEXT: subw %si, %cx
+; CHECK-64-NEXT: cmovbl %eax, %ecx
+; CHECK-64-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-64-NEXT: lock cmpxchgw %cx, (%rdi)
+; CHECK-64-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-64-NEXT: jne .LBB1_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_cond_i32:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: .cfi_offset %esi, -8
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movl (%edx), %eax
+; CHECK-32-NEXT: jmp .LBB2_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB2_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB2_1 Depth=1
+; CHECK-32-NEXT: lock cmpxchgl %esi, (%edx)
+; CHECK-32-NEXT: je .LBB2_4
+; CHECK-32-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: movl %eax, %esi
+; CHECK-32-NEXT: subl %ecx, %esi
+; CHECK-32-NEXT: jae .LBB2_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB2_1 Depth=1
+; CHECK-32-NEXT: movl %eax, %esi
+; CHECK-32-NEXT: jmp .LBB2_3
+; CHECK-32-NEXT: .LBB2_4: # %atomicrmw.end
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_cond_i32:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movl (%rdi), %eax
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movl %eax, %ecx
+; CHECK-64-NEXT: subl %esi, %ecx
+; CHECK-64-NEXT: cmovbl %eax, %ecx
+; CHECK-64-NEXT: lock cmpxchgl %ecx, (%rdi)
+; CHECK-64-NEXT: jne .LBB2_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_cond_i64:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %ebp
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: pushl %ebx
+; CHECK-32-NEXT: .cfi_def_cfa_offset 12
+; CHECK-32-NEXT: pushl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 20
+; CHECK-32-NEXT: .cfi_offset %esi, -20
+; CHECK-32-NEXT: .cfi_offset %edi, -16
+; CHECK-32-NEXT: .cfi_offset %ebx, -12
+; CHECK-32-NEXT: .cfi_offset %ebp, -8
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-32-NEXT: movl (%ebp), %eax
+; CHECK-32-NEXT: movl 4(%ebp), %edx
+; CHECK-32-NEXT: jmp .LBB3_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB3_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK-32-NEXT: lock cmpxchg8b (%ebp)
+; CHECK-32-NEXT: je .LBB3_4
+; CHECK-32-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: movl %eax, %ebx
+; CHECK-32-NEXT: subl %edi, %ebx
+; CHECK-32-NEXT: movl %edx, %ecx
+; CHECK-32-NEXT: sbbl %esi, %ecx
+; CHECK-32-NEXT: jae .LBB3_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK-32-NEXT: movl %edx, %ecx
+; CHECK-32-NEXT: movl %eax, %ebx
+; CHECK-32-NEXT: jmp .LBB3_3
+; CHECK-32-NEXT: .LBB3_4: # %atomicrmw.end
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-32-NEXT: popl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 12
+; CHECK-32-NEXT: popl %ebx
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: popl %ebp
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_cond_i64:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movq (%rdi), %rax
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movq %rax, %rcx
+; CHECK-64-NEXT: subq %rsi, %rcx
+; CHECK-64-NEXT: cmovbq %rax, %rcx
+; CHECK-64-NEXT: lock cmpxchgq %rcx, (%rdi)
+; CHECK-64-NEXT: jne .LBB3_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_sat_i8:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %ebx
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: .cfi_offset %ebx, -8
+; CHECK-32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movzbl (%edx), %eax
+; CHECK-32-NEXT: jmp .LBB4_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB4_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB4_1 Depth=1
+; CHECK-32-NEXT: lock cmpxchgb %bl, (%edx)
+; CHECK-32-NEXT: je .LBB4_4
+; CHECK-32-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: movl %eax, %ebx
+; CHECK-32-NEXT: subb %cl, %bl
+; CHECK-32-NEXT: jae .LBB4_3
+; CHECK-32-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; CHECK-32-NEXT: xorl %ebx, %ebx
+; CHECK-32-NEXT: jmp .LBB4_3
+; CHECK-32-NEXT: .LBB4_4: # %atomicrmw.end
+; CHECK-32-NEXT: popl %ebx
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_sat_i8:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movzbl (%rdi), %eax
+; CHECK-64-NEXT: xorl %ecx, %ecx
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movl %eax, %edx
+; CHECK-64-NEXT: subb %sil, %dl
+; CHECK-64-NEXT: movzbl %dl, %edx
+; CHECK-64-NEXT: cmovbl %ecx, %edx
+; CHECK-64-NEXT: lock cmpxchgb %dl, (%rdi)
+; CHECK-64-NEXT: jne .LBB4_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
+ ret i8 %result
+}
+
+define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_sat_i16:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 12
+; CHECK-32-NEXT: .cfi_offset %esi, -12
+; CHECK-32-NEXT: .cfi_offset %edi, -8
+; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movzwl (%edx), %eax
+; CHECK-32-NEXT: jmp .LBB5_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB5_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB5_1 Depth=1
+; CHECK-32-NEXT: lock cmpxchgw %si, (%edx)
+; CHECK-32-NEXT: je .LBB5_4
+; CHECK-32-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: xorl %esi, %esi
+; CHECK-32-NEXT: movl %eax, %edi
+; CHECK-32-NEXT: subw %cx, %di
+; CHECK-32-NEXT: jb .LBB5_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB5_1 Depth=1
+; CHECK-32-NEXT: movl %edi, %esi
+; CHECK-32-NEXT: jmp .LBB5_3
+; CHECK-32-NEXT: .LBB5_4: # %atomicrmw.end
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: popl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_sat_i16:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movzwl (%rdi), %eax
+; CHECK-64-NEXT: xorl %ecx, %ecx
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movl %eax, %edx
+; CHECK-64-NEXT: subw %si, %dx
+; CHECK-64-NEXT: cmovbl %ecx, %edx
+; CHECK-64-NEXT: lock cmpxchgw %dx, (%rdi)
+; CHECK-64-NEXT: jne .LBB5_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
+ ret i16 %result
+}
+
+define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_sat_i32:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 12
+; CHECK-32-NEXT: .cfi_offset %esi, -12
+; CHECK-32-NEXT: .cfi_offset %edi, -8
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movl (%edx), %eax
+; CHECK-32-NEXT: jmp .LBB6_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB6_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB6_1 Depth=1
+; CHECK-32-NEXT: lock cmpxchgl %esi, (%edx)
+; CHECK-32-NEXT: je .LBB6_4
+; CHECK-32-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: xorl %esi, %esi
+; CHECK-32-NEXT: movl %eax, %edi
+; CHECK-32-NEXT: subl %ecx, %edi
+; CHECK-32-NEXT: jb .LBB6_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB6_1 Depth=1
+; CHECK-32-NEXT: movl %edi, %esi
+; CHECK-32-NEXT: jmp .LBB6_3
+; CHECK-32-NEXT: .LBB6_4: # %atomicrmw.end
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: popl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_sat_i32:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movl (%rdi), %eax
+; CHECK-64-NEXT: xorl %ecx, %ecx
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movl %eax, %edx
+; CHECK-64-NEXT: subl %esi, %edx
+; CHECK-64-NEXT: cmovbl %ecx, %edx
+; CHECK-64-NEXT: lock cmpxchgl %edx, (%rdi)
+; CHECK-64-NEXT: jne .LBB6_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
+; CHECK-32-LABEL: atomicrmw_usub_sat_i64:
+; CHECK-32: # %bb.0:
+; CHECK-32-NEXT: pushl %ebp
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: pushl %ebx
+; CHECK-32-NEXT: .cfi_def_cfa_offset 12
+; CHECK-32-NEXT: pushl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-32-NEXT: pushl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 20
+; CHECK-32-NEXT: .cfi_offset %esi, -20
+; CHECK-32-NEXT: .cfi_offset %edi, -16
+; CHECK-32-NEXT: .cfi_offset %ebx, -12
+; CHECK-32-NEXT: .cfi_offset %ebp, -8
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK-32-NEXT: movl (%ebp), %esi
+; CHECK-32-NEXT: movl 4(%ebp), %edi
+; CHECK-32-NEXT: jmp .LBB7_1
+; CHECK-32-NEXT: .p2align 4, 0x90
+; CHECK-32-NEXT: .LBB7_3: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB7_1 Depth=1
+; CHECK-32-NEXT: movl %esi, %eax
+; CHECK-32-NEXT: movl %edi, %edx
+; CHECK-32-NEXT: lock cmpxchg8b (%ebp)
+; CHECK-32-NEXT: movl %eax, %esi
+; CHECK-32-NEXT: movl %edx, %edi
+; CHECK-32-NEXT: je .LBB7_4
+; CHECK-32-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-32-NEXT: xorl %ecx, %ecx
+; CHECK-32-NEXT: movl %esi, %eax
+; CHECK-32-NEXT: subl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movl %edi, %edx
+; CHECK-32-NEXT: sbbl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movl $0, %ebx
+; CHECK-32-NEXT: jb .LBB7_3
+; CHECK-32-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-32-NEXT: # in Loop: Header=BB7_1 Depth=1
+; CHECK-32-NEXT: movl %edx, %ecx
+; CHECK-32-NEXT: movl %eax, %ebx
+; CHECK-32-NEXT: jmp .LBB7_3
+; CHECK-32-NEXT: .LBB7_4: # %atomicrmw.end
+; CHECK-32-NEXT: movl %esi, %eax
+; CHECK-32-NEXT: movl %edi, %edx
+; CHECK-32-NEXT: popl %esi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-32-NEXT: popl %edi
+; CHECK-32-NEXT: .cfi_def_cfa_offset 12
+; CHECK-32-NEXT: popl %ebx
+; CHECK-32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-32-NEXT: popl %ebp
+; CHECK-32-NEXT: .cfi_def_cfa_offset 4
+; CHECK-32-NEXT: retl
+;
+; CHECK-64-LABEL: atomicrmw_usub_sat_i64:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: movq (%rdi), %rax
+; CHECK-64-NEXT: xorl %ecx, %ecx
+; CHECK-64-NEXT: .p2align 4, 0x90
+; CHECK-64-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-64-NEXT: movq %rax, %rdx
+; CHECK-64-NEXT: subq %rsi, %rdx
+; CHECK-64-NEXT: cmovbq %rcx, %rdx
+; CHECK-64-NEXT: lock cmpxchgq %rdx, (%rdi)
+; CHECK-64-NEXT: jne .LBB7_1
+; CHECK-64-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-64-NEXT: retq
+ %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
index 00601b7ae6e0d2..bafc19a2b15de3 100644
--- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td
@@ -136,14 +136,14 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 6*/ GIMT_Encode4([[#DEFAULT:]]),
-// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(474), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(510), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(557), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(591), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
-// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(614), GIMT_Encode4(0),
-// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(626),
+// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(482), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(518), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(565), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(599), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
+// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(622), GIMT_Encode4(0),
+// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(634),
// CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]]
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(498), // Rule ID 4 //
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(506), // Rule ID 4 //
// CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything),
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled),
// CHECK-NEXT: // MIs[0] a
@@ -156,8 +156,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIM_CheckIsSafeToFold, /*NumInsns*/1,
// CHECK-NEXT: // Combiner Rule #3: InstTest1
// CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner2),
-// CHECK-NEXT: // Label 7: @498
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(509), // Rule ID 3 //
+// CHECK-NEXT: // Label 7: @506
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(517), // Rule ID 3 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled),
// CHECK-NEXT: // MIs[0] a
// CHECK-NEXT: // No operand predicates
@@ -165,10 +165,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: // No operand predicates
// CHECK-NEXT: // Combiner Rule #2: InstTest0
// CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1),
-// CHECK-NEXT: // Label 8: @509
+// CHECK-NEXT: // Label 8: @517
// CHECK-NEXT: GIM_Reject,
-// CHECK-NEXT: // Label 1: @510
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(556), // Rule ID 6 //
+// CHECK-NEXT: // Label 1: @518
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(564), // Rule ID 6 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled),
// CHECK-NEXT: GIM_RootCheckType, /*Op*/2, /*Type*/GILLT_s32,
// CHECK-NEXT: // MIs[0] dst
@@ -185,10 +185,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // dst
// CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z
// CHECK-NEXT: GIR_EraseRootFromParent_Done,
-// CHECK-NEXT: // Label 9: @556
+// CHECK-NEXT: // Label 9: @564
// CHECK-NEXT: GIM_Reject,
-// CHECK-NEXT: // Label 2: @557
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(590), // Rule ID 5 //
+// CHECK-NEXT: // Label 2: @565
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(598), // Rule ID 5 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled),
// CHECK-NEXT: // MIs[0] tmp
// CHECK-NEXT: GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1]
@@ -204,29 +204,29 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/1, // ptr
// CHECK-NEXT: GIR_MergeMemOperands, /*InsnID*/0, /*NumInsns*/2, /*MergeInsnID's*/0, 1,
// CHECK-NEXT: GIR_EraseRootFromParent_Done,
-// CHECK-NEXT: // Label 10: @590
+// CHECK-NEXT: // Label 10: @598
// CHECK-NEXT: GIM_Reject,
-// CHECK-NEXT: // Label 3: @591
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(602), // Rule ID 0 //
+// CHECK-NEXT: // Label 3: @599
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(610), // Rule ID 0 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled),
// CHECK-NEXT: // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC'
// CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0),
-// CHECK-NEXT: // Label 11: @602
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(613), // Rule ID 1 //
+// CHECK-NEXT: // Label 11: @610
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(621), // Rule ID 1 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
// CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC'
// CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0),
-// CHECK-NEXT: // Label 12: @613
+// CHECK-NEXT: // Label 12: @621
// CHECK-NEXT: GIM_Reject,
-// CHECK-NEXT: // Label 4: @614
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(625), // Rule ID 2 //
+// CHECK-NEXT: // Label 4: @622
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(633), // Rule ID 2 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled),
// CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT'
// CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0),
-// CHECK-NEXT: // Label 13: @625
+// CHECK-NEXT: // Label 13: @633
// CHECK-NEXT: GIM_Reject,
-// CHECK-NEXT: // Label 5: @626
-// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(660), // Rule ID 7 //
+// CHECK-NEXT: // Label 5: @634
+// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(668), // Rule ID 7 //
// CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled),
// CHECK-NEXT: // MIs[0] dst
// CHECK-NEXT: // No operand predicates
@@ -240,7 +240,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // dst
// CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0,
// CHECK-NEXT: GIR_EraseRootFromParent_Done,
-// CHECK-NEXT: // Label 14: @660
+// CHECK-NEXT: // Label 14: @668
// CHECK-NEXT: GIM_Reject,
// CHECK-NEXT: // Label 6: @[[#%u, DEFAULT]]
// CHECK-NEXT: GIM_Reject,
diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td
index 853831366fa531..b9aea33ac96aaa 100644
--- a/llvm/test/TableGen/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter.td
@@ -513,7 +513,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
// R00O-NEXT: GIM_Reject,
// R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
// R00O-NEXT: GIM_Reject,
-// R00O-NEXT: }; // Size: 1816 bytes
+// R00O-NEXT: }; // Size: 1824 bytes
def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
[(set GPR32:$dst,
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
index f41a97f9ecc818..4a43c16903394f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
@@ -106,6 +106,10 @@ def AtomicBinOpUIncWrap : LLVM_EnumAttrCase<"uinc_wrap",
"uinc_wrap", "UIncWrap", 15>;
def AtomicBinOpUDecWrap : LLVM_EnumAttrCase<"udec_wrap",
"udec_wrap", "UDecWrap", 16>;
+def AtomicBinOpUSubCond : LLVM_EnumAttrCase<"usub_cond",
+ "usub_cond", "USubCond", 17>;
+def AtomicBinOpUSubSat : LLVM_EnumAttrCase<"usub_sat",
+ "usub_sat", "USubSat", 18>;
// A sentinel value that has no MLIR counterpart.
def AtomicBadBinOp : LLVM_EnumAttrCase<"", "", "BAD_BINOP", 0>;
@@ -118,7 +122,7 @@ def AtomicBinOp : LLVM_EnumAttr<
AtomicBinOpNand, AtomicBinOpOr, AtomicBinOpXor, AtomicBinOpMax,
AtomicBinOpMin, AtomicBinOpUMax, AtomicBinOpUMin, AtomicBinOpFAdd,
AtomicBinOpFSub, AtomicBinOpFMax, AtomicBinOpFMin, AtomicBinOpUIncWrap,
- AtomicBinOpUDecWrap],
+ AtomicBinOpUDecWrap, AtomicBinOpUSubCond, AtomicBinOpUSubSat],
[AtomicBadBinOp]> {
let cppNamespace = "::mlir::LLVM";
}
diff --git a/mlir/test/Target/LLVMIR/Import/instructions.ll b/mlir/test/Target/LLVMIR/Import/instructions.ll
index 3b1dcee1e85c7c..f75c79ea633804 100644
--- a/mlir/test/Target/LLVMIR/Import/instructions.ll
+++ b/mlir/test/Target/LLVMIR/Import/instructions.ll
@@ -440,11 +440,15 @@ define void @atomic_rmw(ptr %ptr1, i32 %val1, ptr %ptr2, float %val2) {
%16 = atomicrmw uinc_wrap ptr %ptr1, i32 %val1 acquire
; CHECK: llvm.atomicrmw udec_wrap %[[PTR1]], %[[VAL1]] acquire
%17 = atomicrmw udec_wrap ptr %ptr1, i32 %val1 acquire
+ ; CHECK: llvm.atomicrmw usub_cond %[[PTR1]], %[[VAL1]] acquire
+ %18 = atomicrmw usub_cond ptr %ptr1, i32 %val1 acquire
+ ; CHECK: llvm.atomicrmw usub_sat %[[PTR1]], %[[VAL1]] acquire
+ %19 = atomicrmw usub_sat ptr %ptr1, i32 %val1 acquire
; CHECK: llvm.atomicrmw volatile
; CHECK-SAME: syncscope("singlethread")
; CHECK-SAME: {alignment = 8 : i64}
- %18 = atomicrmw volatile udec_wrap ptr %ptr1, i32 %val1 syncscope("singlethread") acquire, align 8
+ %20 = atomicrmw volatile udec_wrap ptr %ptr1, i32 %val1 syncscope("singlethread") acquire, align 8
ret void
}
diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index d2cd0221e0ea7a..9086963c6f6830 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -1522,11 +1522,15 @@ llvm.func @atomicrmw(
%15 = llvm.atomicrmw uinc_wrap %i32_ptr, %i32 monotonic : !llvm.ptr, i32
// CHECK: atomicrmw udec_wrap ptr %{{.*}}, i32 %{{.*}} monotonic
%16 = llvm.atomicrmw udec_wrap %i32_ptr, %i32 monotonic : !llvm.ptr, i32
+ // CHECK: atomicrmw usub_cond ptr %{{.*}}, i32 %{{.*}} monotonic
+ %17 = llvm.atomicrmw usub_cond %i32_ptr, %i32 monotonic : !llvm.ptr, i32
+ // CHECK: atomicrmw usub_sat ptr %{{.*}}, i32 %{{.*}} monotonic
+ %18 = llvm.atomicrmw usub_sat %i32_ptr, %i32 monotonic : !llvm.ptr, i32
// CHECK: atomicrmw volatile
// CHECK-SAME: syncscope("singlethread")
// CHECK-SAME: align 8
- %17 = llvm.atomicrmw volatile udec_wrap %i32_ptr, %i32 syncscope("singlethread") monotonic {alignment = 8 : i64} : !llvm.ptr, i32
+ %19 = llvm.atomicrmw volatile udec_wrap %i32_ptr, %i32 syncscope("singlethread") monotonic {alignment = 8 : i64} : !llvm.ptr, i32
llvm.return
}