[Mlir-commits] [llvm] [mlir] Add usub_cond and usub_sat operations to atomicrmw (PR #105553)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Aug 21 10:07:01 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-llvm
Author: None (anjenner)
<details>
<summary>Changes</summary>
These both perform conditional subtraction, returning the minuend and zero respectively, if the difference is negative.
---
Patch is 393.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105553.diff
81 Files Affected:
- (modified) llvm/bindings/ocaml/llvm/llvm.ml (+6)
- (modified) llvm/bindings/ocaml/llvm/llvm.mli (+6)
- (modified) llvm/docs/AMDGPUUsage.rst (-5)
- (modified) llvm/docs/GlobalISel/GenericOpcode.rst (+3-1)
- (modified) llvm/docs/LangRef.rst (+4)
- (modified) llvm/docs/ReleaseNotes.rst (+6)
- (modified) llvm/include/llvm/AsmParser/LLToken.h (+2)
- (modified) llvm/include/llvm/Bitcode/LLVMBitCodes.h (+3-1)
- (modified) llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h (+34)
- (modified) llvm/include/llvm/CodeGen/ISDOpcodes.h (+2)
- (modified) llvm/include/llvm/CodeGen/SelectionDAGNodes.h (+22-18)
- (modified) llvm/include/llvm/IR/Instructions.h (+9-1)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-8)
- (modified) llvm/include/llvm/Support/TargetOpcodes.def (+3-1)
- (modified) llvm/include/llvm/Target/GenericOpcodes.td (+2)
- (modified) llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td (+2)
- (modified) llvm/include/llvm/Target/TargetSelectionDAG.td (+4)
- (modified) llvm/lib/AsmParser/LLLexer.cpp (+2)
- (modified) llvm/lib/AsmParser/LLParser.cpp (+6)
- (modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (+4)
- (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+4)
- (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+7-1)
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+6)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+9-15)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+6)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp (+4)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+2)
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+7-4)
- (modified) llvm/lib/IR/Instructions.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUGISel.td (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (+2-4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+7-3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (+9-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-6)
- (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+1-1)
- (modified) llvm/lib/Target/AMDGPU/DSInstructions.td (+35-11)
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+7-16)
- (modified) llvm/lib/Target/AMDGPU/R600ISelLowering.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+10-25)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+2)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+3-1)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+2)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+3-1)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+2)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp (+2)
- (modified) llvm/lib/Transforms/Utils/LowerAtomic.cpp (+11)
- (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir (+6)
- (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll (-57)
- (modified) llvm/test/Assembler/atomic.ll (+10)
- (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+147)
- (modified) llvm/test/Bitcode/compatibility.ll (+28)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (+6)
- (added) llvm/test/CodeGen/AArch64/atomicrmw-cond-sub-clamp.ll (+142)
- (removed) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll (-215)
- (added) llvm/test/CodeGen/AMDGPU/atomicrmw_cond_sub.ll (+197)
- (added) llvm/test/CodeGen/AMDGPU/atomicrmw_sub_clamp.ll (+495)
- (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+100-60)
- (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll (+7-5)
- (modified) llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll (+14-4)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll (-219)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll (+4-6)
- (modified) llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll (+52)
- (modified) llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll (+1-1)
- (added) llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll (+186)
- (added) llvm/test/CodeGen/Hexagon/atomicrmw-cond-sub-clamp.ll (+355)
- (added) llvm/test/CodeGen/LoongArch/atomicrmw-cond-sub-clamp.ll (+362)
- (added) llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll (+396)
- (added) llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll (+1412)
- (added) llvm/test/CodeGen/SPARC/atomicrmw-cond-sub-clamp.ll (+326)
- (added) llvm/test/CodeGen/VE/Scalar/atomicrmw-cond-sub-clamp.ll (+240)
- (added) llvm/test/CodeGen/WebAssembly/atomicrmw-cond-sub-clamp.ll (+355)
- (added) llvm/test/CodeGen/X86/atomicrmw-cond-sub-clamp.ll (+413)
- (modified) llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td (+27-27)
- (modified) llvm/test/TableGen/GlobalISelEmitter.td (+1-1)
- (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll (+358)
- (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll (+798)
- (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td (+5-1)
- (modified) mlir/test/Target/LLVMIR/Import/instructions.ll (+5-1)
- (modified) mlir/test/Target/LLVMIR/llvmir.mlir (+5-1)
``````````diff
diff --git a/llvm/bindings/ocaml/llvm/llvm.ml b/llvm/bindings/ocaml/llvm/llvm.ml
index 908e6658a89f73..74ba31389b378e 100644
--- a/llvm/bindings/ocaml/llvm/llvm.ml
+++ b/llvm/bindings/ocaml/llvm/llvm.ml
@@ -296,6 +296,12 @@ module AtomicRMWBinOp = struct
| UMin
| FAdd
| FSub
+ | FMax
+ | FMin
+ | UInc_Wrap
+ | UDec_Wrap
+ | USub_Cond
+ | USub_Sat
end
module ValueKind = struct
diff --git a/llvm/bindings/ocaml/llvm/llvm.mli b/llvm/bindings/ocaml/llvm/llvm.mli
index b8a430adf6cf2d..076e651ba158fc 100644
--- a/llvm/bindings/ocaml/llvm/llvm.mli
+++ b/llvm/bindings/ocaml/llvm/llvm.mli
@@ -331,6 +331,12 @@ module AtomicRMWBinOp : sig
| UMin
| FAdd
| FSub
+ | FMax
+ | FMin
+ | UInc_Wrap
+ | UDec_Wrap
+ | USub_Cond
+ | USub_Sat
end
(** The kind of an [llvalue], the result of [classify_value v].
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 80140734cbefd6..7eef420fdeaf1c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1358,11 +1358,6 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
The iglp_opt strategy implementations are subject to change.
- llvm.amdgcn.atomic.cond.sub.u32 Provides direct access to flat_atomic_cond_sub_u32, global_atomic_cond_sub_u32
- and ds_cond_sub_u32 based on address space on gfx12 targets. This
- performs subtraction only if the memory value is greater than or
- equal to the data value.
-
llvm.amdgcn.s.getpc Provides access to the s_getpc_b64 instruction, but with the return value
sign-extended from the width of the underlying PC hardware register even on
processors where the s_getpc_b64 instruction returns a zero-extended value.
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index d32aeff5a69bb1..bba56d9a5c0ec2 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -863,7 +863,9 @@ operands.
G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD,
G_ATOMICRMW_FSUB, G_ATOMICRMW_FMAX,
- G_ATOMICRMW_FMIN
+ G_ATOMICRMW_FMIN, G_ATOMICRMW_UINC_WRAP,
+ G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_USUB_COND,
+ G_ATOMICRMW_USUB_SAT
Generic atomicrmw. Expects a MachineMemOperand in addition to explicit
operands.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 445980a18e7e93..d44db5999dbe2d 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11241,6 +11241,8 @@ operation. The operation must be one of the following keywords:
- fmin
- uinc_wrap
- udec_wrap
+- usub_cond
+- usub_sat
For most of these operations, the type of '<value>' must be an integer
type whose bit width is a power of two greater than or equal to eight
@@ -11291,6 +11293,8 @@ operation argument:
- fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*`` intrinsic)
- uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above input value)
- udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` (decrement with wraparound to input value when decremented below zero).
+- usub_cond: ``*ptr = (*ptr u>= val) ? *ptr - val : *ptr`` (subtract only if no unsigned overflow).
+- usub_sat: ``*ptr = (*ptr u>= val) ? *ptr - val : 0`` (subtract with clamping to zero).
Example:
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 65fa21e517940b..f6d6b3576722c1 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -53,6 +53,8 @@ Changes to the LLVM IR
* The ``x86_mmx`` IR type has been removed. It will be translated to
the standard vector type ``<1 x i64>`` in bitcode upgrade.
+* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``.
+
Changes to LLVM infrastructure
------------------------------
@@ -75,6 +77,10 @@ Changes to the AArch64 Backend
Changes to the AMDGPU Backend
-----------------------------
+* Removed ``llvm.amdgcn.atomic.cond.sub.u32`` and
+ ``llvm.amdgcn.atomic.csub.u32`` intrinsics. :ref:`atomicrmw <i_atomicrmw>`
+ should be used instead with ``usub_cond`` and ``usub_sat``.
+
Changes to the ARM Backend
--------------------------
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index db6780b70ca5aa..19029842a572a4 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -268,6 +268,8 @@ enum Kind {
kw_fmin,
kw_uinc_wrap,
kw_udec_wrap,
+ kw_usub_cond,
+ kw_usub_sat,
// Instruction Opcodes (Opcode in UIntVal).
kw_fneg,
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 4beac37a583445..49a48f1c1510c3 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -485,7 +485,9 @@ enum RMWOperations {
RMW_FMAX = 13,
RMW_FMIN = 14,
RMW_UINC_WRAP = 15,
- RMW_UDEC_WRAP = 16
+ RMW_UDEC_WRAP = 16,
+ RMW_USUB_COND = 17,
+ RMW_USUB_SAT = 18
};
/// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 56a77b8596a18b..d3b3ffe15285e1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1636,6 +1636,40 @@ class MachineIRBuilder {
const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
MachineMemOperand &MMO);
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_USUB_COND Addr, Val, MMO`.
+ ///
+ /// Atomically replace the value at \p Addr with the original value minus \p Val
+ /// if the original value is greater than or equal to \p Val, or leaves it
+ /// unchanged otherwise. Puts the original value from \p Addr in \p OldValRes.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p OldValRes must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the
+ /// same type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAtomicRMWCondSub(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_SUB_CLAMP Addr, Val, MMO`.
+ ///
+ /// Atomically replace the value at \p Addr with the original value minus \p Val
+ /// if the original value is greater than or equal to \p Val, or with zero
+ /// otherwise. Puts the original value from \p Addr in \p OldValRes.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p OldValRes must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the
+ /// same type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAtomicRMWSubClamp(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
/// Build and insert `G_FENCE Ordering, Scope`.
MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 86ff2628975942..c3fbb20dbc3b17 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1345,6 +1345,8 @@ enum NodeType {
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_UINC_WRAP,
ATOMIC_LOAD_UDEC_WRAP,
+ ATOMIC_LOAD_USUB_COND,
+ ATOMIC_LOAD_USUB_SAT,
/// Masked load and store - consecutive vector load and store operations
/// with additional mask operand that prevents memory accesses to the
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 88549d9c9a2858..6067b3b29ea181 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1484,6 +1484,8 @@ class MemSDNode : public SDNode {
case ISD::ATOMIC_LOAD_FMIN:
case ISD::ATOMIC_LOAD_UINC_WRAP:
case ISD::ATOMIC_LOAD_UDEC_WRAP:
+ case ISD::ATOMIC_LOAD_USUB_COND:
+ case ISD::ATOMIC_LOAD_USUB_SAT:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE:
case ISD::MLOAD:
@@ -1550,27 +1552,29 @@ class AtomicSDNode : public MemSDNode {
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
- return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
+ return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
- N->getOpcode() == ISD::ATOMIC_SWAP ||
- N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
- N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
- N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
- N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
- N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
- N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
- N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
- N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
- N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
- N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
- N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
- N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
+ N->getOpcode() == ISD::ATOMIC_SWAP ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP ||
- N->getOpcode() == ISD::ATOMIC_LOAD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_USUB_COND ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_USUB_SAT ||
+ N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
};
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index dbd7d49a3e7672..41f4a51a782304 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -751,8 +751,16 @@ class AtomicRMWInst : public Instruction {
/// *p = ((old == 0) || (old u> v)) ? v : (old - 1)
UDecWrap,
+ /// Subtract only if result would be positive.
+ /// *p = (old u>= v) ? old - v : old
+ USubCond,
+
+ /// Subtract with clamping of negative results to zero.
+ /// *p = (old u>= v) ? old - v : 0
+ USubSat,
+
FIRST_BINOP = Xchg,
- LAST_BINOP = UDecWrap,
+ LAST_BINOP = USubSat,
BAD_BINOP
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 539410f1ed05e6..f61822578096c1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1353,7 +1353,6 @@ def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
-def int_amdgcn_raw_buffer_atomic_cond_sub_u32 : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
[llvm_anyint_ty],
[LLVMMatchType<0>, // src(VGPR)
@@ -1390,7 +1389,6 @@ def int_amdgcn_raw_ptr_buffer_atomic_or : AMDGPURawPtrBufferAtomic;
def int_amdgcn_raw_ptr_buffer_atomic_xor : AMDGPURawPtrBufferAtomic;
def int_amdgcn_raw_ptr_buffer_atomic_inc : AMDGPURawPtrBufferAtomic;
def int_amdgcn_raw_ptr_buffer_atomic_dec : AMDGPURawPtrBufferAtomic;
-def int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32 : AMDGPURawPtrBufferAtomic;
def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
[llvm_anyint_ty],
[LLVMMatchType<0>, // src(VGPR)
@@ -1431,7 +1429,6 @@ def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
-def int_amdgcn_struct_buffer_atomic_cond_sub_u32 : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
[llvm_anyint_ty],
[LLVMMatchType<0>, // src(VGPR)
@@ -1467,7 +1464,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_or : AMDGPUStructPtrBufferAtomic;
def int_amdgcn_struct_ptr_buffer_atomic_xor : AMDGPUStructPtrBufferAtomic;
def int_amdgcn_struct_ptr_buffer_atomic_inc : AMDGPUStructPtrBufferAtomic;
def int_amdgcn_struct_ptr_buffer_atomic_dec : AMDGPUStructPtrBufferAtomic;
-def int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32 : AMDGPUStructPtrBufferAtomic;
def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
[llvm_anyint_ty],
[LLVMMatchType<0>, // src(VGPR)
@@ -2463,8 +2459,6 @@ class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic <
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "",
[SDNPMemOperand]>;
-def int_amdgcn_global_atomic_csub : AMDGPUAtomicRtn<llvm_i32_ty>;
-
// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
// <ray_dir>, <ray_inv_dir>, <texture_descr>
// <node_ptr> is i32 or i64.
@@ -2664,8 +2658,6 @@ def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
-def int_amdgcn_atomic_cond_sub_u32 : AMDGPUAtomicRtn<llvm_i32_ty>;
-
class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
Intrinsic<
[llvm_any_ty],
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 9fb6de49fb2055..bf2ba5e352c3ce 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -414,12 +414,14 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UINC_WRAP)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UDEC_WRAP)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_USUB_COND)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_USUB_SAT)
// Marker for start of Generic AtomicRMW opcodes
HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_START, G_ATOMICRMW_XCHG)
// Marker for end of Generic AtomicRMW opcodes
-HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_UDEC_WRAP)
+HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_USUB_SAT)
// Generic atomic fence
HANDLE_TARGET_OPCODE(G_FENCE)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 36a0a087ba457c..f4934af4563d83 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1311,6 +1311,8 @@ def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP;
def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP;
def G_ATOMICRMW_UINC_WRAP : G_ATOMICRMW_OP;
def G_ATOMICRMW_UDEC_WRAP : G_ATOMICRMW_OP;
+def G_ATOMICRMW_USUB_COND : G_ATOMICRMW_OP;
+def G_ATOMICRMW_USUB_SAT : G_ATOMICRMW_OP;
def G_FENCE : GenericInstruction {
let OutOperandList = (outs);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index e9dbdef9fe9e7c..507716e68097db 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -259,6 +259,8 @@ def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>;
def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>;
def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>;
def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>;
+def : GINodeEquiv<G_ATOMICRMW_USUB_COND, atomic_load_usub_cond>;
+def : GINodeEquiv<G_ATOMICRMW_USUB_SAT, atomic_load_usub_sat>;
def : GINodeEquiv<G_FENCE, atomic_fence>;
def : GINodeEquiv<G_PREFETCH, prefetch>;
def : GINodeEquiv<G_TRAP, trap>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 172deffbd31771..f8f17036269f6e 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -753,6 +753,10 @@ def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_usub_cond : SDNode<"ISD::ATOMIC_LOAD_USUB_COND", SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_usub_sat : SDNode<"ISD::ATOMIC_LOAD_USUB_SAT", SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 7c97f7afbe0933..a3e47da77fe776 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -704,6 +704,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
KEYWORD(uinc_wrap);
KEYWORD(udec_wrap);
+ KEYWORD(usub_cond);
+ KEYWORD(usub_sat);
KEYWORD(splat);
KEYWORD(vscale);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f41907f0351257..d379393ff61ccd 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8352,6 +8352,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
case lltok::kw_udec_wrap:
Operation = AtomicRMWInst::UDecWrap;
break;
+ case lltok::kw_usub_cond:
+ Operation = AtomicRMWInst::USubCond;
+ break;
+ case lltok::kw_usub_sat:
+ Operation = AtomicRMWInst::USubSat;
+ break;
case lltok::kw_fadd:
Operation = AtomicRMWInst::FAdd;
IsFP = true;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index d4dbab04e8ecdb..8005847f763750 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1349,6 +1349,10 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
return AtomicRMWInst::UIncWrap;
case bitc::RMW_UDEC_WRAP:
return AtomicRMWInst::UDecWrap;
+ case bitc::RMW_USUB_COND:
+ return AtomicRMWInst::USubCond;
+ case bitc::RMW_USUB_SAT:
+ return AtomicRMWInst::USubSat;
}
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 03d0537291dada..655b4cf984d7c7 100644
--- a/llv...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/105553
More information about the Mlir-commits
mailing list