[llvm] 93ec3fa - AMDGPU: Support atomicrmw uinc_wrap/udec_wrap
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 27 18:17:52 PST 2023
Author: Matt Arsenault
Date: 2023-01-27T22:17:16-04:00
New Revision: 93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c
URL: https://github.com/llvm/llvm-project/commit/93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c
DIFF: https://github.com/llvm/llvm-project/commit/93ec3fa4021d7080ff4e1d1eb2f36997ebd2f86c.diff
LOG: AMDGPU: Support atomicrmw uinc_wrap/udec_wrap
For now keep the existing intrinsics working.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/lib/Target/AMDGPU/R600ISelLowering.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/flat_atomics.ll
llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
llvm/test/CodeGen/AMDGPU/global_atomics.ll
llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
llvm/test/CodeGen/AMDGPU/local-atomics.ll
llvm/test/CodeGen/AMDGPU/local-atomics64.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7e7dbacaac118..9ef59a4e9c493 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -227,10 +227,8 @@ def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT, SItbuffer_store>;
def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>;
// FIXME: Check MMO is atomic
-def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>;
+def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap_glue>;
+def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap_glue>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, SIatomic_fmin>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, SIatomic_fmax>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, atomic_load_fmin_glue>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 42d1f58e4239c..ca2d7ff2584e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -503,10 +503,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
// isa<MemSDNode> almost works but is slightly too permissive for some DS
// intrinsics.
if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
- (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
- Opc == ISD::ATOMIC_LOAD_FADD ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX) {
N = glueCopyToM0LDSInit(N);
SelectCode(N);
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8121b381e83f1..f4d49fcc0838a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4508,8 +4508,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(DS_ORDERED_COUNT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
- NODE_NAME_CASE(ATOMIC_INC)
- NODE_NAME_CASE(ATOMIC_DEC)
NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
NODE_NAME_CASE(BUFFER_LOAD)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bc3b57a82d086..e5efa1362664d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -505,8 +505,6 @@ enum NodeType : unsigned {
TBUFFER_LOAD_FORMAT_D16,
DS_ORDERED_COUNT,
ATOMIC_CMP_SWAP,
- ATOMIC_INC,
- ATOMIC_DEC,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_FMAX,
BUFFER_LOAD,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b0c5df50b72b5..5f630ea79b1f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3446,9 +3446,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ATOMICRMW_MAX:
case TargetOpcode::G_ATOMICRMW_UMIN:
case TargetOpcode::G_ATOMICRMW_UMAX:
+ case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
+ case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
case TargetOpcode::G_ATOMICRMW_FADD:
- case AMDGPU::G_AMDGPU_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
return selectG_LOAD_STORE_ATOMICRMW(I);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 22b3272792119..64977b183fa7a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -638,6 +638,8 @@ defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
+defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
+defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 41cb0a99b420d..992200b076d07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1335,7 +1335,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
- G_ATOMICRMW_UMIN})
+ G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP})
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
{S64, GlobalPtr}, {S64, LocalPtr},
{S32, RegionPtr}, {S64, RegionPtr}});
@@ -4627,8 +4627,8 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI,
MachineIRBuilder &B,
bool IsInc) const {
- unsigned Opc = IsInc ? AMDGPU::G_AMDGPU_ATOMIC_INC :
- AMDGPU::G_AMDGPU_ATOMIC_DEC;
+ unsigned Opc = IsInc ? AMDGPU::G_ATOMICRMW_UINC_WRAP :
+ AMDGPU::G_ATOMICRMW_UDEC_WRAP;
B.buildInstr(Opc)
.addDef(MI.getOperand(0).getReg())
.addUse(MI.getOperand(2).getReg())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5e16a405f375c..d462ee5ce1a26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4828,9 +4828,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
+ case AMDGPU::G_ATOMICRMW_UINC_WRAP:
+ case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
- case AMDGPU::G_AMDGPU_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_ATOMIC_FMAX: {
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index ebd32c63fe5c6..b55ebaac86bd3 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1465,8 +1465,8 @@ defm : BufferAtomicPat<"atomic_load_umax_global", Ty, "BUFFER_ATOMIC_UMAX" # Suf
defm : BufferAtomicPat<"atomic_load_and_global", Ty, "BUFFER_ATOMIC_AND" # Suffix>;
defm : BufferAtomicPat<"atomic_load_or_global", Ty, "BUFFER_ATOMIC_OR" # Suffix>;
defm : BufferAtomicPat<"atomic_load_xor_global", Ty, "BUFFER_ATOMIC_XOR" # Suffix>;
-defm : BufferAtomicPat<"atomic_inc_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>;
-defm : BufferAtomicPat<"atomic_dec_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_uinc_wrap_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>;
} // end foreach Ty
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 26f3537ff0957..326266cadd69a 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1069,8 +1069,8 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
-defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_inc">;
-defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_dec">;
+defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
+defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
@@ -1097,8 +1097,8 @@ defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
-defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_inc">;
-defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_dec">;
+defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
+defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3168aaa889953..dff309127bc2f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1160,8 +1160,8 @@ def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
foreach as = [ "flat", "global" ] in {
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
@@ -1174,8 +1174,8 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
@@ -1429,8 +1429,8 @@ defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global,
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>;
@@ -1444,8 +1444,8 @@ defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_inc_global", i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index fad393267a717..c7e3755632c15 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -2182,3 +2182,18 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
return Node;
}
+
+TargetLowering::AtomicExpansionKind
+R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ switch (RMW->getOperation()) {
+ case AtomicRMWInst::UIncWrap:
+ case AtomicRMWInst::UDecWrap:
+ // FIXME: Cayman at least appears to have instructions for this, but the
+ // instruction definitions appear to be missing.
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ break;
+ }
+
+ return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 8a5479db4ee67..fc361c01bc67a 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -114,6 +114,9 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
SelectionDAG &DAG) const;
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
};
} // End namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e0ad11d5af24f..056daee92b272 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -791,6 +791,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
ISD::ATOMIC_LOAD_UMIN,
ISD::ATOMIC_LOAD_UMAX,
ISD::ATOMIC_LOAD_FADD,
+ ISD::ATOMIC_LOAD_UINC_WRAP,
+ ISD::ATOMIC_LOAD_UDEC_WRAP,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_W_CHAIN});
@@ -7317,6 +7319,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fadd: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc;
@@ -7324,24 +7328,28 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_ds_fadd:
Opc = ISD::ATOMIC_LOAD_FADD;
break;
+ case Intrinsic::amdgcn_atomic_inc:
+ Opc = ISD::ATOMIC_LOAD_UINC_WRAP;
+ break;
+ case Intrinsic::amdgcn_atomic_dec:
+ Opc = ISD::ATOMIC_LOAD_UDEC_WRAP;
+ break;
}
return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
M->getOperand(0), M->getOperand(2), M->getOperand(3),
M->getMemOperand());
}
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc;
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
- Opc = AMDGPUISD::ATOMIC_INC;
+ Opc = ISD::ATOMIC_LOAD_UINC_WRAP;
break;
case Intrinsic::amdgcn_atomic_dec:
- Opc = AMDGPUISD::ATOMIC_DEC;
+ Opc = ISD::ATOMIC_LOAD_UDEC_WRAP;
break;
case Intrinsic::amdgcn_ds_fmin:
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
@@ -12794,8 +12802,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
case AMDGPUISD::ATOMIC_CMP_SWAP:
- case AMDGPUISD::ATOMIC_INC:
- case AMDGPUISD::ATOMIC_DEC:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
case AMDGPUISD::BUFFER_ATOMIC_SWAP:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0fb39c5324965..41a7ced8abd62 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -50,14 +50,6 @@ def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;
-def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
-def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;
@@ -355,8 +347,6 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>;
-defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>;
defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;
@@ -762,8 +752,8 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
-defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
-defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
+defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
+defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0c2a13852fcb7..19f2f27120f6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3486,8 +3486,6 @@ def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
}
let Namespace = "AMDGPU" in {
-def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP;
-def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP;
}
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
index a15854c061825..c5d8414c08188 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
@@ -7106,3 +7106,209 @@ entry:
store atomic half %in, ptr %out seq_cst, align 2
ret void
}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_offset:
+; CIVI: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
+define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 4
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_max_offset:
+; CIVI: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
+define amdgpu_kernel void @atomic_inc_i32_max_offset(ptr %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 1023
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_max_offset_p1:
+; GCN: flat_atomic_inc v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @atomic_inc_i32_max_offset_p1(ptr %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 1024
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_ret_offset:
+; CIVI: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr %out, ptr %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 4
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_incr64_offset:
+; CIVI: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
+define amdgpu_kernel void @atomic_inc_i32_incr64_offset(ptr %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_ret_incr64_offset:
+; CIVI: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i32_ret_incr64_offset(ptr %out, ptr %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32:
+; GCN: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+define amdgpu_kernel void @atomic_inc_i32(ptr %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_ret:
+; GCN: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i32_ret(ptr %out, ptr %out2, i32 %in) {
+entry:
+ %val = atomicrmw volatile uinc_wrap ptr %out, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_incr64:
+; GCN: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+define amdgpu_kernel void @atomic_inc_i32_incr64(ptr %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_ret_incr64:
+; GCN: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i32_ret_incr64(ptr %out, ptr %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_offset:
+; CIVI: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
+define amdgpu_kernel void @atomic_dec_i32_offset(ptr %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 4
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_max_offset:
+; CIVI: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
+define amdgpu_kernel void @atomic_dec_i32_max_offset(ptr %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 1023
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_max_offset_p1:
+; GCN: flat_atomic_dec v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @atomic_dec_i32_max_offset_p1(ptr %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 1024
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_ret_offset:
+; CIVI: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr %out, ptr %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr %out, i32 4
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_decr64_offset:
+; CIVI: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
+define amdgpu_kernel void @atomic_dec_i32_decr64_offset(ptr %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_ret_decr64_offset:
+; CIVI: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i32_ret_decr64_offset(ptr %out, ptr %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %gep = getelementptr i32, ptr %ptr, i32 4
+ %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32:
+; GCN: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+define amdgpu_kernel void @atomic_dec_i32(ptr %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile udec_wrap ptr %out, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_ret:
+; GCN: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i32_ret(ptr %out, ptr %out2, i32 %in) {
+entry:
+ %val = atomicrmw volatile udec_wrap ptr %out, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_decr64:
+; GCN: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+define amdgpu_kernel void @atomic_dec_i32_decr64(ptr %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_ret_decr64:
+; GCN: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i32_ret_decr64(ptr %out, ptr %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr %out, i64 %index
+ %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in seq_cst
+ store i32 %val, ptr %out2
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index 027f3c360b426..e5dc41388639b 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -4585,3 +4585,163 @@ entry:
store atomic double %in, ptr %ptr seq_cst, align 8
ret void
}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_offset:
+; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
+define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr %out, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_ret_offset:
+; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr %out, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_incr64_offset:
+; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
+define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %gep = getelementptr i64, ptr %ptr, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_ret_incr64_offset:
+; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %gep = getelementptr i64, ptr %ptr, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64:
+; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) {
+entry:
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_ret:
+; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) {
+entry:
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_incr64:
+; GCN: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_ret_incr64:
+; GCN: flat_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_offset:
+; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
+define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr %out, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_ret_offset:
+; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr %out, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_decr64_offset:
+; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
+define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %gep = getelementptr i64, ptr %ptr, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_ret_decr64_offset:
+; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %gep = getelementptr i64, ptr %ptr, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64:
+; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) {
+entry:
+ %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_ret:
+; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) {
+entry:
+ %tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_decr64:
+; GCN: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_ret_decr64:
+; GCN: flat_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr %out, i64 %index
+ %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in seq_cst
+ store i64 %tmp0, ptr %out2
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
index 7c6058b7e382a..1d5cd698e0320 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
@@ -6598,3 +6598,185 @@ entry:
store atomic half %in, ptr addrspace(1) %out seq_cst, align 2
ret void
}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_offset:
+; SIVI: buffer_atomic_inc v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
+define amdgpu_kernel void @atomic_inc_i32_offset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_max_neg_offset:
+; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}}
+define amdgpu_kernel void @atomic_inc_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_soffset:
+; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
+; SIVI: buffer_atomic_inc v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
+
+; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}}
+; GFX9: global_atomic_inc [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]:[0-9]+\]}} offset:3232{{$}}
+define amdgpu_kernel void @atomic_inc_i32_soffset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_huge_offset:
+; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
+; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
+; SI: buffer_atomic_inc v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+
+; VI: flat_atomic_inc
+
+; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac
+; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd
+; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}}
+define amdgpu_kernel void @atomic_inc_i32_huge_offset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
+
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_ret_offset:
+; SIVI: buffer_atomic_inc [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SIVI: buffer_store_dword [[RET]]
+
+; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
+define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ store i32 %val, ptr addrspace(1) %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_addr64_offset:
+; SI: buffer_atomic_inc v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: flat_atomic_inc v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
+define amdgpu_kernel void @atomic_inc_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i32_ret_addr64_offset:
+; SI: buffer_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_atomic_inc [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; SIVI: buffer_store_dword [[RET]]
+
+; GFX9: global_atomic_inc [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
+; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
+define amdgpu_kernel void @atomic_inc_i32_ret_addr64_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ store i32 %val, ptr addrspace(1) %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_offset:
+; SIVI: buffer_atomic_dec v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
+define amdgpu_kernel void @atomic_dec_i32_offset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_max_neg_offset:
+; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}}
+define amdgpu_kernel void @atomic_dec_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 -1024
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_soffset:
+; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
+; SIVI: buffer_atomic_dec v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
+
+; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}}
+; GFX9: global_atomic_dec [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]:[0-9]+\]}} offset:3232{{$}}
+define amdgpu_kernel void @atomic_dec_i32_soffset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_huge_offset:
+; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
+; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
+; SI: buffer_atomic_dec v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+
+; VI: flat_atomic_dec
+
+; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac
+; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd
+; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}}
+define amdgpu_kernel void @atomic_dec_i32_huge_offset(ptr addrspace(1) %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
+
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_ret_offset:
+; SIVI: buffer_atomic_dec [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SIVI: buffer_store_dword [[RET]]
+
+; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
+define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ store i32 %val, ptr addrspace(1) %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_addr64_offset:
+; SI: buffer_atomic_dec v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: flat_atomic_dec v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: global_atomic_dec v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
+define amdgpu_kernel void @atomic_dec_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i32_ret_addr64_offset:
+; SI: buffer_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_atomic_dec [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; SIVI: buffer_store_dword [[RET]]
+
+; GFX9: global_atomic_dec [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
+; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
+define amdgpu_kernel void @atomic_dec_i32_ret_addr64_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i32 %in seq_cst
+ store i32 %val, ptr addrspace(1) %out2
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
index d295efc6d015f..3182a1e12258f 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -5969,3 +5969,75 @@ entry:
store atomic double %in, ptr addrspace(1) %gep seq_cst, align 8
ret void
}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_offset:
+; CIVI: buffer_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+
+; GFX9: global_atomic_inc_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
+define amdgpu_kernel void @atomic_inc_i64_offset(ptr addrspace(1) %out, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr addrspace(1) %out, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_ret_offset:
+; CIVI: buffer_atomic_inc_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; CIVI: buffer_store_dwordx2 [[RET]]
+
+; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
+define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr addrspace(1) %out, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ store i64 %tmp0, ptr addrspace(1) %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_inc_i64_incr64_offset:
+; CI: buffer_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
+; VI: flat_atomic_inc_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
+; GFX9: global_atomic_inc_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr addrspace(1) %out, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
+ %tmp0 = atomicrmw volatile uinc_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_offset:
+; CIVI: buffer_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+
+; GFX9: global_atomic_dec_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
+define amdgpu_kernel void @atomic_dec_i64_offset(ptr addrspace(1) %out, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr addrspace(1) %out, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_ret_offset:
+; CIVI: buffer_atomic_dec_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; CIVI: buffer_store_dwordx2 [[RET]]
+
+; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
+define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr addrspace(1) %out, ptr addrspace(1) %out2, i64 %in) {
+entry:
+ %gep = getelementptr i64, ptr addrspace(1) %out, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ store i64 %tmp0, ptr addrspace(1) %out2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_i64_decr64_offset:
+; CI: buffer_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
+; VI: flat_atomic_dec_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
+; GFX9: global_atomic_dec_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr addrspace(1) %out, i64 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i64, ptr addrspace(1) %ptr, i64 4
+ %tmp0 = atomicrmw volatile udec_wrap ptr addrspace(1) %gep, i64 %in seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics.ll b/llvm/test/CodeGen/AMDGPU/local-atomics.ll
index 140c615170fcf..22b52209afc41 100644
--- a/llvm/test/CodeGen/AMDGPU/local-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-atomics.ll
@@ -728,3 +728,103 @@ define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(ptr addrspace(3) %pt
%result = atomicrmw umax ptr addrspace(3) %gep, i32 4 seq_cst
ret void
}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; EG: LDS_CMPST
+; GCN: ds_inc_rtn_u32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
+ store i32 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; EG: LDS_CMPST
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 4 seq_cst
+ store i32 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_inc_u32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; EG: LDS_CMPST
+; GCN: ds_dec_rtn_u32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
+ store i32 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; EG: LDS_CMPST
+; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 4 seq_cst
+ store i32 %result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_dec_u32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_dec_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 4 seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll
index aa93adfe427b5..8c43c7791fdc1 100644
--- a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll
@@ -639,3 +639,121 @@ define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(ptr addrspace(3) %pt
%result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
ret void
}
+
+; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_inc_rtn_u64
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
+; SICIVI-DAG: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 9 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64:
+; SICIVI-DAG: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
+; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_inc_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_inc1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+ %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 1 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_dec_rtn_u64
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
+; SICIVI-DAG: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 9 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64:
+; SICIVI-DAG: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
+; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_dec_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_dec1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
+ %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+ %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 1 seq_cst
+ store i64 %result, ptr addrspace(1) %out, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 3c9a05af3c595..b89d9ea027868 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -280,6 +280,34 @@ define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr
ret void
}
+; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i32 31 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 31 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_lds:
; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
More information about the llvm-commits
mailing list