[llvm] 171cf53 - AMDGPU/GlobalISel: Handle flat/global G_ATOMIC_CMPXCHG
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 13:11:30 PDT 2019
Author: Matt Arsenault
Date: 2019-10-25T13:11:09-07:00
New Revision: 171cf5302f43776b07615e32b2ffd6ddf4e5d890
URL: https://github.com/llvm/llvm-project/commit/171cf5302f43776b07615e32b2ffd6ddf4e5d890
DIFF: https://github.com/llvm/llvm-project/commit/171cf5302f43776b07615e32b2ffd6ddf4e5d890.diff
LOG: AMDGPU/GlobalISel: Handle flat/global G_ATOMIC_CMPXCHG
Custom lower this to a target instruction with the merge operands. I
think it might be better to directly select this and emit a
REG_SEQUENCE, but this would be more work since it would require
splitting the tablegen patterns for these cases from the other
atomics.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f2be1ca44d34..4e123d094eeb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -117,6 +117,8 @@ def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
+
class GISelSop2Pat <
SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 846e7f577a28..e16c104b2716 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -497,6 +497,7 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
+defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
@@ -569,21 +570,7 @@ defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
-class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
-
// Legacy.
-def AMDGPUatomic_cmp_swap_global : PatFrag<
- (ops node:$ptr, node:$value),
- (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;
-
-def atomic_cmp_swap_global : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;
-
-
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 200946f2c7d0..f780d43475da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -848,7 +848,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
- G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
+ G_ATOMICRMW_UMIN})
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
{S64, GlobalPtr}, {S64, LocalPtr}});
if (ST.hasFlatAddressSpace()) {
@@ -858,6 +858,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
.legalFor({{S32, LocalPtr}});
+ // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
+ // demarshalling
+ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
+ .customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
+ {S32, FlatPtr}, {S64, FlatPtr}})
+ .legalFor({{S32, LocalPtr}, {S64, LocalPtr},
+ {S32, RegionPtr}, {S64, RegionPtr}});
+
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
.lower();
@@ -1116,6 +1124,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
return legalizeFMad(MI, MRI, B);
case TargetOpcode::G_FDIV:
return legalizeFDIV(MI, MRI, B);
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ return legalizeAtomicCmpXChg(MI, MRI, B);
default:
return false;
}
@@ -1724,6 +1734,33 @@ bool AMDGPULegalizerInfo::legalizeFMad(
return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
}
+bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ Register CmpVal = MI.getOperand(2).getReg();
+ Register NewVal = MI.getOperand(3).getReg();
+
+ assert(SITargetLowering::isFlatGlobalAddrSpace(
+ MRI.getType(PtrReg).getAddressSpace()) &&
+ "this should not have been custom lowered");
+
+ LLT ValTy = MRI.getType(CmpVal);
+ LLT VecTy = LLT::vector(2, ValTy);
+
+ B.setInstr(MI);
+ Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0);
+
+ B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG)
+ .addDef(DstReg)
+ .addUse(PtrReg)
+ .addUse(PackedVal)
+ .setMemRefs(MI.memoperands());
+
+ MI.eraseFromParent();
+ return true;
+}
+
// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 357142d9f3d9..bbf71674f606 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -72,6 +72,9 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
Register getLiveInRegister(MachineRegisterInfo &MRI,
Register Reg, LLT Ty) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 4d78188b3dc3..b82475ebd834 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2957,7 +2957,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
- case AMDGPU::G_ATOMIC_CMPXCHG: {
+ case AMDGPU::G_ATOMIC_CMPXCHG:
+ case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
return getDefaultMappingAllVGPR(MI);
}
case AMDGPU::G_BRCOND: {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 80ee17eba141..94b038b38f5f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -370,27 +370,6 @@ multiclass FLAT_Global_Atomic_Pseudo<
FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
-class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
->;
-
-def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
-def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
-def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
-def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
-def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
-def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
-def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
-def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
-def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
-
-
//===----------------------------------------------------------------------===//
// Flat Instructions
@@ -425,84 +404,84 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
}
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
- VGPR_32, i32, atomic_cmp_swap_flat,
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
- VReg_64, i64, atomic_cmp_swap_flat,
+ VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
v2i64, VReg_128>;
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
- VGPR_32, i32, atomic_swap_flat>;
+ VGPR_32, i32, atomic_swap_flat_32>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
- VReg_64, i64, atomic_swap_flat>;
+ VReg_64, i64, atomic_swap_flat_64>;
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
- VGPR_32, i32, atomic_add_flat>;
+ VGPR_32, i32, atomic_load_add_flat_32>;
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
- VGPR_32, i32, atomic_sub_flat>;
+ VGPR_32, i32, atomic_load_sub_flat_32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
- VGPR_32, i32, atomic_min_flat>;
+ VGPR_32, i32, atomic_load_min_flat_32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
- VGPR_32, i32, atomic_umin_flat>;
+ VGPR_32, i32, atomic_load_umin_flat_32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
- VGPR_32, i32, atomic_max_flat>;
+ VGPR_32, i32, atomic_load_max_flat_32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
- VGPR_32, i32, atomic_umax_flat>;
+ VGPR_32, i32, atomic_load_umax_flat_32>;
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
- VGPR_32, i32, atomic_and_flat>;
+ VGPR_32, i32, atomic_load_and_flat_32>;
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
- VGPR_32, i32, atomic_or_flat>;
+ VGPR_32, i32, atomic_load_or_flat_32>;
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
- VGPR_32, i32, atomic_xor_flat>;
+ VGPR_32, i32, atomic_load_xor_flat_32>;
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
- VGPR_32, i32, atomic_inc_flat>;
+ VGPR_32, i32, atomic_inc_flat_32>;
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
- VGPR_32, i32, atomic_dec_flat>;
+ VGPR_32, i32, atomic_dec_flat_32>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
- VReg_64, i64, atomic_add_flat>;
+ VReg_64, i64, atomic_load_add_flat_64>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
- VReg_64, i64, atomic_sub_flat>;
+ VReg_64, i64, atomic_load_sub_flat_64>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
- VReg_64, i64, atomic_min_flat>;
+ VReg_64, i64, atomic_load_min_flat_64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
- VReg_64, i64, atomic_umin_flat>;
+ VReg_64, i64, atomic_load_umin_flat_64>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
- VReg_64, i64, atomic_max_flat>;
+ VReg_64, i64, atomic_load_max_flat_64>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
- VReg_64, i64, atomic_umax_flat>;
+ VReg_64, i64, atomic_load_umax_flat_64>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
- VReg_64, i64, atomic_and_flat>;
+ VReg_64, i64, atomic_load_and_flat_64>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
- VReg_64, i64, atomic_or_flat>;
+ VReg_64, i64, atomic_load_or_flat_64>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
- VReg_64, i64, atomic_xor_flat>;
+ VReg_64, i64, atomic_load_xor_flat_64>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
- VReg_64, i64, atomic_inc_flat>;
+ VReg_64, i64, atomic_inc_flat_64>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
- VReg_64, i64, atomic_dec_flat>;
+ VReg_64, i64, atomic_dec_flat_64>;
// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {
@@ -556,11 +535,11 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
- VReg_64, i64, AMDGPUatomic_cmp_swap_global,
+ VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -813,7 +792,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -827,7 +806,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
@@ -923,7 +902,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i3
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -937,7 +916,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64,
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56ebf9c06741..9c2329d9fecc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1326,13 +1326,6 @@ EVT SITargetLowering::getOptimalMemOpType(
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS ||
- AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
-}
-
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f0102feb65c4..b36574f8ff60 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -260,6 +260,14 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isMemOpUniform(const SDNode *N) const;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
+
+ static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
+ }
+
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 21984c6ad910..013bd5fd40a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2018,3 +2018,14 @@ def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
+
+// Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
+// operand Expects a MachineMemOperand in addition to explicit
+// operands.
+def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$oldval);
+ let InOperandList = (ins ptype1:$addr, type0:$cmpval_nnenwval);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
new file mode 100644
index 000000000000..517e84fa58c9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
@@ -0,0 +1,374 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 0)
+ $vgpr0 = COPY %4
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_CONSTANT i64 4
+ %5:vgpr(p0) = G_GEP %0, %4
+ %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 0)
+ $vgpr0 = COPY %6
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_flat
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(s64) = COPY $vgpr4_vgpr5
+ %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 0)
+ $vgpr0_vgpr1 = COPY %4
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(s64) = COPY $vgpr4_vgpr5
+ %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_CONSTANT i64 4
+ %5:vgpr(p0) = G_GEP %0, %4
+ %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 8, addrspace 0)
+ $vgpr0_vgpr1 = COPY %6
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+ ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+ ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_CONSTANT i64 -4
+ %5:vgpr(p0) = G_GEP %0, %4
+ %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 0)
+ $vgpr0 = COPY %6
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 0)
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(s64) = COPY $vgpr4_vgpr5
+ %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 0)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
index b5fe7334e066..b2526ca975b3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
@@ -11,9 +11,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1)
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+ ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
@@ -32,9 +33,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4)
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+ ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
@@ -74,9 +76,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1)
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
- ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
+ ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = COPY $vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
index 738106de9f79..2cadc7afa9b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
@@ -3,37 +3,55 @@
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
---
-name: atomic_cmpxchg_global_i32
+name: atomic_cmpxchg_local_i32
body: |
bb.0:
- liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
- ; CHECK-LABEL: name: atomic_cmpxchg_global_i32
- ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 1)
- %0:_(p1) = COPY $sgpr0_sgpr1
- %1:_(s32) = COPY $sgpr2
- %2:_(s32) = COPY $sgpr3
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1)
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomic_cmpxchg_local_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3)
...
---
-name: atomic_cmpxchg_local_i32
+name: atomic_cmpxchg_local_i64
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2
- ; CHECK-LABEL: name: atomic_cmpxchg_local_i32
+ ; CHECK-LABEL: name: atomic_cmpxchg_local_i64
; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3)
+ ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 8, addrspace 3)
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3)
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 3)
+...
+
+---
+name: atomic_cmpxchg_global_i32
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
+ ; CHECK-LABEL: name: atomic_cmpxchg_global_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = COPY $sgpr3
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1)
...
---
@@ -46,26 +64,48 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 8, addrspace 1)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = COPY $sgpr2
%2:_(s32) = COPY $sgpr3
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1)
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 1)
...
---
-name: atomic_cmpxchg_local_i64
+name: atomic_cmpxchg_flat_i32
body: |
bb.0:
- liveins: $sgpr0, $sgpr1, $sgpr2
- ; CHECK-LABEL: name: atomic_cmpxchg_local_i64
- ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3)
- %0:_(p3) = COPY $sgpr0
- %1:_(s32) = COPY $sgpr1
- %2:_(s32) = COPY $sgpr2
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3)
+ liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: atomic_cmpxchg_flat_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst 4)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = COPY $sgpr3
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 0)
+...
+
+---
+name: atomic_cmpxchg_flat_i64
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: atomic_cmpxchg_flat_i64
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst 8)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = COPY $sgpr3
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 0)
...
More information about the llvm-commits
mailing list