[llvm] a88deb4 - [AMDGPU] Use aperture registers instead of S_GETREG
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 30 04:25:17 PST 2022
Author: Pierre van Houtryve
Date: 2022-11-30T12:25:10Z
New Revision: a88deb4b65f842138813334a77bbcecf8cfb9aaf
URL: https://github.com/llvm/llvm-project/commit/a88deb4b65f842138813334a77bbcecf8cfb9aaf
DIFF: https://github.com/llvm/llvm-project/commit/a88deb4b65f842138813334a77bbcecf8cfb9aaf.diff
LOG: [AMDGPU] Use aperture registers instead of S_GETREG
Fixes a longstanding TODO in the codebase: we were using S_GETREG plus a shift to read the aperture base, which can instead be done directly with an inline constant register (src_shared_base / src_private_base).
Patch based on D31874 by @kzhuravl
Depends on D137767
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D137542
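For illustration, here is roughly what the change does to generated code on targets with aperture registers, based on the test updates below (the exact SGPR numbers vary per test; this is just a sketch):

    ; Before: read SH_MEM_BASES and shift to recover the shared aperture base
    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
    s_lshl_b32   s0, s0, 16              ; aperture base ends up in s0

    ; After: a single 64-bit move of the inline constant register;
    ; the aperture base lives in the high half (s1)
    s_mov_b64    s[0:1], src_shared_base

The private aperture is handled the same way, with hwreg(HW_REG_SH_MEM_BASES, 0, 16) replaced by src_private_base.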
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
llvm/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c148c322be15..b48d8a1bb6af 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1818,32 +1818,28 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
MachineFunction &MF = B.getMF();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS);
if (ST.hasApertureRegs()) {
- // FIXME: Use inline constants (src_{shared, private}_base) instead of
- // getreg.
- unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
- AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
- AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
- unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
- AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
- AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
- unsigned Encoding =
- AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
- Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
- WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
-
- Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-
- B.buildInstr(AMDGPU::S_GETREG_B32)
- .addDef(GetReg)
- .addImm(Encoding);
- MRI.setType(GetReg, S32);
-
- auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1);
- return B.buildShl(S32, GetReg, ShiftAmt).getReg(0);
+ // Note: this register is somewhat broken. When used as a 32-bit operand,
+ // it only returns zeroes. The real value is in the upper 32 bits.
+ // Thus, we must extract the high 32 bits.
+ const unsigned ApertureRegNo = (AS == AMDGPUAS::LOCAL_ADDRESS)
+ ? AMDGPU::SRC_SHARED_BASE
+ : AMDGPU::SRC_PRIVATE_BASE;
+ // FIXME: It would be more natural to emit a COPY here, but then copy
+ // coalescing would kick in and it would think it's okay to use the "HI"
+ // subregister (instead of extracting the HI 32 bits) which is an artificial
+ // (unusable) register.
+ // Register TableGen definitions would need an overhaul to get rid of the
+ // artificial "HI" aperture registers and prevent this kind of issue from
+ // happening.
+ Register Dst = MRI.createGenericVirtualRegister(S64);
+ MRI.setRegClass(Dst, &AMDGPU::SReg_64RegClass);
+ B.buildInstr(AMDGPU::S_MOV_B64, {Dst}, {Register(ApertureRegNo)});
+ return B.buildUnmerge(S32, Dst).getReg(1);
}
// TODO: can we be smarter about machine pointer info?
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 85930312352b..3cf91d14cd91 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -420,9 +420,6 @@ enum Offset : unsigned { // Offset, (5) [10:6]
OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
OFFSET_MEM_VIOL = 8,
-
- OFFSET_SRC_SHARED_BASE = 16,
- OFFSET_SRC_PRIVATE_BASE = 0
};
enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
@@ -430,9 +427,6 @@ enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
WIDTH_M1_SHIFT_ = 11,
WIDTH_M1_WIDTH_ = 5,
WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),
-
- WIDTH_M1_SRC_SHARED_BASE = 15,
- WIDTH_M1_SRC_PRIVATE_BASE = 15
};
// Some values from WidthMinusOne mapped into Width domain.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b1bb6dfdcb2a..d55b73037b7c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5533,24 +5533,33 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
- // FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
- unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
- AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
- AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
- unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
- AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
- AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
- unsigned Encoding =
- AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
- Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
- WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
-
- SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
- SDValue ApertureReg = SDValue(
- DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
- SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
- return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
+ const unsigned ApertureRegNo = (AS == AMDGPUAS::LOCAL_ADDRESS)
+ ? AMDGPU::SRC_SHARED_BASE
+ : AMDGPU::SRC_PRIVATE_BASE;
+ // Note: this register is broken. When used as a 32-bit operand,
+ // it returns a wrong value (all zeroes?). The real value is in the upper 32
+ // bits.
+ //
+ // To work around the issue, directly emit a 64 bit mov from this register
+ // then extract the high bits. Note that this shouldn't even result in a
+ // shift being emitted and simply become a pair of registers (e.g.):
+ // s_mov_b64 s[6:7], src_shared_base
+ // v_mov_b32_e32 v1, s7
+ //
+ // FIXME: It would be more natural to emit a CopyFromReg here, but then copy
+ // coalescing would kick in and it would think it's okay to use the "HI"
+ // subregister directly (instead of extracting the HI 32 bits) which is an
+ // artificial (unusable) register.
+ // Register TableGen definitions would need an overhaul to get rid of the
+ // artificial "HI" aperture registers and prevent this kind of issue from
+ // happening.
+ SDNode *Mov = DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64,
+ DAG.getRegister(ApertureRegNo, MVT::i64));
+ return DAG.getNode(
+ ISD::TRUNCATE, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i64,
+ {SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
}
// For code object version 5, private_base and shared_base are passed through
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
index 0edc177a8be5..dcad707acaf2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
@@ -10,11 +10,10 @@ define amdgpu_ps void @amdgpu_ps() {
; MESA-LABEL: amdgpu_ps:
; MESA: ; %bb.0:
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
-; MESA-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; MESA-NEXT: s_mov_b64 s[0:1], src_private_base
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
-; MESA-NEXT: s_lshl_b32 s0, s0, 16
; MESA-NEXT: v_mov_b32_e32 v0, 4
-; MESA-NEXT: v_mov_b32_e32 v1, s0
+; MESA-NEXT: v_mov_b32_e32 v1, s1
; MESA-NEXT: v_mov_b32_e32 v2, 0
; MESA-NEXT: flat_store_dword v[0:1], v2
; MESA-NEXT: s_waitcnt vmcnt(0)
@@ -30,10 +29,9 @@ define amdgpu_ps void @amdgpu_ps() {
; PAL-NEXT: s_waitcnt lgkmcnt(0)
; PAL-NEXT: s_and_b32 s3, s3, 0xffff
; PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
-; PAL-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; PAL-NEXT: s_mov_b64 s[0:1], src_private_base
; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
-; PAL-NEXT: s_lshl_b32 s0, s0, 16
-; PAL-NEXT: v_mov_b32_e32 v1, s0
+; PAL-NEXT: v_mov_b32_e32 v1, s1
; PAL-NEXT: flat_store_dword v[0:1], v2
; PAL-NEXT: s_waitcnt vmcnt(0)
; PAL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index a5c9b896eb0d..537027bf3979 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -85,15 +85,13 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V3-LABEL: addrspacecast:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V3-NEXT: s_lshl_b32 s3, s2, 16
-; GFX9V3-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
+; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
+; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V3-NEXT: v_mov_b32_e32 v2, 1
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_mov_b32 s2, s0
; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
-; GFX9V3-NEXT: s_lshl_b32 s5, s4, 16
; GFX9V3-NEXT: s_mov_b32 s4, s1
; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V3-NEXT: v_mov_b32_e32 v0, s2
@@ -111,15 +109,13 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V4-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V4-NEXT: s_lshl_b32 s3, s2, 16
-; GFX9V4-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
+; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
+; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_mov_b32 s2, s0
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
-; GFX9V4-NEXT: s_lshl_b32 s5, s4, 16
; GFX9V4-NEXT: s_mov_b32 s4, s1
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
@@ -137,15 +133,13 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V5-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V5-NEXT: s_lshl_b32 s3, s2, 16
-; GFX9V5-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
+; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
+; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_mov_b32 s2, s0
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
-; GFX9V5-NEXT: s_lshl_b32 s5, s4, 16
; GFX9V5-NEXT: s_mov_b32 s4, s1
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
@@ -209,10 +203,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
; GFX9V3-LABEL: llvm_amdgcn_is_shared:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V3-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9V3-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
@@ -222,10 +215,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V4-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9V4-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -235,10 +227,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9V5-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
@@ -293,10 +284,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
; GFX9V3-LABEL: llvm_amdgcn_is_private:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9V3-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
@@ -306,10 +296,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V4-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9V4-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -319,10 +308,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9V5-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
index cb4989c3ee2d..62652fd9ab58 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -228,15 +228,14 @@ body: |
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
+ ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
- ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C1]]
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; SI-LABEL: name: test_addrspacecast_p5_to_p0
; SI: liveins: $vgpr0
@@ -323,15 +322,14 @@ body: |
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
+ ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
- ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C1]]
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; SI-LABEL: name: test_addrspacecast_p3_to_p0
; SI: liveins: $vgpr0
@@ -568,21 +566,20 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
- ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
+ ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
- ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]]
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
- ; GFX9-NEXT: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C]](s32)
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_1]](s64)
; GFX9-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
- ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[SHL1]](s32)
- ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]]
- ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C2]]
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[UV5]](s32)
+ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
; SI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
@@ -778,11 +775,10 @@ body: |
; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](p0)
; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0
; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
- ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
+ ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
- ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](p0)
; SI-LABEL: name: test_addrspacecast_p5_fi_to_p0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index b2e4d6787b74..ccdd9f654db9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -32,9 +32,8 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
+; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_endpgm
@@ -47,9 +46,8 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX10-NEXT: s_lshl_b32 s0, s0, 16
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
+; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT: global_store_dword v[0:1], v0, off
; GFX10-NEXT: s_endpgm
@@ -61,10 +59,9 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_lshl_b32 s0, s0, 16
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
+; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -97,10 +94,9 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
; GFX9-LABEL: is_private_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: s_cmp_lg_u32 s1, s0
+; GFX9-NEXT: s_cmp_lg_u32 s1, s3
; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
@@ -112,10 +108,9 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
; GFX10-LABEL: is_private_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX10-NEXT: s_lshl_b32 s0, s0, 16
-; GFX10-NEXT: s_cmp_lg_u32 s1, s0
+; GFX10-NEXT: s_cmp_lg_u32 s1, s3
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %bb0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
@@ -127,11 +122,9 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
; GFX11-LABEL: is_private_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_lshl_b32 s0, s0, 16
-; GFX11-NEXT: s_cmp_lg_u32 s1, s0
+; GFX11-NEXT: s_cmp_lg_u32 s1, s3
; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
; GFX11-NEXT: ; %bb.1: ; %bb0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index 048fb5586ac0..9a4b2338c87d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -32,9 +32,8 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
+; GFX9-NEXT: s_mov_b64 s[0:1], src_shared_base
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_endpgm
@@ -47,9 +46,8 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX10-NEXT: s_lshl_b32 s0, s0, 16
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
+; GFX10-NEXT: s_mov_b64 s[0:1], src_shared_base
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT: global_store_dword v[0:1], v0, off
; GFX10-NEXT: s_endpgm
@@ -61,10 +59,9 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_lshl_b32 s0, s0, 16
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
+; GFX11-NEXT: s_mov_b64 s[0:1], src_shared_base
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -97,10 +94,9 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; GFX9-LABEL: is_local_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: s_cmp_lg_u32 s1, s0
+; GFX9-NEXT: s_cmp_lg_u32 s1, s3
; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
@@ -112,10 +108,9 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; GFX10-LABEL: is_local_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX10-NEXT: s_lshl_b32 s0, s0, 16
-; GFX10-NEXT: s_cmp_lg_u32 s1, s0
+; GFX10-NEXT: s_cmp_lg_u32 s1, s3
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %bb0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
@@ -127,11 +122,9 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; GFX11-LABEL: is_local_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_lshl_b32 s0, s0, 16
-; GFX11-NEXT: s_cmp_lg_u32 s1, s0
+; GFX11-NEXT: s_cmp_lg_u32 s1, s3
; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
; GFX11-NEXT: ; %bb.1: ; %bb0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
index 5de8a6f2430e..e8c5c38b846b 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
@@ -10,10 +10,8 @@ define void @flat_user(ptr %ptr) {
}
; CHECK-LABEL: {{^}}cast_alloca:
-; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; CHECK: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
-; CHECK: v_lshrrev_b32_e64 v0, 6, s33
-; CHECK-NEXT: v_mov_b32_e32 v1, [[APERTURE]]
+; CHECK: s_mov_b64 s[{{[0-9]+}}:[[HIREG:[0-9]+]]], src_private_base
+; CHECK: v_mov_b32_e32 v1, s[[HIREG]]
; CHECK-NOT: v0
; CHECK-NOT: v1
define void @cast_alloca() {
@@ -26,10 +24,9 @@ define void @cast_alloca() {
@lds = internal unnamed_addr addrspace(3) global i8 undef, align 4
; CHECK-LABEL: {{^}}cast_lds_gv:
-; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; CHECK: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK: s_mov_b64 s[{{[0-9]+}}:[[HIREG:[0-9]+]]], src_shared_base
; CHECK: v_mov_b32_e32 v0, 0
-; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
+; CHECK: v_mov_b32_e32 v1, s[[HIREG]]
; CHECK-NOT: v0
; CHECK-NOT: v1
define void @cast_lds_gv() {
@@ -55,20 +52,18 @@ define void @cast_constant_private_neg1_gv() {
}
; CHECK-LABEL: {{^}}cast_constant_lds_other_gv:
-; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; CHECK: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK: s_mov_b64 s[{{[0-9]+}}:[[HIREG:[0-9]+]]], src_shared_base
; CHECK: v_mov_b32_e32 v0, 0x7b
-; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
+; CHECK: v_mov_b32_e32 v1, s[[HIREG]]
define void @cast_constant_lds_other_gv() {
call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 123 to ptr addrspace(3)) to ptr))
ret void
}
; CHECK-LABEL: {{^}}cast_constant_private_other_gv:
-; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; CHECK: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK: s_mov_b64 s[{{[0-9]+}}:[[HIREG:[0-9]+]]], src_private_base
; CHECK: v_mov_b32_e32 v0, 0x7b
-; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
+; CHECK: v_mov_b32_e32 v1, s[[HIREG]]
define void @cast_constant_private_other_gv() {
call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr))
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index b44616678ac8..b54f1c79735d 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -13,15 +13,14 @@
; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
+
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
-; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
-; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
; GFX9: s_cmp_lg_u32 [[PTR]], -1
-; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[SSRC_SHARED_BASE]], 0
-; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
+; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
+; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
@@ -43,15 +42,14 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
+
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
-; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]
-; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
+; GFX9-DAG: v_mov_b32_e32 v[[VREG_HIBASE:[0-9]+]], s[[HIBASE]]
; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
-; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
+; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
@@ -75,15 +73,12 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
-; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16
-
-; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base
; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9: s_cmp_lg_u32 [[PTR]], -1
-; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[SSRC_PRIVATE_BASE]], 0
-; GFX9: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
+; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
+; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
@@ -213,11 +208,8 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
-; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
-; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]
-; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
@@ -263,11 +255,8 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
-; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
-; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]
-; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index a7584ac5cd78..355a72232d35 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -134,17 +134,15 @@ define float @syncscope_workgroup_rtn(float* %addr, float %val) #0 {
; GFX90A-LABEL: syncscope_workgroup_rtn:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX90A-NEXT: s_lshl_b32 s4, s4, 16
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1
+; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: ; implicit-def: $vgpr3
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB1_6
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.check.private
-; GFX90A-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX90A-NEXT: s_lshl_b32 s6, s6, 16
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1
+; GFX90A-NEXT: s_mov_b64 s[6:7], src_private_base
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
; GFX90A-NEXT: ; implicit-def: $vgpr3
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
@@ -206,9 +204,8 @@ define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 {
; GFX908-LABEL: syncscope_workgroup_nortn:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX908-NEXT: s_lshl_b32 s4, s4, 16
-; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1
+; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
+; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX908-NEXT: s_cbranch_execnz .LBB2_3
@@ -220,9 +217,8 @@ define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 {
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_setpc_b64 s[30:31]
; GFX908-NEXT: .LBB2_3: ; %atomicrmw.check.private
-; GFX908-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX908-NEXT: s_lshl_b32 s6, s6, 16
-; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1
+; GFX908-NEXT: s_mov_b64 s[6:7], src_private_base
+; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
; GFX908-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX908-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GFX908-NEXT: s_cbranch_execz .LBB2_5
@@ -260,9 +256,8 @@ define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 {
; GFX90A-LABEL: syncscope_workgroup_nortn:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX90A-NEXT: s_lshl_b32 s4, s4, 16
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1
+; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execnz .LBB2_3
@@ -274,9 +269,8 @@ define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 {
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
; GFX90A-NEXT: .LBB2_3: ; %atomicrmw.check.private
-; GFX90A-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX90A-NEXT: s_lshl_b32 s6, s6, 16
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1
+; GFX90A-NEXT: s_mov_b64 s[6:7], src_private_base
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB2_5
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index e510328bf176..9eed9b5bb26c 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -37,10 +37,13 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[6:7], 0x10
-; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
-; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
-; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]]
+; CIVI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
+
+; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
+; GFX9-DAG: v_mov_b32_e32 v[[VGPR_HI:[0-9]+]], s[[HI]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[VGPR_HI]]]
+
; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
define hidden void @use_queue_ptr_addrspacecast() #1 {
%asc = addrspacecast ptr addrspace(3) inttoptr (i32 16 to ptr addrspace(3)) to ptr
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 91f3e3581fa2..6e019dc0b16f 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -11,22 +11,20 @@ define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
; FLAT_SCR_OPT-NEXT: s_addc_u32 s1, s1, 0
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; FLAT_SCR_OPT-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; FLAT_SCR_OPT-NEXT: s_mov_b64 s[0:1], src_private_base
; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v0, 4
-; FLAT_SCR_OPT-NEXT: s_lshl_b32 s0, s0, 16
+; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v1, s1
; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v2, 0
-; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v1, s0
; FLAT_SCR_OPT-NEXT: flat_store_dword v[0:1], v2
; FLAT_SCR_OPT-NEXT: s_waitcnt_vscnt null, 0x0
; FLAT_SCR_OPT-NEXT: s_endpgm
;
; FLAT_SCR_ARCH-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
; FLAT_SCR_ARCH: ; %bb.0:
-; FLAT_SCR_ARCH-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[0:1], src_private_base
; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v0, 4
-; FLAT_SCR_ARCH-NEXT: s_lshl_b32 s0, s0, 16
+; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v1, s1
; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v2, 0
-; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v1, s0
; FLAT_SCR_ARCH-NEXT: flat_store_dword v[0:1], v2
; FLAT_SCR_ARCH-NEXT: s_waitcnt_vscnt null, 0x0
; FLAT_SCR_ARCH-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
index 03cdd85a2960..85ba5c90e44f 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -80,19 +80,17 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add
; GFX9V3-LABEL: addrspacecast:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V3-NEXT: s_lshl_b32 s2, s2, 16
+; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
+; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V3-NEXT: v_mov_b32_e32 v4, 1
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
+; GFX9V3-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V3-NEXT: s_cselect_b32 s2, s2, 0
-; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
+; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: v_mov_b32_e32 v1, s2
-; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0
+; GFX9V3-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V3-NEXT: v_mov_b32_e32 v2, s1
; GFX9V3-NEXT: v_mov_b32_e32 v3, s0
@@ -106,19 +104,17 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V4-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V4-NEXT: s_lshl_b32 s2, s2, 16
+; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
+; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V4-NEXT: v_mov_b32_e32 v4, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
+; GFX9V4-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
-; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V4-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V4-NEXT: s_cselect_b32 s2, s2, 0
-; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
+; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: v_mov_b32_e32 v1, s2
-; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
+; GFX9V4-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V4-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v2, s1
; GFX9V4-NEXT: v_mov_b32_e32 v3, s0
@@ -132,19 +128,17 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V5-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V5-NEXT: s_lshl_b32 s2, s2, 16
+; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
+; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V5-NEXT: v_mov_b32_e32 v4, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
+; GFX9V5-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
-; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V5-NEXT: s_cselect_b32 s2, s2, 0
-; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
+; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: v_mov_b32_e32 v1, s2
-; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
+; GFX9V5-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V5-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v2, s1
; GFX9V5-NEXT: v_mov_b32_e32 v3, s0
@@ -200,11 +194,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
;
; GFX9V3-LABEL: llvm_amdgcn_is_shared:
; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dword s0, s[4:5], 0x4
-; GFX9V3-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V3-NEXT: s_lshl_b32 s1, s1, 16
+; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4
+; GFX9V3-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_eq_u32 s0, s1
+; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
@@ -213,11 +206,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
-; GFX9V4-NEXT: s_load_dword s0, s[4:5], 0x4
-; GFX9V4-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V4-NEXT: s_lshl_b32 s1, s1, 16
+; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4
+; GFX9V4-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V4-NEXT: s_cmp_eq_u32 s0, s1
+; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -226,11 +218,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
-; GFX9V5-NEXT: s_load_dword s0, s[4:5], 0x4
-; GFX9V5-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9V5-NEXT: s_lshl_b32 s1, s1, 16
+; GFX9V5-NEXT: s_load_dword s2, s[4:5], 0x4
+; GFX9V5-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V5-NEXT: s_cmp_eq_u32 s0, s1
+; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
@@ -281,11 +272,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
;
; GFX9V3-LABEL: llvm_amdgcn_is_private:
; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dword s0, s[4:5], 0x4
-; GFX9V3-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V3-NEXT: s_lshl_b32 s1, s1, 16
+; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4
+; GFX9V3-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_eq_u32 s0, s1
+; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
@@ -294,11 +284,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
-; GFX9V4-NEXT: s_load_dword s0, s[4:5], 0x4
-; GFX9V4-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V4-NEXT: s_lshl_b32 s1, s1, 16
+; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4
+; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V4-NEXT: s_cmp_eq_u32 s0, s1
+; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -307,11 +296,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
-; GFX9V5-NEXT: s_load_dword s0, s[4:5], 0x4
-; GFX9V5-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9V5-NEXT: s_lshl_b32 s1, s1, 16
+; GFX9V5-NEXT: s_load_dword s2, s[4:5], 0x4
+; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V5-NEXT: s_cmp_eq_u32 s0, s1
+; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
index 7760c41ce8f4..2a92101203eb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
@@ -4,9 +4,11 @@
; GCN-LABEL: {{^}}is_private_vgpr:
; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
-; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16
-; GCN: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
+; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
+
+; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
+; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
+
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -23,13 +25,15 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
; GCN-LABEL: {{^}}is_private_sgpr:
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
-; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
-; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16
-; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
+; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
+
+; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
+; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
+
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
%val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index 69e49064c1c4..1a10a9c15dc5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -4,10 +4,11 @@
; GCN-LABEL: {{^}}is_local_vgpr:
; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
-; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16
-; GCN: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
+; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
+; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
+
+; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -24,13 +25,14 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; GCN-LABEL: {{^}}is_local_sgpr:
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
-; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
-; GFX9-DAG: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16
; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
-; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
+; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
+; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
+
+; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
%val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)