[llvm] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_BLOCK_ADDR and G_GLOBAL_VALUE (PR #165340)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 08:49:47 PST 2025
https://github.com/vangthao95 updated https://github.com/llvm/llvm-project/pull/165340
>From 3e62f0298e873c3960185a77e795170628531a32 Mon Sep 17 00:00:00 2001
From: Vang Thao <vthao at amd.com>
Date: Mon, 27 Oct 2025 17:30:25 -0700
Subject: [PATCH 1/2] [AMDGPU][GlobalISel] Add RegBankLegalize support for
G_BLOCK_ADDR and G_GLOBAL_VALUE
---
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 4 ++++
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 ++++
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 1 +
llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll | 2 +-
.../CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll | 8 ++++----
.../AMDGPU/GlobalISel/regbankselect-block-addr.mir | 2 +-
6 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 540756653dd22..023253b3cb0bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -841,6 +841,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case Sgpr128:
case Vgpr128:
return LLT::scalar(128);
+ case SgprP0:
case VgprP0:
return LLT::pointer(0, 64);
case SgprP1:
@@ -940,6 +941,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case Sgpr32_WF:
case Sgpr64:
case Sgpr128:
+ case SgprP0:
case SgprP1:
case SgprP3:
case SgprP4:
@@ -1022,6 +1024,7 @@ void RegBankLegalizeHelper::applyMappingDst(
case Sgpr32:
case Sgpr64:
case Sgpr128:
+ case SgprP0:
case SgprP1:
case SgprP3:
case SgprP4:
@@ -1163,6 +1166,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
case Sgpr32:
case Sgpr64:
case Sgpr128:
+ case SgprP0:
case SgprP1:
case SgprP3:
case SgprP4:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index a67b12a22589c..85666beb3e9bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -903,6 +903,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});
+ addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
+
+ addRulesForGOpcs({G_GLOBAL_VALUE}).Any({{UniP3}, {{SgprP3}, {}}});
+
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD}, Standard)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efda77fdd..9c85b6531e421 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -134,6 +134,7 @@ enum RegBankLLTMappingApplyID {
Sgpr32,
Sgpr64,
Sgpr128,
+ SgprP0,
SgprP1,
SgprP3,
SgprP4,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
index 82886ab9e7d55..e1ac8ba5e6db4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
; FIXME: Merge with DAG test
@lds.external = external unnamed_addr addrspace(3) global [0 x i32]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
index cabb37c330b4a..3396eaedf359e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s
-; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
; CHECK: error: lds: unsupported initializer for address space
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir
index a50c7fe0748b8..fc86dd884fac0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s
+# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
--- |
>From 59638178c9e026c5b370d1ab3da51820cf7746b3 Mon Sep 17 00:00:00 2001
From: Vang Thao <vthao at amd.com>
Date: Fri, 7 Nov 2025 11:29:28 -0800
Subject: [PATCH 2/2] Add more rules for G_GLOBAL_VALUE and add .ll test for
G_GLOBAL_VALUE
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 5 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 3 +
.../GlobalISel/global-value-addrspaces.ll | 104 ++++++++++++++++++
4 files changed, 122 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 023253b3cb0bf..664a511efd973 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -856,6 +856,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case SgprP5:
case VgprP5:
return LLT::pointer(5, 32);
+ case SgprP8:
+ return LLT::pointer(8, 128);
case SgprV2S16:
case VgprV2S16:
case UniInVgprV2S16:
@@ -946,6 +948,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case SgprP3:
case SgprP4:
case SgprP5:
+ case SgprP8:
case SgprPtr32:
case SgprPtr64:
case SgprPtr128:
@@ -1029,6 +1032,7 @@ void RegBankLegalizeHelper::applyMappingDst(
case SgprP3:
case SgprP4:
case SgprP5:
+ case SgprP8:
case SgprV2S16:
case SgprV2S32:
case SgprV4S32:
@@ -1171,6 +1175,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
case SgprP3:
case SgprP4:
case SgprP5:
+ case SgprP8:
case SgprV2S16:
case SgprV2S32:
case SgprV4S32: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 85666beb3e9bc..6c04fabdbbde7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -66,6 +66,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg) == LLT::pointer(4, 64);
case P5:
return MRI.getType(Reg) == LLT::pointer(5, 32);
+ case P8:
+ return MRI.getType(Reg) == LLT::pointer(8, 128);
case Ptr32:
return isAnyPtr(MRI.getType(Reg), 32);
case Ptr64:
@@ -108,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
case UniP5:
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
+ case UniP8:
+ return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
case UniPtr32:
return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
case UniPtr64:
@@ -905,7 +909,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
- addRulesForGOpcs({G_GLOBAL_VALUE}).Any({{UniP3}, {{SgprP3}, {}}});
+ addRulesForGOpcs({G_GLOBAL_VALUE})
+ .Any({{UniP0}, {{SgprP0}, {}}})
+ .Any({{UniP1}, {{SgprP1}, {}}})
+ .Any({{UniP3}, {{SgprP3}, {}}})
+ .Any({{UniP4}, {{SgprP4}, {}}})
+ .Any({{UniP8}, {{SgprP8}, {}}});
bool hasSALUFloat = ST->hasSALUFloatInsts();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 9c85b6531e421..fb392d7ae332b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -63,6 +63,7 @@ enum UniformityLLTOpPredicateID {
P3,
P4,
P5,
+ P8,
Ptr32,
Ptr64,
Ptr128,
@@ -72,6 +73,7 @@ enum UniformityLLTOpPredicateID {
UniP3,
UniP4,
UniP5,
+ UniP8,
UniPtr32,
UniPtr64,
UniPtr128,
@@ -139,6 +141,7 @@ enum RegBankLLTMappingApplyID {
SgprP3,
SgprP4,
SgprP5,
+ SgprP8,
SgprPtr32,
SgprPtr64,
SgprPtr128,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll
new file mode 100644
index 0000000000000..cf9524b860fd2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+
+@flat = external global i32, align 4
+@global = external addrspace(1) global i32, align 4
+@lds = addrspace(3) global i32 poison, align 4
+@constant = external addrspace(4) constant i32, align 4
+@buf = external addrspace(8) global i8
+
+define ptr @global_value_as0_external() {
+; GCN-LABEL: global_value_as0_external:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, flat@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, flat@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ ret ptr @flat
+}
+
+define ptr addrspace(1) @global_value_as1_external() {
+; GCN-LABEL: global_value_as1_external:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, global@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, global@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ ret ptr addrspace(1) @global
+}
+
+define ptr addrspace(4) @global_value_as4_external() {
+; GCN-LABEL: global_value_as4_external:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, constant@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, constant@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ ret ptr addrspace(4) @constant
+}
+
+define amdgpu_kernel void @global_value_as3_lds_kernel(ptr addrspace(1) %out) {
+; GCN-LABEL: global_value_as3_lds_kernel:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: global_store_dword v0, v0, s[0:1]
+; GCN-NEXT: s_endpgm
+ %addr = ptrtoint ptr addrspace(3) @lds to i32
+ store i32 %addr, ptr addrspace(1) %out
+ ret void
+}
+
+define void @global_value_as8_buffer_store(i32 %val) {
+; GCN-LABEL: global_value_as8_buffer_store:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_getpc_b64 s[8:9]
+; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %val, ptr addrspace(8) @buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define i32 @global_value_as8_buffer_load(i32 %offset) {
+; GCN-LABEL: global_value_as8_buffer_load:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_getpc_b64 s[8:9]
+; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) @buf, i32 %offset, i32 0, i32 0)
+ ret i32 %val
+}
+
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) #0
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg) #1
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
More information about the llvm-commits
mailing list