[llvm] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_BLOCK_ADDR and G_GLOBAL_VALUE (PR #165340)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 10 08:49:47 PST 2025


https://github.com/vangthao95 updated https://github.com/llvm/llvm-project/pull/165340

>From 3e62f0298e873c3960185a77e795170628531a32 Mon Sep 17 00:00:00 2001
From: Vang Thao <vthao at amd.com>
Date: Mon, 27 Oct 2025 17:30:25 -0700
Subject: [PATCH 1/2] [AMDGPU][GlobalISel] Add RegBankLegalize support for
 G_BLOCK_ADDR and G_GLOBAL_VALUE

---
 llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp    | 4 ++++
 llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp     | 4 ++++
 llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h       | 1 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll         | 2 +-
 .../CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll     | 8 ++++----
 .../AMDGPU/GlobalISel/regbankselect-block-addr.mir        | 2 +-
 6 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 540756653dd22..023253b3cb0bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -841,6 +841,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr128:
   case Vgpr128:
     return LLT::scalar(128);
+  case SgprP0:
   case VgprP0:
     return LLT::pointer(0, 64);
   case SgprP1:
@@ -940,6 +941,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case Sgpr32_WF:
   case Sgpr64:
   case Sgpr128:
+  case SgprP0:
   case SgprP1:
   case SgprP3:
   case SgprP4:
@@ -1022,6 +1024,7 @@ void RegBankLegalizeHelper::applyMappingDst(
     case Sgpr32:
     case Sgpr64:
     case Sgpr128:
+    case SgprP0:
     case SgprP1:
     case SgprP3:
     case SgprP4:
@@ -1163,6 +1166,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
     case Sgpr32:
     case Sgpr64:
     case Sgpr128:
+    case SgprP0:
     case SgprP1:
     case SgprP3:
     case SgprP4:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index a67b12a22589c..85666beb3e9bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -903,6 +903,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
 
   addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});
 
+  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
+
+  addRulesForGOpcs({G_GLOBAL_VALUE}).Any({{UniP3}, {{SgprP3}, {}}});
+
   bool hasSALUFloat = ST->hasSALUFloatInsts();
 
   addRulesForGOpcs({G_FADD}, Standard)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efda77fdd..9c85b6531e421 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -134,6 +134,7 @@ enum RegBankLLTMappingApplyID {
   Sgpr32,
   Sgpr64,
   Sgpr128,
+  SgprP0,
   SgprP1,
   SgprP3,
   SgprP4,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
index 82886ab9e7d55..e1ac8ba5e6db4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
 ; FIXME: Merge with DAG test
 
 @lds.external = external unnamed_addr addrspace(3) global [0 x i32]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
index cabb37c330b4a..3396eaedf359e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s
-; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
 
 ; CHECK: error: lds: unsupported initializer for address space
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir
index a50c7fe0748b8..fc86dd884fac0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s
+# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
 
 --- |
 

>From 59638178c9e026c5b370d1ab3da51820cf7746b3 Mon Sep 17 00:00:00 2001
From: Vang Thao <vthao at amd.com>
Date: Fri, 7 Nov 2025 11:29:28 -0800
Subject: [PATCH 2/2] Add more rules for G_GLOBAL_VALUE and add .ll test for
 G_GLOBAL_VALUE

---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp    |   5 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |  11 +-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h       |   3 +
 .../GlobalISel/global-value-addrspaces.ll     | 104 ++++++++++++++++++
 4 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 023253b3cb0bf..664a511efd973 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -856,6 +856,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
   case SgprP5:
   case VgprP5:
     return LLT::pointer(5, 32);
+  case SgprP8:
+    return LLT::pointer(8, 128);
   case SgprV2S16:
   case VgprV2S16:
   case UniInVgprV2S16:
@@ -946,6 +948,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case SgprP3:
   case SgprP4:
   case SgprP5:
+  case SgprP8:
   case SgprPtr32:
   case SgprPtr64:
   case SgprPtr128:
@@ -1029,6 +1032,7 @@ void RegBankLegalizeHelper::applyMappingDst(
     case SgprP3:
     case SgprP4:
     case SgprP5:
+    case SgprP8:
     case SgprV2S16:
     case SgprV2S32:
     case SgprV4S32:
@@ -1171,6 +1175,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
     case SgprP3:
     case SgprP4:
     case SgprP5:
+    case SgprP8:
     case SgprV2S16:
     case SgprV2S32:
     case SgprV4S32: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 85666beb3e9bc..6c04fabdbbde7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -66,6 +66,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg) == LLT::pointer(4, 64);
   case P5:
     return MRI.getType(Reg) == LLT::pointer(5, 32);
+  case P8:
+    return MRI.getType(Reg) == LLT::pointer(8, 128);
   case Ptr32:
     return isAnyPtr(MRI.getType(Reg), 32);
   case Ptr64:
@@ -108,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
   case UniP5:
     return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
+  case UniP8:
+    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
   case UniPtr32:
     return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
   case UniPtr64:
@@ -905,7 +909,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
 
   addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
 
-  addRulesForGOpcs({G_GLOBAL_VALUE}).Any({{UniP3}, {{SgprP3}, {}}});
+  addRulesForGOpcs({G_GLOBAL_VALUE})
+      .Any({{UniP0}, {{SgprP0}, {}}})
+      .Any({{UniP1}, {{SgprP1}, {}}})
+      .Any({{UniP3}, {{SgprP3}, {}}})
+      .Any({{UniP4}, {{SgprP4}, {}}})
+      .Any({{UniP8}, {{SgprP8}, {}}});
 
   bool hasSALUFloat = ST->hasSALUFloatInsts();
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 9c85b6531e421..fb392d7ae332b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -63,6 +63,7 @@ enum UniformityLLTOpPredicateID {
   P3,
   P4,
   P5,
+  P8,
   Ptr32,
   Ptr64,
   Ptr128,
@@ -72,6 +73,7 @@ enum UniformityLLTOpPredicateID {
   UniP3,
   UniP4,
   UniP5,
+  UniP8,
   UniPtr32,
   UniPtr64,
   UniPtr128,
@@ -139,6 +141,7 @@ enum RegBankLLTMappingApplyID {
   SgprP3,
   SgprP4,
   SgprP5,
+  SgprP8,
   SgprPtr32,
   SgprPtr64,
   SgprPtr128,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll
new file mode 100644
index 0000000000000..cf9524b860fd2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+
+@flat = external global i32, align 4
+@global = external addrspace(1) global i32, align 4
+@lds = addrspace(3) global i32 poison, align 4
+@constant = external addrspace(4) constant i32, align 4
+@buf = external addrspace(8) global i8
+
+define ptr @global_value_as0_external() {
+; GCN-LABEL: global_value_as0_external:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, flat@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, flat@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  ret ptr @flat
+}
+
+define ptr addrspace(1) @global_value_as1_external() {
+; GCN-LABEL: global_value_as1_external:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, global@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, global@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  ret ptr addrspace(1) @global
+}
+
+define ptr addrspace(4) @global_value_as4_external() {
+; GCN-LABEL: global_value_as4_external:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, constant@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, constant@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  ret ptr addrspace(4) @constant
+}
+
+define amdgpu_kernel void @global_value_as3_lds_kernel(ptr addrspace(1) %out) {
+; GCN-LABEL: global_value_as3_lds_kernel:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_store_dword v0, v0, s[0:1]
+; GCN-NEXT:    s_endpgm
+  %addr = ptrtoint ptr addrspace(3) @lds to i32
+  store i32 %addr, ptr addrspace(1) %out
+  ret void
+}
+
+define void @global_value_as8_buffer_store(i32 %val) {
+; GCN-LABEL: global_value_as8_buffer_store:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[8:9]
+; GCN-NEXT:    s_add_u32 s8, s8, buf@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %val, ptr addrspace(8) @buf, i32 0, i32 0, i32 0)
+  ret void
+}
+
+define i32 @global_value_as8_buffer_load(i32 %offset) {
+; GCN-LABEL: global_value_as8_buffer_load:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[8:9]
+; GCN-NEXT:    s_add_u32 s8, s8, buf@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) @buf, i32 %offset, i32 0, i32 0)
+  ret i32 %val
+}
+
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) #0
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg) #1
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }



More information about the llvm-commits mailing list