[llvm-branch-commits] [llvm] [AMDGPU] Fold constant offsets into named barrier addresses (PR #205216)

Chinmay Deshpande via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jun 22 19:48:10 PDT 2026


https://github.com/chinmaydd updated https://github.com/llvm/llvm-project/pull/205216

>From 77879b441d4698fddb7048e95d12f1cc857f93c5 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Mon, 22 Jun 2026 22:44:58 -0400
Subject: [PATCH] [AMDGPU] Fold constant offsets into named barrier addresses

Allow isOffsetFoldingLegal to fold a constant offset into an LDS
named-barrier global, and include the node's offset when materializing the
LDS address in LowerGlobalAddress. As a result, s_barrier_signal_var on a
GEP'd named barrier now selects the immediate form under SelectionDAG,
matching the code generated for a bare global and by GlobalISel. With object
linking, the offset folds into the relocation addend.

Change-Id: I639bc723eb001573585cc05d0ad19f2773054f21
Assisted-by: Cursor
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp    | 16 +++++++++++-----
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp        | 13 ++++++++++++-
 .../CodeGen/AMDGPU/s-barrier-signal-var-gep.ll   |  7 ++-----
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index be6fd4d243252..31b74ee44b0be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1551,7 +1551,11 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,
         unsigned BarCnt = cast<GlobalVariable>(GV)->getGlobalSize(DL) / 16;
         MFI->recordNumNamedBarriers(Address.value(), BarCnt);
       }
-      return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
+      assert((G->getOffset() == 0 ||
+              (IsNamedBarrier && G->getOffset() % 16 == 0)) &&
+             "named barrier offset must land on a barrier object");
+      return DAG.getConstant(*Address + G->getOffset(), SDLoc(Op),
+                             Op.getValueType());
     } else if (IsNamedBarrier) {
       llvm_unreachable("named barrier should have an assigned address");
     }
@@ -1580,15 +1584,17 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,
       return DAG.getPOISON(Op.getValueType());
     }
 
-    // XXX: What does the value of G->getOffset() mean?
-    assert(G->getOffset() == 0 &&
-         "Do not know what to do with an non-zero offset");
+    assert((G->getOffset() == 0 ||
+            (AMDGPU::isNamedBarrier(*cast<GlobalVariable>(GV)) &&
+             G->getOffset() % 16 == 0)) &&
+           "named barrier offset must land on a barrier object");
 
     // TODO: We could emit code to handle the initialization somewhere.
     // We ignore the initializer for now and legalize it to allow selection.
     // The initializer will anyway get errored out during assembly emission.
     unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
-    return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
+    return DAG.getConstant(Offset + G->getOffset(), SDLoc(Op),
+                           Op.getValueType());
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 076bd7c97c44c..fa959913c8d7a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPULaneMaskUtils.h"
+#include "AMDGPUMemoryUtils.h"
 #include "AMDGPUSelectionDAGInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "GCNSubtarget.h"
@@ -9837,6 +9838,13 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
 
 bool SITargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
+  // Named barriers have fixed, non-relocated LDS addresses, so a constant
+  // offset into an array of them can be folded into the address.
+  if (GA->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+    const auto *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+    return GV && AMDGPU::isNamedBarrier(*GV);
+  }
+
   // OSes that use ELF REL relocations (instead of RELA) can only store a
   // 32-bit addend in the instruction, so it is not safe to allow offset folding
   // which can create arbitrary 64-bit addends. (This is only a problem for
@@ -12570,8 +12578,11 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
         BarVal = C->getZExtValue();
       else if (auto *GA = dyn_cast<GlobalAddressSDNode>(BarOp))
         if (auto Addr = AMDGPUMachineFunctionInfo::getLDSAbsoluteAddress(
-                *GA->getGlobal()))
+                *GA->getGlobal())) {
+          assert(GA->getOffset() % 16 == 0 &&
+                 "named barrier offset must land on a barrier object");
           BarVal = *Addr + GA->getOffset();
+        }
 
       if (BarVal) {
         unsigned BarID = (*BarVal >> 4) & 0x3F;
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll
index ed40f669f0d71..b12950536057c 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll
@@ -68,8 +68,7 @@ define amdgpu_kernel void @signal_var_bar1() {
 ; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
 ; SDAG-NEXT:    s_mov_b32 m0, 0x400002
 ; SDAG-NEXT:    s_barrier_init m0
-; SDAG-NEXT:    s_mov_b32 m0, 2
-; SDAG-NEXT:    s_barrier_signal m0
+; SDAG-NEXT:    s_barrier_signal 2
 ; SDAG-NEXT:    s_barrier_wait 1
 ; SDAG-NEXT:    s_endpgm
 ;
@@ -85,11 +84,9 @@ define amdgpu_kernel void @signal_var_bar1() {
 ; OBJ-SDAG-LABEL: signal_var_bar1:
 ; OBJ-SDAG:       ; %bb.0:
 ; OBJ-SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
-; OBJ-SDAG-NEXT:    s_add_co_i32 s0, __amdgpu_named_barrier.bars.cebb3cd1832bf92a6cb51d2898ea54dd at abs32@lo, 16
+; OBJ-SDAG-NEXT:    s_lshr_b32 s0, __amdgpu_named_barrier.bars.cebb3cd1832bf92a6cb51d2898ea54dd at abs32@lo+16, 4
 ; OBJ-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; OBJ-SDAG-NEXT:    s_lshr_b32 s0, s0, 4
 ; OBJ-SDAG-NEXT:    s_and_b32 s0, s0, 63
-; OBJ-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; OBJ-SDAG-NEXT:    s_or_b32 m0, 0x400000, s0
 ; OBJ-SDAG-NEXT:    s_barrier_init m0
 ; OBJ-SDAG-NEXT:    s_mov_b32 m0, s0



More information about the llvm-branch-commits mailing list