[llvm-branch-commits] [llvm] [AMDGPU] Fold constant offsets into named barrier addresses (PR #205216)
Chinmay Deshpande via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jun 22 19:48:10 PDT 2026
https://github.com/chinmaydd updated https://github.com/llvm/llvm-project/pull/205216
>From 77879b441d4698fddb7048e95d12f1cc857f93c5 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Mon, 22 Jun 2026 22:44:58 -0400
Subject: [PATCH] [AMDGPU] Fold constant offsets into named barrier addresses
Allow isOffsetFoldingLegal to fold a constant offset into an LDS
named-barrier global, and include the node offset when materializing the
LDS address in LowerGlobalAddress. As a result, s_barrier_signal_var on a
GEP'd named barrier now selects the immediate form, matching the code
generated for a bare global and by GlobalISel. With object linking, the
offset folds into the relocation addend.
Change-Id: I639bc723eb001573585cc05d0ad19f2773054f21
Assisted-by: Cursor
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 +++++++++++-----
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13 ++++++++++++-
.../CodeGen/AMDGPU/s-barrier-signal-var-gep.ll | 7 ++-----
3 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index be6fd4d243252..31b74ee44b0be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1551,7 +1551,11 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,
unsigned BarCnt = cast<GlobalVariable>(GV)->getGlobalSize(DL) / 16;
MFI->recordNumNamedBarriers(Address.value(), BarCnt);
}
- return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
+ assert((G->getOffset() == 0 ||
+ (IsNamedBarrier && G->getOffset() % 16 == 0)) &&
+ "named barrier offset must land on a barrier object");
+ return DAG.getConstant(*Address + G->getOffset(), SDLoc(Op),
+ Op.getValueType());
} else if (IsNamedBarrier) {
llvm_unreachable("named barrier should have an assigned address");
}
@@ -1580,15 +1584,17 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,
return DAG.getPOISON(Op.getValueType());
}
- // XXX: What does the value of G->getOffset() mean?
- assert(G->getOffset() == 0 &&
- "Do not know what to do with an non-zero offset");
+ assert((G->getOffset() == 0 ||
+ (AMDGPU::isNamedBarrier(*cast<GlobalVariable>(GV)) &&
+ G->getOffset() % 16 == 0)) &&
+ "named barrier offset must land on a barrier object");
// TODO: We could emit code to handle the initialization somewhere.
// We ignore the initializer for now and legalize it to allow selection.
// The initializer will anyway get errored out during assembly emission.
unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
+ return DAG.getConstant(Offset + G->getOffset(), SDLoc(Op),
+ Op.getValueType());
}
return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 076bd7c97c44c..fa959913c8d7a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPULaneMaskUtils.h"
+#include "AMDGPUMemoryUtils.h"
#include "AMDGPUSelectionDAGInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
@@ -9837,6 +9838,13 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
bool SITargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
+ // Named barriers have fixed, non-relocated LDS addresses, so a constant
+ // offset into an array of them can be folded into the address.
+ if (GA->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ const auto *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ return GV && AMDGPU::isNamedBarrier(*GV);
+ }
+
// OSes that use ELF REL relocations (instead of RELA) can only store a
// 32-bit addend in the instruction, so it is not safe to allow offset folding
// which can create arbitrary 64-bit addends. (This is only a problem for
@@ -12570,8 +12578,11 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
BarVal = C->getZExtValue();
else if (auto *GA = dyn_cast<GlobalAddressSDNode>(BarOp))
if (auto Addr = AMDGPUMachineFunctionInfo::getLDSAbsoluteAddress(
- *GA->getGlobal()))
+ *GA->getGlobal())) {
+ assert(GA->getOffset() % 16 == 0 &&
+ "named barrier offset must land on a barrier object");
BarVal = *Addr + GA->getOffset();
+ }
if (BarVal) {
unsigned BarID = (*BarVal >> 4) & 0x3F;
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll
index ed40f669f0d71..b12950536057c 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-signal-var-gep.ll
@@ -68,8 +68,7 @@ define amdgpu_kernel void @signal_var_bar1() {
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; SDAG-NEXT: s_mov_b32 m0, 0x400002
; SDAG-NEXT: s_barrier_init m0
-; SDAG-NEXT: s_mov_b32 m0, 2
-; SDAG-NEXT: s_barrier_signal m0
+; SDAG-NEXT: s_barrier_signal 2
; SDAG-NEXT: s_barrier_wait 1
; SDAG-NEXT: s_endpgm
;
@@ -85,11 +84,9 @@ define amdgpu_kernel void @signal_var_bar1() {
; OBJ-SDAG-LABEL: signal_var_bar1:
; OBJ-SDAG: ; %bb.0:
; OBJ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
-; OBJ-SDAG-NEXT: s_add_co_i32 s0, __amdgpu_named_barrier.bars.cebb3cd1832bf92a6cb51d2898ea54dd@abs32@lo, 16
+; OBJ-SDAG-NEXT: s_lshr_b32 s0, __amdgpu_named_barrier.bars.cebb3cd1832bf92a6cb51d2898ea54dd@abs32@lo+16, 4
; OBJ-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; OBJ-SDAG-NEXT: s_lshr_b32 s0, s0, 4
; OBJ-SDAG-NEXT: s_and_b32 s0, s0, 63
-; OBJ-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; OBJ-SDAG-NEXT: s_or_b32 m0, 0x400000, s0
; OBJ-SDAG-NEXT: s_barrier_init m0
; OBJ-SDAG-NEXT: s_mov_b32 m0, s0
More information about the llvm-branch-commits
mailing list