[llvm] [DirectX] ForwardHandle needs to check if globals were stored on allocas (PR #151751)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 08:53:01 PDT 2025
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/151751
>From bf7741b90bc0e3af482f1e233cd76313f5c322aa Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 1 Aug 2025 14:45:24 -0400
Subject: [PATCH 1/2] [DirectX] ForwardHandle needs to check if globals were
stored on allocas
fixes #140819
The SROA pass is making it so that some globals get loaded into stack
allocations. This means we find an alloca where we used to expect a load,
and now need to walk an alloca -> store -> maybe load chain before
we find the global. Doing so fixes all but two instances of #137715
and fixes every instance of `Load of "8.sroa.0" is not a global resource handle`
that we are currently seeing in the DML shaders.
---
.../DirectX/DXILForwardHandleAccesses.cpp | 35 ++++++++++++++++-
...e-140819_allow_forward_handle_on_alloca.ll | 39 +++++++++++++++++++
2 files changed, 72 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 73abfe7c48584..747472b1a4bc1 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -87,17 +87,48 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
for (LoadInst *LI : LoadsToProcess) {
Value *V = LI->getPointerOperand();
- auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ auto *GV = dyn_cast<GlobalVariable>(V);
// If we didn't find the global, we may need to walk through a level of
// indirection. This generally happens at -O0.
- if (!GV)
+ if (!GV) {
if (auto *NestedLI = dyn_cast<LoadInst>(V)) {
BasicBlock::iterator BBI(NestedLI);
Value *Loaded = FindAvailableLoadedValue(
NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
GV = dyn_cast_or_null<GlobalVariable>(Loaded);
+ } else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
+ for (auto &Use : NestedAlloca->uses()) {
+ auto *Store = dyn_cast<StoreInst>(Use.getUser());
+ if (!Store)
+ continue;
+
+ Value *StoredVal = Store->getValueOperand();
+ if (!StoredVal)
+ continue;
+
+ // Try direct global match
+ GV = dyn_cast<GlobalVariable>(StoredVal);
+ if (GV)
+ break;
+
+ // If it's a load, check its source
+ if (auto *Load = dyn_cast<LoadInst>(StoredVal)) {
+ GV = dyn_cast<GlobalVariable>(Load->getPointerOperand());
+ if (GV)
+ break;
+
+ // Try to find available loaded value
+ BasicBlock::iterator BBI(Load);
+ Value *Loaded = FindAvailableLoadedValue(Load, Load->getParent(),
+ BBI, 0, nullptr, nullptr);
+ GV = dyn_cast<GlobalVariable>(Loaded);
+ if (GV)
+ break;
+ }
+ }
}
+ }
auto It = HandleMap.find(GV);
if (It == HandleMap.end()) {
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
new file mode 100644
index 0000000000000..0c1ccb50caee7
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
+
+%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
+ at _ZL4dest = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
+ at .str = private unnamed_addr constant [5 x i8] c"dest\00", align 1
+
+
+; NOTE: intent of this test is to confirm load target("dx.RawBuffer", i32, 1, 0)
+; is replaced with call @llvm.dx.resource.getpointer
+define void @CSMain() local_unnamed_addr {
+; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @_ZL4dest, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZL4dest, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP_I1_SROA_0]])
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP_I1_SROA_0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %agg.tmp.i1.sroa.0 = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+ %0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ store target("dx.RawBuffer", i32, 1, 0) %0, ptr @_ZL4dest, align 4
+ %1 = tail call i32 @llvm.dx.thread.id(i32 0)
+ %2 = load i32, ptr @_ZL4dest, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %agg.tmp.i1.sroa.0)
+ store i32 %2, ptr %agg.tmp.i1.sroa.0, align 8
+ %agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0. = load target("dx.RawBuffer", i32, 1, 0), ptr %agg.tmp.i1.sroa.0, align 8
+ %3 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0., i32 %1)
+ store i32 0, ptr %3, align 4
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %agg.tmp.i1.sroa.0)
+ ret void
+}
>From ce5d9f7fb21e58b5b9a454d46c3b8343373dd43d Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Mon, 4 Aug 2025 11:52:44 -0400
Subject: [PATCH 2/2] address PR comments
---
.../DirectX/DXILForwardHandleAccesses.cpp | 4 ++-
...e-140819_allow_forward_handle_on_alloca.ll | 34 ++++++++-----------
2 files changed, 17 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 747472b1a4bc1..306db6a558779 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -118,7 +118,9 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
if (GV)
break;
- // Try to find available loaded value
+ // If loading from an unmodified stack copy of the global, reuse the
+ // global's value. Note: we are just repeating what we are doing for
+ // the load case for the alloca store pattern.
BasicBlock::iterator BBI(Load);
Value *Loaded = FindAvailableLoadedValue(Load, Load->getParent(),
BBI, 0, nullptr, nullptr);
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
index 0c1ccb50caee7..7c0813b0b4e36 100644
--- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
+++ b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
@@ -2,8 +2,8 @@
; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
- at _ZL4dest = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
- at .str = private unnamed_addr constant [5 x i8] c"dest\00", align 1
+ at global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
+ at name = private unnamed_addr constant [5 x i8] c"dest\00", align 1
; NOTE: intent of this test is to confirm load target("dx.RawBuffer", i32, 1, 0)
@@ -12,28 +12,22 @@ define void @CSMain() local_unnamed_addr {
; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
-; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @.str)
-; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @_ZL4dest, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZL4dest, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP_I1_SROA_0]])
+; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
+; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @global, align 4
; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP_I1_SROA_0]])
; CHECK-NEXT: ret void
;
entry:
- %agg.tmp.i1.sroa.0 = alloca target("dx.RawBuffer", i32, 1, 0), align 8
- %0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @.str)
- store target("dx.RawBuffer", i32, 1, 0) %0, ptr @_ZL4dest, align 4
- %1 = tail call i32 @llvm.dx.thread.id(i32 0)
- %2 = load i32, ptr @_ZL4dest, align 4
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %agg.tmp.i1.sroa.0)
- store i32 %2, ptr %agg.tmp.i1.sroa.0, align 8
- %agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0. = load target("dx.RawBuffer", i32, 1, 0), ptr %agg.tmp.i1.sroa.0, align 8
- %3 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0., i32 %1)
- store i32 0, ptr %3, align 4
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %agg.tmp.i1.sroa.0)
+ %alloca = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+ %handle = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
+ store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
+ %val = load i32, ptr @global, align 4
+ store i32 %val , ptr %alloca, align 8
+ %indirect = load target("dx.RawBuffer", i32, 1, 0), ptr %alloca, align 8
+ %buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
+ store i32 0, ptr %buff, align 4
ret void
}
More information about the llvm-commits
mailing list