[llvm] [DirectX] Eliminate resource global variables from module (PR #114105)
Greg Roth via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 10:29:17 PST 2024
https://github.com/pow2clk updated https://github.com/llvm/llvm-project/pull/114105
>From 3775e77b74bd6dbbc0e79ea499a38c67fa08ec3d Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Tue, 29 Oct 2024 11:39:28 -0600
Subject: [PATCH 1/5] [DirectX] Mark buffer load/store as mem read/write
By giving these intrinsics their appropriate attributes, loads of allocas that are stored on the other side of these calls can be eliminated.
Adds a test that verifies that the unneeded loads can be eliminated and also that the attributes are set properly.
Fixes #104271
This may be the first part of a broader audit of
---
llvm/include/llvm/IR/IntrinsicsDirectX.td | 6 +-
.../DirectX/ResourceGlobalElimination.ll | 85 +++++++++++++++++++
2 files changed, 88 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index e30d37f69f781e..88c17a3378cda5 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -28,12 +28,12 @@ def int_dx_handle_fromBinding
[IntrNoMem]>;
def int_dx_typedBufferLoad
- : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
+ : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
def int_dx_typedBufferLoad_checkbit
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
- [llvm_any_ty, llvm_i32_ty]>;
+ [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
def int_dx_typedBufferStore
- : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
+ : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty], [IntrWriteMem]>;
// Cast between target extension handle types and dxil-style opaque handles
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
diff --git a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
new file mode 100644
index 00000000000000..c7ba9377911008
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
@@ -0,0 +1,85 @@
+; RUN: opt -S -passes='early-cse<memssa>' %s | FileCheck %s
+
+; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxilv1.6-unknown-shadermodel6.6-compute"
+
+%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
+
+@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+
+; Function Attrs: convergent noinline norecurse
+; CHECK-LABEL define void @main()
+define void @main() local_unnamed_addr #0 {
+entry:
+ %tmp = alloca target("dx.TypedBuffer", <4 x float>, 1, 0, 0), align 4
+ %In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
+ store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
+ %Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
+ store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
+ ; CHECK: call i32 @llvm.dx.flattened.thread.id.in.group()
+ %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+ ; CHECK-NOT: load {{.*}} ptr @In
+ %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
+ ; CHECK call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
+ %2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
+ ; CHECK-NOT: load {{.*}} ptr @In
+ %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
+ %4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
+ %add.i = fadd <4 x float> %2, %4
+ store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr %tmp, align 4
+ call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
+ ret void
+}
+
+; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.flattened.thread.id.in.group() #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
+; CHECK: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
+declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
+; CHECK: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
+declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3
+
+; CHECK: attributes [[ROAttr]] = { {{.*}} memory(read) }
+; CHECK: attributes [[WOAttr]] = { {{.*}} memory(write) }
+
+attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0, !1}
+!dx.valver = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, i32 8}
+!3 = !{!"clang version 20.0.0git (git@github.com:llvm/llvm-project.git 54dc966bd3d375d7c1604fac5fdac20989c1072a)"}
+!4 = !{!5}
+!5 = distinct !{!5, !6, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
+!6 = distinct !{!6, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
+!7 = !{!8, !9, i64 0}
+!8 = !{!"_ZTSN4hlsl8RWBufferIDv4_fEE", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C++ TBAA"}
+!11 = !{!12}
+!12 = distinct !{!12, !13, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
+!13 = distinct !{!13, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
+!14 = !{!15}
+!15 = distinct !{!15, !16, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
+!16 = distinct !{!16, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
+!17 = !{!18, !9, i64 0}
+!18 = !{!"_ZTSN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EE", !9, i64 0, !19, i64 4}
+!19 = !{!"int", !9, i64 0}
+!20 = !{!21}
+!21 = distinct !{!21, !22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_: %agg.result"}
+!22 = distinct !{!22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_"}
>From d013656cfcc9ababfe0707a9cf7157ebe1e3d191 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Wed, 30 Oct 2024 17:54:18 -0600
Subject: [PATCH 2/5] [DirectX] Eliminate resource global variables from module
By giving these intrinsics their appropriate attributes, loads of globals that are stored on the other side of these calls can be eliminated by the EarlyCSE pass. Stores to the same globals and the globals themselves require more direct intervention as part of the handleFromBinding lowering.
Adds a test that verifies that the unneeded globals and their uses can be eliminated and also that the attributes are set properly.
Fixes #104271
---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 20 ++++++++++++++
.../DirectX/ResourceGlobalElimination.ll | 26 ++++++++++++-------
2 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 8acc9c1efa08c0..34b2253a529828 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -204,6 +204,22 @@ class OpLowerer {
CleanupCasts.clear();
}
+ // Remove the resource global associated with the handleFromBinding call instruction
+ // and their uses as they aren't needed anymore.
+ void removeResourceGlobals(CallInst *CI) {
+ for (User *User : make_early_inc_range(CI->users())) {
+ if(StoreInst *Store = dyn_cast<StoreInst>(User)) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Store->getOperand(1))) {
+ Store->eraseFromParent();
+ assert(GV->use_empty() && "Buffer global still has users");
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ }
+ }
+ }
+ }
+
+
[[nodiscard]] bool lowerToCreateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
@@ -228,6 +244,8 @@ class OpLowerer {
Value *Cast = createTmpHandleCast(*OpCall, CI->getType());
+ removeResourceGlobals(CI);
+
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
return Error::success();
@@ -272,6 +290,8 @@ class OpLowerer {
Value *Cast = createTmpHandleCast(*OpAnnotate, CI->getType());
+ removeResourceGlobals(CI);
+
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
diff --git a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
index c7ba9377911008..1d1c632244b3ba 100644
--- a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
@@ -1,12 +1,19 @@
-; RUN: opt -S -passes='early-cse<memssa>' %s | FileCheck %s
+; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
+; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
+; finish compiling to verify that dxil-op-lower removes the globals entirely
+; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
+; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
+; Also that DXILOpLowering eliminates the globals entirely.
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxilv1.6-unknown-shadermodel6.6-compute"
%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
+; LLC-NOT: @In = global
+; LLC-NOT: @Out = global
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@@ -14,23 +21,24 @@ target triple = "dxilv1.6-unknown-shadermodel6.6-compute"
; CHECK-LABEL define void @main()
define void @main() local_unnamed_addr #0 {
entry:
- %tmp = alloca target("dx.TypedBuffer", <4 x float>, 1, 0, 0), align 4
+ ; LLC: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
+ ; LLC: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
%In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
%Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
- ; CHECK: call i32 @llvm.dx.flattened.thread.id.in.group()
+ ; CSE: call i32 @llvm.dx.flattened.thread.id.in.group()
%0 = call i32 @llvm.dx.flattened.thread.id.in.group()
; CHECK-NOT: load {{.*}} ptr @In
%1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
- ; CHECK call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
+ ; CSE: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
%2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
; CHECK-NOT: load {{.*}} ptr @In
%3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
%4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
%add.i = fadd <4 x float> %2, %4
- store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr %tmp, align 4
call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
+ ; CHECK: ret void
ret void
}
@@ -38,18 +46,18 @@ entry:
declare i32 @llvm.dx.flattened.thread.id.in.group() #1
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
-; CHECK: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
+; CSE: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
-; CHECK: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
+; CSE: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3
-; CHECK: attributes [[ROAttr]] = { {{.*}} memory(read) }
-; CHECK: attributes [[WOAttr]] = { {{.*}} memory(write) }
+; CSE: attributes [[ROAttr]] = { {{.*}} memory(read) }
+; CSE: attributes [[WOAttr]] = { {{.*}} memory(write) }
attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) }
>From 127d5821c0f309107e74534a683d7c505b9a8111 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Thu, 31 Oct 2024 11:44:14 -0600
Subject: [PATCH 3/5] Make GV removal more robust; add opt dxil-op-lower test
---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 13 +++++++------
.../CodeGen/DirectX/ResourceGlobalElimination.ll | 1 +
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 34b2253a529828..6044e305c28fd6 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -209,12 +209,13 @@ class OpLowerer {
void removeResourceGlobals(CallInst *CI) {
for (User *User : make_early_inc_range(CI->users())) {
if(StoreInst *Store = dyn_cast<StoreInst>(User)) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Store->getOperand(1))) {
- Store->eraseFromParent();
- assert(GV->use_empty() && "Buffer global still has users");
- GV->removeDeadConstantUsers();
- GV->eraseFromParent();
- }
+ Value *V = Store->getOperand(1);
+ Store->eraseFromParent();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if(GV->use_empty()) {
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ }
}
}
}
diff --git a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
index 1d1c632244b3ba..ac7be66233b438 100644
--- a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
@@ -1,6 +1,7 @@
; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
; finish compiling to verify that dxil-op-lower removes the globals entirely
+; RUN: opt -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=LLC,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
>From 5cc11c73aed082acdab5ddf67e9cfbee04030211 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Thu, 31 Oct 2024 11:59:56 -0600
Subject: [PATCH 4/5] clang-format
---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 6044e305c28fd6..60686793b231ef 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -204,23 +204,22 @@ class OpLowerer {
CleanupCasts.clear();
}
- // Remove the resource global associated with the handleFromBinding call instruction
- // and their uses as they aren't needed anymore.
+ // Remove the resource global associated with the handleFromBinding call
+ // instruction and their uses as they aren't needed anymore.
void removeResourceGlobals(CallInst *CI) {
for (User *User : make_early_inc_range(CI->users())) {
- if(StoreInst *Store = dyn_cast<StoreInst>(User)) {
- Value *V = Store->getOperand(1);
- Store->eraseFromParent();
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if(GV->use_empty()) {
- GV->removeDeadConstantUsers();
- GV->eraseFromParent();
- }
+ if (StoreInst *Store = dyn_cast<StoreInst>(User)) {
+ Value *V = Store->getOperand(1);
+ Store->eraseFromParent();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (GV->use_empty()) {
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ }
}
}
}
-
[[nodiscard]] bool lowerToCreateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
>From 2a18dca89b90d1959cbdbc25e25669b5e4052b6a Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Mon, 4 Nov 2024 11:27:44 -0700
Subject: [PATCH 5/5] Respond to sundry feedback
Format IntrinsicsDirectX.td
Add todo comment about future work
Clean up the test: remove unneeded elements and give the check prefixes better names
---
llvm/include/llvm/IR/IntrinsicsDirectX.td | 6 +-
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 3 +
.../DirectX/ResourceGlobalElimination.ll | 68 +++----------------
3 files changed, 16 insertions(+), 61 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 88c17a3378cda5..919542e56ad4a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -28,12 +28,14 @@ def int_dx_handle_fromBinding
[IntrNoMem]>;
def int_dx_typedBufferLoad
- : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
+ : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty],
+ [IntrReadMem]>;
def int_dx_typedBufferLoad_checkbit
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
[llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
def int_dx_typedBufferStore
- : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty], [IntrWriteMem]>;
+ : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty],
+ [IntrWriteMem]>;
// Cast between target extension handle types and dxil-style opaque handles
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 60686793b231ef..e15ffe36fd76fd 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -206,6 +206,9 @@ class OpLowerer {
// Remove the resource global associated with the handleFromBinding call
// instruction and their uses as they aren't needed anymore.
+ // TODO: We should verify that all the globals get removed.
+ // It's expected we'll need a custom pass in the future that will eliminate
+ // the need for this here.
void removeResourceGlobals(CallInst *CI) {
for (User *User : make_early_inc_range(CI->users())) {
if (StoreInst *Store = dyn_cast<StoreInst>(User)) {
diff --git a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
index ac7be66233b438..3fd46c355fcabb 100644
--- a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
@@ -1,29 +1,26 @@
; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
-; finish compiling to verify that dxil-op-lower removes the globals entirely
-; RUN: opt -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=LLC,CHECK %s
-; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
-; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=LLC,CHECK %s
+; Finish compiling to verify that dxil-op-lower removes the globals entirely.
+; RUN: opt -mtriple=dxil-pc-shadermodel6.0-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
+; RUN: opt -mtriple=dxil-pc-shadermodel6.6-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
+; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
+; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
; Also that DXILOpLowering eliminates the globals entirely.
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxilv1.6-unknown-shadermodel6.6-compute"
-
%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
-; LLC-NOT: @In = global
-; LLC-NOT: @Out = global
+; DXOP-NOT: @In = global
+; DXOP-NOT: @Out = global
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
-; Function Attrs: convergent noinline norecurse
; CHECK-LABEL define void @main()
define void @main() local_unnamed_addr #0 {
entry:
- ; LLC: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
- ; LLC: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
+ ; DXOP: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
+ ; DXOP: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
%In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
%Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
@@ -43,52 +40,5 @@ entry:
ret void
}
-; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-declare i32 @llvm.dx.flattened.thread.id.in.group() #1
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
-; CSE: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
-declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) #2
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
-; CSE: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
-declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) #2
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32, i32, i32, i32, i1) #3
-
-; CSE: attributes [[ROAttr]] = { {{.*}} memory(read) }
-; CSE: attributes [[WOAttr]] = { {{.*}} memory(write) }
-
attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(none) }
-attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
-attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
-
-!llvm.module.flags = !{!0, !1}
-!dx.valver = !{!2}
-!llvm.ident = !{!3}
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"frame-pointer", i32 2}
-!2 = !{i32 1, i32 8}
-!3 = !{!"clang version 20.0.0git (git@github.com:llvm/llvm-project.git 54dc966bd3d375d7c1604fac5fdac20989c1072a)"}
-!4 = !{!5}
-!5 = distinct !{!5, !6, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
-!6 = distinct !{!6, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
-!7 = !{!8, !9, i64 0}
-!8 = !{!"_ZTSN4hlsl8RWBufferIDv4_fEE", !9, i64 0}
-!9 = !{!"omnipotent char", !10, i64 0}
-!10 = !{!"Simple C++ TBAA"}
-!11 = !{!12}
-!12 = distinct !{!12, !13, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
-!13 = distinct !{!13, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
-!14 = !{!15}
-!15 = distinct !{!15, !16, !"_ZN4hlsl8RWBufferIDv4_fEixEi: %agg.result"}
-!16 = distinct !{!16, !"_ZN4hlsl8RWBufferIDv4_fEixEi"}
-!17 = !{!18, !9, i64 0}
-!18 = !{!"_ZTSN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EE", !9, i64 0, !19, i64 4}
-!19 = !{!"int", !9, i64 0}
-!20 = !{!21}
-!21 = distinct !{!21, !22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_: %agg.result"}
-!22 = distinct !{!22, !"_ZN4hlsl8__detail18TypedResourceProxyIU9_Res_u_CTDv4_fu17__hlsl_resource_tS2_EaSES2_"}
More information about the llvm-commits
mailing list