[llvm] 47ef3a0 - [DirectX] Eliminate resource global variables from module (#114105)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 08:52:19 PST 2024


Author: Greg Roth
Date: 2024-11-12T09:52:14-07:00
New Revision: 47ef3a0951e1f285caef4aff289b12ed0a57137d

URL: https://github.com/llvm/llvm-project/commit/47ef3a0951e1f285caef4aff289b12ed0a57137d
DIFF: https://github.com/llvm/llvm-project/commit/47ef3a0951e1f285caef4aff289b12ed0a57137d.diff

LOG: [DirectX] Eliminate resource global variables from module (#114105)

By giving these intrinsics their appropriate attributes, loads of
globals that are stored on the other side of these calls can be
eliminated by the EarlyCSE pass. Stores to the same globals and the
globals themselves require more direct intervention as part of the
create/annotated handle lowering. 

Adds a test that verifies that the unneeded globals and their uses can
be eliminated and also that the attributes are set properly.

Fixes #104271

Added: 
    llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsDirectX.td
    llvm/lib/Target/DirectX/DXILOpLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 178cb8e814ddca..6093664c908dc5 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -28,12 +28,14 @@ def int_dx_handle_fromBinding
           [IntrNoMem]>;
 
 def int_dx_typedBufferLoad
-    : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
+    : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty],
+                            [IntrReadMem]>;
 def int_dx_typedBufferLoad_checkbit
     : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
-                            [llvm_any_ty, llvm_i32_ty]>;
+                            [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
 def int_dx_typedBufferStore
-    : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
+    : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty],
+                            [IntrWriteMem]>;
 
 def int_dx_updateCounter
     : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i8_ty]>;

diff  --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0dd3a8dc1ad4ce..02b441126cfd05 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -204,6 +204,25 @@ class OpLowerer {
     CleanupCasts.clear();
   }
 
+  // Erase each store of the handle created by \p CI and delete any resource
+  // global that is left unused afterwards. Dead constant users are pruned
+  // before the use check so that they cannot keep such a global alive.
+  // TODO: We should verify that all the globals get removed. It's expected
+  // we'll need a custom pass in the future that makes this unnecessary.
+  void removeResourceGlobals(CallInst *CI) {
+    for (User *U : make_early_inc_range(CI->users())) {
+      if (auto *Store = dyn_cast<StoreInst>(U)) {
+        Value *Ptr = Store->getPointerOperand();
+        Store->eraseFromParent();
+        if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) {
+          GV->removeDeadConstantUsers();
+          if (GV->use_empty())
+            GV->eraseFromParent();
+        }
+      }
+    }
+  }
+
   [[nodiscard]] bool lowerToCreateHandle(Function &F) {
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int8Ty = IRB.getInt8Ty();
@@ -228,6 +247,8 @@ class OpLowerer {
 
       Value *Cast = createTmpHandleCast(*OpCall, CI->getType());
 
+      removeResourceGlobals(CI);
+
       CI->replaceAllUsesWith(Cast);
       CI->eraseFromParent();
       return Error::success();
@@ -272,6 +293,8 @@ class OpLowerer {
 
       Value *Cast = createTmpHandleCast(*OpAnnotate, CI->getType());
 
+      removeResourceGlobals(CI);
+
       CI->replaceAllUsesWith(Cast);
       CI->eraseFromParent();
 

diff  --git a/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
new file mode 100644
index 00000000000000..c0fbc3d9150bc7
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceGlobalElimination.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
+; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
+; Finish compiling to verify that dxil-op-lower removes the globals entirely.
+; RUN: opt -mtriple=dxil-pc-shadermodel6.0-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
+; RUN: opt -mtriple=dxil-pc-shadermodel6.6-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
+; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
+; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
+
+; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
+; Also that DXILOpLowering eliminates the globals entirely.
+
+%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
+
+; DXOP-NOT: @In = global
+; DXOP-NOT: @Out = global
+@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+
+; CHECK-LABEL: define void @main()
+define void @main() local_unnamed_addr #0 {
+entry:
+  ; DXOP: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
+  ; DXOP: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
+  %In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
+  %Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
+  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
+  ; CSE: call i32 @llvm.dx.flattened.thread.id.in.group()
+  %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+  ; CHECK-NOT: load {{.*}} ptr @In
+  %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
+  ; CSE: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
+  %2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
+  ; CHECK-NOT: load {{.*}} ptr @In
+  %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
+  %4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
+  %add.i = fadd <4 x float> %2, %4
+  call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
+  ; CHECK: ret void
+  ret void
+}
+
+; CSE-DAG: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
+; CSE-DAG: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]
+
+attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+; Just need to split up the DAG searches.
+; CSE: attributes #0
+
+; CSE-DAG: attributes [[ROAttr]] = { {{.*}} memory(read) }
+; CSE-DAG: attributes [[WOAttr]] = { {{.*}} memory(write) }


        


More information about the llvm-commits mailing list