[llvm] 544562e - [DirectX] Remove lifetime intrinsics and run Dead Store Elimination (#152636)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 12 09:42:11 PDT 2025


Author: Farzon Lotfi
Date: 2025-08-12T12:42:08-04:00
New Revision: 544562ebc2bc6f38cf21a2a82a84dcb4a14b06b9

URL: https://github.com/llvm/llvm-project/commit/544562ebc2bc6f38cf21a2a82a84dcb4a14b06b9
DIFF: https://github.com/llvm/llvm-project/commit/544562ebc2bc6f38cf21a2a82a84dcb4a14b06b9.diff

LOG: [DirectX] Remove lifetime intrinsics and run Dead Store Elimination (#152636)

fixes #151764

This fix has two parts. First, we track all lifetime intrinsics and, if
they are users of an alloca of a target extension type like dx.RawBuffer,
we erase those lifetime intrinsics when we visit the alloca.

We do step one so that we can then run the Dead Store Elimination pass,
which is the second part. That pass removes the alloca and simplifies the
use of the target extension type back to using just the global, which
keeps things in the form the DXILBitcodeWriter is expecting.

To pull this off, we needed to bring back the legacy pass manager
plumbing for the DSE pass and hook it up into the DirectX backend.

The net impact of this change is that DML shader pass rate went from
89.72% (4268 successful compilations) to 90.98% (4328 successful
compilations).

Added: 
    llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll

Modified: 
    llvm/include/llvm/InitializePasses.h
    llvm/include/llvm/LinkAllPasses.h
    llvm/include/llvm/Transforms/Scalar.h
    llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
    llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
    llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/lib/Transforms/Scalar/Scalar.cpp
    llvm/test/CodeGen/DirectX/llc-pipeline.ll
    llvm/test/tools/dxil-dis/lifetimes.ll

Removed: 
    llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll


################################################################################
diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 31801daa126ad..e55f94b9022ee 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -85,6 +85,7 @@ LLVM_ABI void initializeCycleInfoWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeDAEPass(PassRegistry &);
 LLVM_ABI void initializeDAHPass(PassRegistry &);
 LLVM_ABI void initializeDCELegacyPassPass(PassRegistry &);
+LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &);
 LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &);
 LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &);

diff  --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index f82a43967e67a..ea56c83a3b709 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -77,6 +77,7 @@ struct ForcePassLinking {
     (void)llvm::createDXILResourceTypeWrapperPassPass();
     (void)llvm::createDeadArgEliminationPass();
     (void)llvm::createDeadCodeEliminationPass();
+    (void)llvm::createDeadStoreEliminationPass();
     (void)llvm::createDependenceAnalysisWrapperPass();
     (void)llvm::createDomOnlyPrinterWrapperPassPass();
     (void)llvm::createDomPrinterWrapperPassPass();

diff  --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 1398f171b0f78..8e68b6a57e51f 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -33,6 +33,13 @@ class Pass;
 //
 LLVM_ABI FunctionPass *createDeadCodeEliminationPass();
 
+//===----------------------------------------------------------------------===//
+//
+// DeadStoreElimination - This pass deletes stores that are post-dominated by
+// must-aliased stores and are not loaded used between the stores.
+//
+LLVM_ABI FunctionPass *createDeadStoreEliminationPass();
+
 //===----------------------------------------------------------------------===//
 //
 // SROA - Replace aggregates or pieces of aggregates with scalar SSA values.

diff  --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 306db6a558779..695eacbb2fadc 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -9,10 +9,13 @@
 #include "DXILForwardHandleAccesses.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/DXILResource.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
@@ -70,6 +73,7 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
 
   DenseMap<GlobalVariable *, IntrinsicInst *> HandleMap;
   SmallVector<LoadInst *> LoadsToProcess;
+  DenseMap<AllocaInst *, SmallVector<IntrinsicInst *>> LifeTimeIntrinsicMap;
   for (BasicBlock &BB : F)
     for (Instruction &Inst : BB)
       if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -78,6 +82,14 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
         case Intrinsic::dx_resource_handlefromimplicitbinding:
           processHandle(II, HandleMap);
           break;
+        case Intrinsic::lifetime_start:
+        case Intrinsic::lifetime_end:
+          if (II->arg_size() >= 1) {
+            Value *Ptr = II->getArgOperand(0);
+            if (auto *Alloca = dyn_cast<AllocaInst>(Ptr))
+              LifeTimeIntrinsicMap[Alloca].push_back(II);
+          }
+          break;
         default:
           continue;
         }
@@ -98,8 +110,16 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
             NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
         GV = dyn_cast_or_null<GlobalVariable>(Loaded);
       } else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
-        for (auto &Use : NestedAlloca->uses()) {
-          auto *Store = dyn_cast<StoreInst>(Use.getUser());
+
+        if (auto It = LifeTimeIntrinsicMap.find(NestedAlloca);
+            It != LifeTimeIntrinsicMap.end()) {
+          llvm::for_each(It->second,
+                         [](IntrinsicInst *II) { II->eraseFromParent(); });
+          LifeTimeIntrinsicMap.erase(It);
+        }
+
+        for (auto *User : NestedAlloca->users()) {
+          auto *Store = dyn_cast<StoreInst>(User);
           if (!Store)
             continue;
 

diff  --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 84751d2db2266..4ca22479f3194 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/Scalarizer.h"
 #include <optional>
 
@@ -72,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   initializeDXILFinalizeLinkageLegacyPass(*PR);
   initializeDXILPrettyPrinterLegacyPass(*PR);
   initializeDXILForwardHandleAccessesLegacyPass(*PR);
+  initializeDSELegacyPassPass(*PR);
   initializeDXILCBufferAccessLegacyPass(*PR);
 }
 
@@ -112,6 +114,7 @@ class DirectXPassConfig : public TargetPassConfig {
     addPass(createScalarizerPass(DxilScalarOptions));
     addPass(createDXILFlattenArraysLegacyPass());
     addPass(createDXILForwardHandleAccessesLegacyPass());
+    addPass(createDeadStoreEliminationPass());
     addPass(createDXILLegalizeLegacyPass());
     addPass(createDXILResourceImplicitBindingLegacyPass());
     addPass(createDXILTranslateMetadataLegacyPass());

diff  --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f46d54b463171..8093e44245d20 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,6 +38,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -69,6 +70,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -2666,3 +2668,79 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   PA.preserve<LoopAnalysis>();
   return PA;
 }
+
+namespace {
+
+/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
+class DSELegacyPass : public FunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  DSELegacyPass() : FunctionPass(ID) {
+    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+
+    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    const TargetLibraryInfo &TLI =
+        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    PostDominatorTree &PDT =
+        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+
+#ifdef LLVM_ENABLE_STATS
+    if (AreStatisticsEnabled())
+      for (auto &I : instructions(F))
+        NumRemainingStores += isa<StoreInst>(&I);
+#endif
+
+    return Changed;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
+    AU.addPreserved<PostDominatorTreeWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
+  }
+};
+
+} // end anonymous namespace
+
+char DSELegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
+                    false)
+
+namespace llvm {
+LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+  return new DSELegacyPass();
+}
+} // namespace llvm

diff  --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index c7e4a3e824700..032a3a7792824 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -37,6 +37,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeMergeICmpsLegacyPassPass(Registry);
   initializeNaryReassociateLegacyPassPass(Registry);
   initializePartiallyInlineLibCallsLegacyPassPass(Registry);
+  initializeDSELegacyPassPass(Registry);
   initializeReassociateLegacyPassPass(Registry);
   initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
   initializeSROALegacyPassPass(Registry);

diff  --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
similarity index 73%
rename from llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
rename to llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
index 7c0813b0b4e36..ce5c2d7ca32bf 100644
--- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
+++ b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
@@ -1,5 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-forward-handle-accesses  %s | FileCheck %s
+; RUN: opt -S -dxil-forward-handle-accesses  %s | FileCheck %s  --check-prefixes=CHECK,FHCHECK
+; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK
+
+; Note: test to confirm fix for issues: 140819 & 151764
 
 %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
 @global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
@@ -11,11 +13,11 @@
 define void @CSMain() local_unnamed_addr {
 ; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; FHCHECK-NEXT:    [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
 ; CHECK-NEXT:    store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @global, align 4
-; CHECK-NEXT:    store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; FHCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @global, align 4
+; FHCHECK-NEXT:    store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
 ; CHECK-NEXT:    store i32 0, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    ret void
@@ -25,9 +27,11 @@ entry:
   %handle  = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
   store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
   %val  = load i32, ptr @global, align 4
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca)
   store i32 %val , ptr  %alloca, align 8
   %indirect = load target("dx.RawBuffer", i32, 1, 0), ptr  %alloca, align 8
   %buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
   store i32 0, ptr %buff, align 4
+  call void @llvm.lifetime.end.p0(ptr nonnull %alloca)
   ret void
 }

diff  --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 151603a7161c5..360a6f6959e9f 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT: Target Library Information
 ; CHECK-NEXT: DXIL Resource Type Analysis
 ; CHECK-NEXT: Target Transform Information
-
+; CHECK-NEXT: Assumption Cache Tracker
 ; CHECK-OBJ-NEXT: Machine Module Information
 ; CHECK-OBJ-NEXT: Machine Branch Probability Analysis
 ; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
@@ -26,6 +26,13 @@
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     DXIL Forward Handle Accesses
+; CHECK-NEXT:     Dominator Tree Construction
+; CHECK-NEXT:     Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:     Function Alias Analysis Results
+; CHECK-NEXT:     Post-Dominator Tree Construction
+; CHECK-NEXT:     Memory SSA
+; CHECK-NEXT:     Natural Loop Information
+; CHECK-NEXT:     Dead Store Elimination
 ; CHECK-NEXT:     DXIL Legalizer
 ; CHECK-NEXT:   DXIL Resource Binding Analysis
 ; CHECK-NEXT:   DXIL Resource Implicit Binding

diff  --git a/llvm/test/tools/dxil-dis/lifetimes.ll b/llvm/test/tools/dxil-dis/lifetimes.ll
index 3c1666f7381f4..af7a19a4d4f66 100644
--- a/llvm/test/tools/dxil-dis/lifetimes.ll
+++ b/llvm/test/tools/dxil-dis/lifetimes.ll
@@ -4,10 +4,8 @@ target triple = "dxil-unknown-shadermodel6.7-library"
 define void @test_lifetimes()  {
 ; CHECK-LABEL: test_lifetimes
 ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 4
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i32], [2 x i32]* [[ALLOCA]], i32 0, i32 0
 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8*
 ; CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* nonnull [[BITCAST]])
-; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4
 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8*
 ; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* nonnull [[BITCAST]])
 ; CHECK-NEXT: ret void


        


More information about the llvm-commits mailing list