[llvm] [DirectX] Remove lifetime intrinsics and run Dead Store Elimination (PR #152636)

Farzon Lotfi via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 8 08:06:19 PDT 2025


https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/152636

>From 55c336d0e9b38a47810beece00426f0cd07edf39 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 7 Aug 2025 23:52:47 -0400
Subject: [PATCH 1/2] [DirectX] remove lifetime intrinsics and run Dead Store
 Elimination

fixes #151764

This fix has two parts first we track all lifetime intrinsics
and if they are users of an alloca of a target extention like
dx.RawBuffer then we eliminate those memory intrinsics when we visit the
alloca.

We do step one to allow us to use the Dead Store Elimination Pass.
This removes the alloca and simplifies the use of the target extention
back to using just the global. That keeps things in a form the DXILBitcodeWriter
is expecting.

Obviously to pull this off we needed to bring back the legacy pass
manager plumbing for the DSE pass and hook it up into the DirectX
backend.
---
 llvm/include/llvm/InitializePasses.h          |  1 +
 llvm/include/llvm/LinkAllPasses.h             |  1 +
 llvm/include/llvm/Transforms/Scalar.h         |  7 ++
 .../DirectX/DXILForwardHandleAccesses.cpp     | 27 ++++++-
 .../Target/DirectX/DirectXTargetMachine.cpp   |  3 +
 .../Scalar/DeadStoreElimination.cpp           | 78 +++++++++++++++++++
 llvm/lib/Transforms/Scalar/Scalar.cpp         |  1 +
 ..._alloca.ll => forward_handle_on_alloca.ll} | 14 ++--
 llvm/test/CodeGen/DirectX/llc-pipeline.ll     |  9 ++-
 9 files changed, 133 insertions(+), 8 deletions(-)
 rename llvm/test/CodeGen/DirectX/{issue-140819_allow_forward_handle_on_alloca.ll => forward_handle_on_alloca.ll} (72%)

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 31801daa126ad..e55f94b9022ee 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -85,6 +85,7 @@ LLVM_ABI void initializeCycleInfoWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeDAEPass(PassRegistry &);
 LLVM_ABI void initializeDAHPass(PassRegistry &);
 LLVM_ABI void initializeDCELegacyPassPass(PassRegistry &);
+LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &);
 LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &);
 LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index f82a43967e67a..ea56c83a3b709 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -77,6 +77,7 @@ struct ForcePassLinking {
     (void)llvm::createDXILResourceTypeWrapperPassPass();
     (void)llvm::createDeadArgEliminationPass();
     (void)llvm::createDeadCodeEliminationPass();
+    (void)llvm::createDeadStoreEliminationPass();
     (void)llvm::createDependenceAnalysisWrapperPass();
     (void)llvm::createDomOnlyPrinterWrapperPassPass();
     (void)llvm::createDomPrinterWrapperPassPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 1398f171b0f78..8e68b6a57e51f 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -33,6 +33,13 @@ class Pass;
 //
 LLVM_ABI FunctionPass *createDeadCodeEliminationPass();
 
+//===----------------------------------------------------------------------===//
+//
+// DeadStoreElimination - This pass deletes stores that are post-dominated by
+// must-aliased stores and are not loaded used between the stores.
+//
+LLVM_ABI FunctionPass *createDeadStoreEliminationPass();
+
 //===----------------------------------------------------------------------===//
 //
 // SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 306db6a558779..ee0a18d231904 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -9,10 +9,13 @@
 #include "DXILForwardHandleAccesses.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/DXILResource.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
@@ -70,6 +73,7 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
 
   DenseMap<GlobalVariable *, IntrinsicInst *> HandleMap;
   SmallVector<LoadInst *> LoadsToProcess;
+  DenseMap<AllocaInst *, SmallVector<IntrinsicInst *>> LifeTimeIntrinsicMap;
   for (BasicBlock &BB : F)
     for (Instruction &Inst : BB)
       if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -78,6 +82,17 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
         case Intrinsic::dx_resource_handlefromimplicitbinding:
           processHandle(II, HandleMap);
           break;
+        // Note: Lifetime intrinsics do not show up as users of an Alloca.
+        // As a result we walk the whole function to find the lifetimes and
+        // store them so that we may delete the alloca matches
+        case Intrinsic::lifetime_start:
+        case Intrinsic::lifetime_end:
+          if (II->arg_size() >= 2) {
+            Value *Ptr = II->getArgOperand(1);
+            if (auto *Alloca = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()))
+              LifeTimeIntrinsicMap[Alloca].push_back(II);
+          }
+          break;
         default:
           continue;
         }
@@ -98,8 +113,16 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
             NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
         GV = dyn_cast_or_null<GlobalVariable>(Loaded);
       } else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
-        for (auto &Use : NestedAlloca->uses()) {
-          auto *Store = dyn_cast<StoreInst>(Use.getUser());
+
+        if (auto It = LifeTimeIntrinsicMap.find(NestedAlloca);
+            It != LifeTimeIntrinsicMap.end()) {
+          llvm::for_each(It->second,
+                         [](IntrinsicInst *II) { II->eraseFromParent(); });
+          LifeTimeIntrinsicMap.erase(It);
+        }
+
+        for (auto *User : NestedAlloca->users()) {
+          auto *Store = dyn_cast<StoreInst>(User);
           if (!Store)
             continue;
 
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 84751d2db2266..4ca22479f3194 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/Scalarizer.h"
 #include <optional>
 
@@ -72,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   initializeDXILFinalizeLinkageLegacyPass(*PR);
   initializeDXILPrettyPrinterLegacyPass(*PR);
   initializeDXILForwardHandleAccessesLegacyPass(*PR);
+  initializeDSELegacyPassPass(*PR);
   initializeDXILCBufferAccessLegacyPass(*PR);
 }
 
@@ -112,6 +114,7 @@ class DirectXPassConfig : public TargetPassConfig {
     addPass(createScalarizerPass(DxilScalarOptions));
     addPass(createDXILFlattenArraysLegacyPass());
     addPass(createDXILForwardHandleAccessesLegacyPass());
+    addPass(createDeadStoreEliminationPass());
     addPass(createDXILLegalizeLegacyPass());
     addPass(createDXILResourceImplicitBindingLegacyPass());
     addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9b8718035df62..17725cc8c3b96 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,6 +38,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -69,6 +70,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -2666,3 +2668,79 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   PA.preserve<LoopAnalysis>();
   return PA;
 }
+
+namespace {
+
+/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
+class DSELegacyPass : public FunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  DSELegacyPass() : FunctionPass(ID) {
+    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+
+    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    const TargetLibraryInfo &TLI =
+        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    PostDominatorTree &PDT =
+        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+
+#ifdef LLVM_ENABLE_STATS
+    if (AreStatisticsEnabled())
+      for (auto &I : instructions(F))
+        NumRemainingStores += isa<StoreInst>(&I);
+#endif
+
+    return Changed;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
+    AU.addPreserved<PostDominatorTreeWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
+  }
+};
+
+} // end anonymous namespace
+
+char DSELegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
+                    false)
+
+namespace llvm {
+LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+  return new DSELegacyPass();
+}
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index c7e4a3e824700..032a3a7792824 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -37,6 +37,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeMergeICmpsLegacyPassPass(Registry);
   initializeNaryReassociateLegacyPassPass(Registry);
   initializePartiallyInlineLibCallsLegacyPassPass(Registry);
+  initializeDSELegacyPassPass(Registry);
   initializeReassociateLegacyPassPass(Registry);
   initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
   initializeSROALegacyPassPass(Registry);
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
similarity index 72%
rename from llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
rename to llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
index 7c0813b0b4e36..c628aafbd8d39 100644
--- a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
+++ b/llvm/test/CodeGen/DirectX/forward_handle_on_alloca.ll
@@ -1,5 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-forward-handle-accesses  %s | FileCheck %s
+; RUN: opt -S -dxil-forward-handle-accesses  %s | FileCheck %s  --check-prefixes=CHECK,FHCHECK
+; RUN: opt -S -mtriple=dxil--shadermodel6.3-compute -passes='function(dxil-forward-handle-accesses),dse' %s | FileCheck %s --check-prefix=CHECK
+
+; Note: test to confirm fix for issues: 140819 & 151764
 
 %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
 @global = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
@@ -11,11 +13,11 @@
 define void @CSMain() local_unnamed_addr {
 ; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; FHCHECK-NEXT:    [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
 ; CHECK-NEXT:    store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @global, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @global, align 4
-; CHECK-NEXT:    store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; FHCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @global, align 4
+; FHCHECK-NEXT:    store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 0)
 ; CHECK-NEXT:    store i32 0, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    ret void
@@ -25,9 +27,11 @@ entry:
   %handle  = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @name)
   store target("dx.RawBuffer", i32, 1, 0) %handle , ptr @global, align 4
   %val  = load i32, ptr @global, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %alloca)
   store i32 %val , ptr  %alloca, align 8
   %indirect = load target("dx.RawBuffer", i32, 1, 0), ptr  %alloca, align 8
   %buff = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %indirect, i32 0)
   store i32 0, ptr %buff, align 4
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %alloca)
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 151603a7161c5..360a6f6959e9f 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT: Target Library Information
 ; CHECK-NEXT: DXIL Resource Type Analysis
 ; CHECK-NEXT: Target Transform Information
-
+; CHECK-NEXT: Assumption Cache Tracker
 ; CHECK-OBJ-NEXT: Machine Module Information
 ; CHECK-OBJ-NEXT: Machine Branch Probability Analysis
 ; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
@@ -26,6 +26,13 @@
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     DXIL Forward Handle Accesses
+; CHECK-NEXT:     Dominator Tree Construction
+; CHECK-NEXT:     Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:     Function Alias Analysis Results
+; CHECK-NEXT:     Post-Dominator Tree Construction
+; CHECK-NEXT:     Memory SSA
+; CHECK-NEXT:     Natural Loop Information
+; CHECK-NEXT:     Dead Store Elimination
 ; CHECK-NEXT:     DXIL Legalizer
 ; CHECK-NEXT:   DXIL Resource Binding Analysis
 ; CHECK-NEXT:   DXIL Resource Implicit Binding

>From c4ec5018766e6ba05924e4e92140b9faa00d7ca7 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 8 Aug 2025 11:06:00 -0400
Subject: [PATCH 2/2] DSE removes the unusef gep\store, fix up the test to
 account

---
 llvm/test/tools/dxil-dis/lifetimes.ll | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/test/tools/dxil-dis/lifetimes.ll b/llvm/test/tools/dxil-dis/lifetimes.ll
index cb3e6291c7bc0..f200aaa02ee56 100644
--- a/llvm/test/tools/dxil-dis/lifetimes.ll
+++ b/llvm/test/tools/dxil-dis/lifetimes.ll
@@ -4,10 +4,8 @@ target triple = "dxil-unknown-shadermodel6.7-library"
 define void @test_lifetimes()  {
 ; CHECK-LABEL: test_lifetimes
 ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 4
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i32], [2 x i32]* [[ALLOCA]], i32 0, i32 0
 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8*
 ; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* nonnull [[BITCAST]])
-; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4
 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8*
 ; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* nonnull [[BITCAST]])
 ; CHECK-NEXT: ret void



More information about the llvm-commits mailing list