[llvm] Add Dead Block Elimination to NVVMReflect (PR #144171)
Yonah Goldberg via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 00:37:01 PDT 2025
https://github.com/YonahGoldberg updated https://github.com/llvm/llvm-project/pull/144171
From e9c4b6b2a9f4b1a6627181554c9d2ce5578764a7 Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Fri, 13 Jun 2025 22:20:23 +0000
Subject: [PATCH 01/10] updates
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 113 +++++++++++++++---
.../test/CodeGen/NVPTX/nvvm-reflect-opaque.ll | 4 +-
llvm/test/CodeGen/NVPTX/nvvm-reflect.ll | 4 +-
3 files changed, 100 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 208bab52284a3..1c17852503660 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -39,6 +39,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
#define NVVM_REFLECT_OCL_FUNCTION "__nvvm_reflect_ocl"
// Argument of reflect call to retrive arch number
@@ -59,7 +61,10 @@ class NVVMReflect {
StringMap<unsigned> ReflectMap;
bool handleReflectFunction(Module &M, StringRef ReflectName);
void populateReflectMap(Module &M);
- void foldReflectCall(CallInst *Call, Constant *NewValue);
+ void replaceReflectCalls(
+ SmallVector<std::pair<CallInst *, Constant *>, 8> &ReflectReplacements,
+ const DataLayout &DL);
+ SetVector<BasicBlock *> findTransitivelyDeadBlocks(BasicBlock *DeadBB);
public:
// __CUDA_FTZ is assigned in `runOnModule` by checking nvvm-reflect-ftz module
@@ -138,6 +143,8 @@ bool NVVMReflect::handleReflectFunction(Module &M, StringRef ReflectName) {
assert(F->getReturnType()->isIntegerTy() &&
"_reflect's return type should be integer");
+ SmallVector<std::pair<CallInst *, Constant *>, 8> ReflectReplacements;
+
const bool Changed = !F->use_empty();
for (User *U : make_early_inc_range(F->users())) {
// Reflect function calls look like:
@@ -178,38 +185,110 @@ bool NVVMReflect::handleReflectFunction(Module &M, StringRef ReflectName) {
<< "(" << ReflectArg << ") with value " << ReflectVal
<< "\n");
auto *NewValue = ConstantInt::get(Call->getType(), ReflectVal);
- foldReflectCall(Call, NewValue);
- Call->eraseFromParent();
+ ReflectReplacements.push_back({Call, NewValue});
}
- // Remove the __nvvm_reflect function from the module
+ replaceReflectCalls(ReflectReplacements, M.getDataLayout());
F->eraseFromParent();
return Changed;
}
-void NVVMReflect::foldReflectCall(CallInst *Call, Constant *NewValue) {
+/// Find all blocks that become dead transitively from an initial dead block.
+/// Returns the complete set including the original dead block and any blocks
+/// that lose all their predecessors due to the deletion cascade.
+SetVector<BasicBlock *>
+NVVMReflect::findTransitivelyDeadBlocks(BasicBlock *DeadBB) {
+ SmallVector<BasicBlock *, 8> Worklist({DeadBB});
+ SetVector<BasicBlock *> DeadBlocks;
+ while (!Worklist.empty()) {
+ auto *BB = Worklist.pop_back_val();
+ DeadBlocks.insert(BB);
+
+ for (BasicBlock *Succ : successors(BB))
+ if (pred_size(Succ) == 1 && DeadBlocks.insert(Succ))
+ Worklist.push_back(Succ);
+ }
+ return DeadBlocks;
+}
+
+/// Replace calls to __nvvm_reflect with corresponding constant values. Then
+/// clean up through constant folding and propagation and dead block
+/// elimination.
+///
+/// The purpose of this cleanup is not optimization because that could be
+/// handled by later passes
+/// (i.e. SCCP, SimplifyCFG, etc.), but for correctness. Reflect calls are most
+/// commonly used to query the arch number and select a valid instruction for
+/// the arch. Therefore, you need to eliminate blocks that become dead because
+/// they may contain invalid instructions for the arch. The purpose of the
+/// cleanup is to do the minimal amount of work to leave the code in a valid
+/// state.
+void NVVMReflect::replaceReflectCalls(
+ SmallVector<std::pair<CallInst *, Constant *>, 8> &ReflectReplacements,
+ const DataLayout &DL) {
SmallVector<Instruction *, 8> Worklist;
- // Replace an instruction with a constant and add all users of the instruction
- // to the worklist
+ SetVector<BasicBlock *> DeadBlocks;
+
+ // Replace an instruction with a constant and add all users to the worklist,
+ // then delete the instruction
auto ReplaceInstructionWithConst = [&](Instruction *I, Constant *C) {
for (auto *U : I->users())
if (auto *UI = dyn_cast<Instruction>(U))
Worklist.push_back(UI);
I->replaceAllUsesWith(C);
+ I->eraseFromParent();
};
- ReplaceInstructionWithConst(Call, NewValue);
+ for (auto &[Call, NewValue] : ReflectReplacements)
+ ReplaceInstructionWithConst(Call, NewValue);
- auto &DL = Call->getModule()->getDataLayout();
- while (!Worklist.empty()) {
- auto *I = Worklist.pop_back_val();
- if (auto *C = ConstantFoldInstruction(I, DL)) {
- ReplaceInstructionWithConst(I, C);
- if (isInstructionTriviallyDead(I))
- I->eraseFromParent();
- } else if (I->isTerminator()) {
- ConstantFoldTerminator(I->getParent());
+ // Alternate between constant folding/propagation and dead block elimination.
+ // Terminator folding may create new dead blocks. When those dead blocks are
+ // deleted, their live successors may have PHIs that can be simplified, which
+ // may yield more work for folding/propagation.
+ while (true) {
+ // Iterate folding and propagating constants until the worklist is empty.
+ while (!Worklist.empty()) {
+ auto *I = Worklist.pop_back_val();
+ if (auto *C = ConstantFoldInstruction(I, DL)) {
+ ReplaceInstructionWithConst(I, C);
+ } else if (I->isTerminator()) {
+ BasicBlock *BB = I->getParent();
+ SmallVector<BasicBlock *, 8> Succs(successors(BB));
+ // Some blocks may become dead if the terminator is folded because
+ // a conditional branch is turned into a direct branch.
+ if (ConstantFoldTerminator(BB)) {
+ for (BasicBlock *Succ : Succs) {
+ if (pred_empty(Succ) &&
+ Succ != &Succ->getParent()->getEntryBlock()) {
+ SetVector<BasicBlock *> TransitivelyDead =
+ findTransitivelyDeadBlocks(Succ);
+ DeadBlocks.insert(TransitivelyDead.begin(),
+ TransitivelyDead.end());
+ }
+ }
+ }
+ }
}
+ // No more constants to fold and no more dead blocks
+ // to create more work. We're done.
+ if (DeadBlocks.empty())
+ break;
+ // PHI nodes of live successors of dead blocks get eliminated when the dead
+ // blocks are eliminated. Their users can now be simplified further, so add
+ // them to the worklist.
+ for (BasicBlock *DeadBB : DeadBlocks)
+ for (BasicBlock *Succ : successors(DeadBB))
+ if (!DeadBlocks.contains(Succ))
+ for (PHINode &PHI : Succ->phis())
+ for (auto *U : PHI.users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ Worklist.push_back(UI);
+ // Delete all dead blocks in order
+ for (BasicBlock *DeadBB : DeadBlocks)
+ DeleteDeadBlock(DeadBB);
+
+ DeadBlocks.clear();
}
}
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
index 19c74df303702..7bb1af707001a 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
@@ -3,12 +3,12 @@
; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
-; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
+; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
-; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
+; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
index 244b44fea9b83..581dbf353c1ff 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -3,12 +3,12 @@
; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
-; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
+; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
-; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
+; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
From d5e1cfc8bfaaf736493743d17af464a743c34cb2 Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Fri, 13 Jun 2025 22:20:31 +0000
Subject: [PATCH 02/10] format
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 1c17852503660..5b24864ab586f 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -19,6 +19,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -39,8 +41,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
#define NVVM_REFLECT_OCL_FUNCTION "__nvvm_reflect_ocl"
// Argument of reflect call to retrive arch number
From 2c951c83fd46a0d046dd72fa397de678d0c834bd Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Fri, 13 Jun 2025 22:23:10 +0000
Subject: [PATCH 03/10] format
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5b24864ab586f..b0f69598972ce 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -259,12 +259,9 @@ void NVVMReflect::replaceReflectCalls(
// a conditional branch is turned into a direct branch.
if (ConstantFoldTerminator(BB)) {
for (BasicBlock *Succ : Succs) {
- if (pred_empty(Succ) &&
- Succ != &Succ->getParent()->getEntryBlock()) {
- SetVector<BasicBlock *> TransitivelyDead =
- findTransitivelyDeadBlocks(Succ);
- DeadBlocks.insert(TransitivelyDead.begin(),
- TransitivelyDead.end());
+ if (pred_empty(Succ) && Succ != &Succ->getParent()->getEntryBlock()) {
+ SetVector<BasicBlock *> TransitivelyDead = findTransitivelyDeadBlocks(Succ);
+ DeadBlocks.insert(TransitivelyDead.begin(), TransitivelyDead.end());
}
}
}
@@ -284,7 +281,7 @@ void NVVMReflect::replaceReflectCalls(
for (auto *U : PHI.users())
if (auto *UI = dyn_cast<Instruction>(U))
Worklist.push_back(UI);
- // Delete all dead blocks in order
+ // Delete all dead blocks
for (BasicBlock *DeadBB : DeadBlocks)
DeleteDeadBlock(DeadBB);
From 8fbef2a34339f66e7aaeab113372edde945c1fb1 Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Fri, 13 Jun 2025 22:23:15 +0000
Subject: [PATCH 04/10] format
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index b0f69598972ce..74c3efd18ad89 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -259,9 +259,12 @@ void NVVMReflect::replaceReflectCalls(
// a conditional branch is turned into a direct branch.
if (ConstantFoldTerminator(BB)) {
for (BasicBlock *Succ : Succs) {
- if (pred_empty(Succ) && Succ != &Succ->getParent()->getEntryBlock()) {
- SetVector<BasicBlock *> TransitivelyDead = findTransitivelyDeadBlocks(Succ);
- DeadBlocks.insert(TransitivelyDead.begin(), TransitivelyDead.end());
+ if (pred_empty(Succ) &&
+ Succ != &Succ->getParent()->getEntryBlock()) {
+ SetVector<BasicBlock *> TransitivelyDead =
+ findTransitivelyDeadBlocks(Succ);
+ DeadBlocks.insert(TransitivelyDead.begin(),
+ TransitivelyDead.end());
}
}
}
From f9eedaadebddf6cc60cf3d6a5424e30bfdda7fb5 Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Fri, 13 Jun 2025 22:30:54 +0000
Subject: [PATCH 05/10] cleanup
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 74c3efd18ad89..fd9225838b243 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -19,7 +19,6 @@
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
From 4ba9826586f37cc6f3e0d5eb3261b5132e69c003 Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Fri, 13 Jun 2025 22:33:19 +0000
Subject: [PATCH 06/10] added back isInstructionTriviallyDead check
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index fd9225838b243..2585ff45bde4c 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -235,7 +235,8 @@ void NVVMReflect::replaceReflectCalls(
if (auto *UI = dyn_cast<Instruction>(U))
Worklist.push_back(UI);
I->replaceAllUsesWith(C);
- I->eraseFromParent();
+ if (isInstructionTriviallyDead(I))
+ I->eraseFromParent();
};
for (auto &[Call, NewValue] : ReflectReplacements)
From 32c5c408951acee7e101e95ae5f3d93a79921c4f Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Mon, 30 Jun 2025 07:15:47 +0000
Subject: [PATCH 07/10] NVVMReflectDCE option
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 2585ff45bde4c..c76eaf7e8d1f3 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -88,7 +88,7 @@ ModulePass *llvm::createNVVMReflectPass(unsigned SmVersion) {
}
static cl::opt<bool>
- NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
+ NVVMReflectEnabled("nvvm-reflect-enable", cl::init(false), cl::Hidden,
cl::desc("NVVM reflection, enabled by default"));
char NVVMReflectLegacyPass::ID = 0;
@@ -105,6 +105,10 @@ static cl::list<std::string> ReflectList(
cl::desc("A key=value pair. Replace __nvvm_reflect(name) with value."),
cl::ValueRequired);
+static cl::opt<bool> NVVMReflectDCE("nvvm-reflect-dce", cl::init(false),
+ cl::Hidden,
+ cl::desc("Delete dead blocks introduced by reflect call elimination"));
+
// Set the ReflectMap with, first, the value of __CUDA_FTZ from module metadata,
// and then the key/value pairs from the command line.
void NVVMReflect::populateReflectMap(Module &M) {
@@ -241,8 +245,9 @@ void NVVMReflect::replaceReflectCalls(
for (auto &[Call, NewValue] : ReflectReplacements)
ReplaceInstructionWithConst(Call, NewValue);
-
- // Alternate between constant folding/propagation and dead block elimination.
+
+ // Constant fold reflect results. If NVVMReflectDCE is enabled, we will
+ // alternate between constant folding/propagation and dead block elimination.
// Terminator folding may create new dead blocks. When those dead blocks are
// deleted, their live successors may have PHIs that can be simplified, which
// may yield more work for folding/propagation.
@@ -256,11 +261,12 @@ void NVVMReflect::replaceReflectCalls(
BasicBlock *BB = I->getParent();
SmallVector<BasicBlock *, 8> Succs(successors(BB));
// Some blocks may become dead if the terminator is folded because
- // a conditional branch is turned into a direct branch.
+ // a conditional branch is turned into a direct branch. Add those dead blocks
+ // to the dead blocks set if NVVMReflectDCE is enabled.
if (ConstantFoldTerminator(BB)) {
for (BasicBlock *Succ : Succs) {
if (pred_empty(Succ) &&
- Succ != &Succ->getParent()->getEntryBlock()) {
+ Succ != &Succ->getParent()->getEntryBlock() && NVVMReflectDCE) {
SetVector<BasicBlock *> TransitivelyDead =
findTransitivelyDeadBlocks(Succ);
DeadBlocks.insert(TransitivelyDead.begin(),
From d97ad29d87e680c8604a87f7f672a91c1876cc1a Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Mon, 30 Jun 2025 07:17:54 +0000
Subject: [PATCH 08/10] use reflect dce instead of simplifycfg
---
llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll | 4 ++--
llvm/test/CodeGen/NVPTX/nvvm-reflect.ll | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
index 7bb1af707001a..553b2c107d86a 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
@@ -3,12 +3,12 @@
; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
-; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
+; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' -nvvm-reflect-dce \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
-; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
+; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' -nvvm-reflect-dce \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
index 581dbf353c1ff..86cdc3f489c2e 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -3,12 +3,12 @@
; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
-; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
+; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' -nvvm-reflect-dce \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
-; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' \
+; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect' -nvvm-reflect-dce \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
From ed7cfc10e0c71499c34faef818d5d9bda1763591 Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Mon, 30 Jun 2025 07:18:29 +0000
Subject: [PATCH 09/10] format
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index c76eaf7e8d1f3..093c55a9fb027 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -105,9 +105,9 @@ static cl::list<std::string> ReflectList(
cl::desc("A key=value pair. Replace __nvvm_reflect(name) with value."),
cl::ValueRequired);
-static cl::opt<bool> NVVMReflectDCE("nvvm-reflect-dce", cl::init(false),
- cl::Hidden,
- cl::desc("Delete dead blocks introduced by reflect call elimination"));
+static cl::opt<bool> NVVMReflectDCE(
+ "nvvm-reflect-dce", cl::init(false), cl::Hidden,
+ cl::desc("Delete dead blocks introduced by reflect call elimination"));
// Set the ReflectMap with, first, the value of __CUDA_FTZ from module metadata,
// and then the key/value pairs from the command line.
@@ -245,7 +245,7 @@ void NVVMReflect::replaceReflectCalls(
for (auto &[Call, NewValue] : ReflectReplacements)
ReplaceInstructionWithConst(Call, NewValue);
-
+
// Constant fold reflect results. If NVVMReflectDCE is enabled, we will
// alternate between constant folding/propagation and dead block elimination.
// Terminator folding may create new dead blocks. When those dead blocks are
@@ -261,8 +261,8 @@ void NVVMReflect::replaceReflectCalls(
BasicBlock *BB = I->getParent();
SmallVector<BasicBlock *, 8> Succs(successors(BB));
// Some blocks may become dead if the terminator is folded because
- // a conditional branch is turned into a direct branch. Add those dead blocks
- // to the dead blocks set if NVVMReflectDCE is enabled.
+ // a conditional branch is turned into a direct branch. Add those dead
+ // blocks to the dead blocks set if NVVMReflectDCE is enabled.
if (ConstantFoldTerminator(BB)) {
for (BasicBlock *Succ : Succs) {
if (pred_empty(Succ) &&
From c9b04d7c714493d25b4258b04099bd280447675e Mon Sep 17 00:00:00 2001
From: Yonah Goldberg <ygoldberg at nvidia.com>
Date: Mon, 30 Jun 2025 07:36:22 +0000
Subject: [PATCH 10/10] bugs
---
llvm/lib/Target/NVPTX/NVVMReflect.cpp | 36 +++++++++++----------------
1 file changed, 15 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 093c55a9fb027..6d21706570bbe 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -88,14 +88,9 @@ ModulePass *llvm::createNVVMReflectPass(unsigned SmVersion) {
}
static cl::opt<bool>
- NVVMReflectEnabled("nvvm-reflect-enable", cl::init(false), cl::Hidden,
+ NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
cl::desc("NVVM reflection, enabled by default"));
-char NVVMReflectLegacyPass::ID = 0;
-INITIALIZE_PASS(NVVMReflectLegacyPass, "nvvm-reflect",
- "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
- false)
-
// Allow users to specify additional key/value pairs to reflect. These key/value
// pairs are the last to be added to the ReflectMap, and therefore will take
// precedence over initial values (i.e. __CUDA_FTZ from module medadata and
@@ -109,6 +104,11 @@ static cl::opt<bool> NVVMReflectDCE(
"nvvm-reflect-dce", cl::init(false), cl::Hidden,
cl::desc("Delete dead blocks introduced by reflect call elimination"));
+char NVVMReflectLegacyPass::ID = 0;
+INITIALIZE_PASS(NVVMReflectLegacyPass, "nvvm-reflect",
+ "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
+ false)
+
// Set the ReflectMap with, first, the value of __CUDA_FTZ from module metadata,
// and then the key/value pairs from the command line.
void NVVMReflect::populateReflectMap(Module &M) {
@@ -188,6 +188,8 @@ bool NVVMReflect::handleReflectFunction(Module &M, StringRef ReflectName) {
<< "(" << ReflectArg << ") with value " << ReflectVal
<< "\n");
auto *NewValue = ConstantInt::get(Call->getType(), ReflectVal);
+ dbgs() << "NewValue: " << *NewValue << "\n";
+ dbgs() << "Call: " << *Call << "\n";
ReflectReplacements.push_back({Call, NewValue});
}
@@ -216,35 +218,25 @@ NVVMReflect::findTransitivelyDeadBlocks(BasicBlock *DeadBB) {
/// Replace calls to __nvvm_reflect with corresponding constant values. Then
/// clean up through constant folding and propagation and dead block
-/// elimination.
-///
-/// The purpose of this cleanup is not optimization because that could be
-/// handled by later passes
-/// (i.e. SCCP, SimplifyCFG, etc.), but for correctness. Reflect calls are most
-/// commonly used to query the arch number and select a valid instruction for
-/// the arch. Therefore, you need to eliminate blocks that become dead because
-/// they may contain invalid instructions for the arch. The purpose of the
-/// cleanup is to do the minimal amount of work to leave the code in a valid
-/// state.
+/// elimination, if NVVMReflectDCE is enabled.
void NVVMReflect::replaceReflectCalls(
SmallVector<std::pair<CallInst *, Constant *>, 8> &ReflectReplacements,
const DataLayout &DL) {
SmallVector<Instruction *, 8> Worklist;
SetVector<BasicBlock *> DeadBlocks;
- // Replace an instruction with a constant and add all users to the worklist,
- // then delete the instruction
+ // Replace an instruction with a constant and add all users to the worklist
auto ReplaceInstructionWithConst = [&](Instruction *I, Constant *C) {
for (auto *U : I->users())
if (auto *UI = dyn_cast<Instruction>(U))
Worklist.push_back(UI);
I->replaceAllUsesWith(C);
- if (isInstructionTriviallyDead(I))
- I->eraseFromParent();
};
- for (auto &[Call, NewValue] : ReflectReplacements)
+ for (auto &[Call, NewValue] : ReflectReplacements) {
ReplaceInstructionWithConst(Call, NewValue);
+ Call->eraseFromParent();
+ }
// Constant fold reflect results. If NVVMReflectDCE is enabled, we will
// alternate between constant folding/propagation and dead block elimination.
@@ -257,6 +249,8 @@ void NVVMReflect::replaceReflectCalls(
auto *I = Worklist.pop_back_val();
if (auto *C = ConstantFoldInstruction(I, DL)) {
ReplaceInstructionWithConst(I, C);
+ if (isInstructionTriviallyDead(I))
+ I->eraseFromParent();
} else if (I->isTerminator()) {
BasicBlock *BB = I->getParent();
SmallVector<BasicBlock *, 8> Succs(successors(BB));
More information about the llvm-commits
mailing list