[llvm] aa15fe9 - Revert "[AMDGPUUnifyDivergentExitNodes] Add NewPM support"
Vitaly Buka via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 16 19:10:31 PDT 2023
Author: Vitaly Buka
Date: 2023-03-16T19:03:46-07:00
New Revision: aa15fe98b64e4692e572b506d6e7fb428a9151a6
URL: https://github.com/llvm/llvm-project/commit/aa15fe98b64e4692e572b506d6e7fb428a9151a6
DIFF: https://github.com/llvm/llvm-project/commit/aa15fe98b64e4692e572b506d6e7fb428a9151a6.diff
LOG: Revert "[AMDGPUUnifyDivergentExitNodes] Add NewPM support"
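The reverted commit introduces a nullptr dereference: both the legacy-PM runOnFunction and the new-PM run pass *DT to AMDGPUUnifyDivergentExitNodesImpl::run even though DT is left as nullptr whenever RequireAndPreserveDomTree is false.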
This reverts commit a5455e32b364dabe499ec11722626d4bbaf047ba.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
Removed:
llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fe7a287657b00..64dc8604e76ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -22,7 +22,6 @@
#include "AMDGPURegBankSelect.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
-#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
@@ -656,10 +655,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUPromoteKernelArgumentsPass());
return true;
}
- if (PassName == "amdgpu-unify-divergent-exit-nodes") {
- PM.addPass(AMDGPUUnifyDivergentExitNodesPass());
- return true;
- }
return false;
});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 5d59ee47ec430..5f204f5be51ae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUUnifyDivergentExitNodes.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
@@ -54,33 +53,25 @@ using namespace llvm;
namespace {
-class AMDGPUUnifyDivergentExitNodesImpl {
+class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
private:
const TargetTransformInfo *TTI = nullptr;
public:
- AMDGPUUnifyDivergentExitNodesImpl() = delete;
- AMDGPUUnifyDivergentExitNodesImpl(const TargetTransformInfo *TTI)
- : TTI(TTI) {}
+ static char ID; // Pass identification, replacement for typeid
+
+ AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
+ initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
+ }
// We can preserve non-critical-edgeness when we unify function exit nodes
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
ArrayRef<BasicBlock *> ReturningBlocks,
StringRef Name);
- bool run(Function &F, DominatorTree &DT, const PostDominatorTree &PDT,
- const UniformityInfo &UA);
-};
-
-class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
-public:
- static char ID;
- AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
- initializeAMDGPUUnifyDivergentExitNodesPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
};
+
} // end anonymous namespace
char AMDGPUUnifyDivergentExitNodes::ID = 0;
@@ -88,14 +79,14 @@ char AMDGPUUnifyDivergentExitNodes::ID = 0;
char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
- "Unify divergent function exit nodes", false, false)
+ "Unify divergent function exit nodes", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
-void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
+void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
if (RequireAndPreserveDomTree)
AU.addRequired<DominatorTreeWrapperPass>();
@@ -141,7 +132,7 @@ static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB) {
return true;
}
-BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
+BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
Function &F, DomTreeUpdater &DTU, ArrayRef<BasicBlock *> ReturningBlocks,
StringRef Name) {
// Otherwise, we need to insert a new basic block into the function, add a PHI
@@ -189,14 +180,21 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
return NewRetBlock;
}
-bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree &DT,
- const PostDominatorTree &PDT,
- const UniformityInfo &UA) {
+bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
return false;
+ UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
// Loop over all of the blocks in a function, tracking all of the blocks that
// return.
SmallVector<BasicBlock *, 4> ReturningBlocks;
@@ -329,30 +327,3 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree &DT,
unifyReturnBlockSet(F, DTU, ReturningBlocks, "UnifiedReturnBlock");
return true;
}
-
-bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
- DominatorTree *DT = nullptr;
- if (RequireAndPreserveDomTree)
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- const auto &PDT =
- getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
- const auto *TranformInfo =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(F, *DT, PDT, UA);
-}
-
-PreservedAnalyses
-AMDGPUUnifyDivergentExitNodesPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- DominatorTree *DT = nullptr;
- if (RequireAndPreserveDomTree)
- DT = &AM.getResult<DominatorTreeAnalysis>(F);
-
- const auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- const auto &UA = AM.getResult<UniformityInfoAnalysis>(F);
- const auto *TransformInfo = &AM.getResult<TargetIRAnalysis>(F);
- return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).run(F, *DT, PDT, UA)
- ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h
deleted file mode 100644
index e58925bc01d9e..0000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- AMDGPUUnifyDivergentExitNodes.h ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a variant of the UnifyFunctionExitNodes pass. Rather than ensuring
-// there is at most one ret and one unreachable instruction, it ensures there is
-// at most one divergent exiting block.
-//
-// StructurizeCFG can't deal with multi-exit regions formed by branches to
-// multiple return nodes. It is not desirable to structurize regions with
-// uniform branches, so unifying those to the same return block as divergent
-// branches inhibits use of scalar branching. It still can't deal with the case
-// where one branch goes to return, and one unreachable. Replace unreachable in
-// this case with a return.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-
-namespace llvm {
-class AMDGPUUnifyDivergentExitNodesPass
- : public PassInfoMixin<AMDGPUUnifyDivergentExitNodesPass> {
-public:
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-
-} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
index 13f8eff94f86b..58d5dc20d5ac5 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
@@ -1,48 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -p simplifycfg,amdgpu-unify-divergent-exit-nodes %s -S -o - | FileCheck %s --check-prefix=OPT
-; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA
+; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s
define void @nested_inf_loop(i1 %0, i1 %1) {
-; OPT-LABEL: @nested_inf_loop(
-; OPT-NEXT: BB:
-; OPT-NEXT: br label [[BB1:%.*]]
-; OPT: BB1:
-; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]]
-; OPT-NEXT: br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]]
-; OPT: infloop:
-; OPT-NEXT: br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]]
-; OPT: DummyReturnBlock:
-; OPT-NEXT: ret void
-;
-; ISA-LABEL: nested_inf_loop:
-; ISA-NEXT: %bb.0: ; %BB
-; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; ISA-NEXT: v_and_b32_e32 v1, 1, v1
-; ISA-NEXT: v_and_b32_e32 v0, 1, v0
-; ISA-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
-; ISA-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; ISA-NEXT: s_xor_b64 s[6:7], vcc, -1
-; ISA-NEXT: s_mov_b64 s[8:9], 0
-; ISA-NEXT: .LBB0_1: ; %BB1
-; ISA: s_and_b64 s[10:11], exec, s[6:7]
-; ISA-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
-; ISA-NEXT: s_andn2_b64 exec, exec, s[8:9]
-; ISA-NEXT: s_cbranch_execnz .LBB0_1
-; ISA-NEXT: %bb.2: ; %BB2
-; ISA: s_or_b64 exec, exec, s[8:9]
-; ISA-NEXT: s_mov_b64 s[8:9], 0
-; ISA-NEXT: .LBB0_3: ; %BB4
-; ISA: s_and_b64 s[10:11], exec, s[4:5]
-; ISA-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
-; ISA-NEXT: s_andn2_b64 exec, exec, s[8:9]
-; ISA-NEXT: s_cbranch_execnz .LBB0_3
-; ISA-NEXT: %bb.4: ; %loop.exit.guard
-; ISA: s_or_b64 exec, exec, s[8:9]
-; ISA-NEXT: s_mov_b64 vcc, 0
-; ISA-NEXT: s_mov_b64 s[8:9], 0
-; ISA-NEXT: s_branch .LBB0_1
-; ISA-NEXT: %bb.5: ; %DummyReturnBlock
-; ISA-NEXT: s_setpc_b64 s[30:31]
+; CHECK-LABEL: nested_inf_loop:
+; CHECK-NEXT: %bb.0: ; %BB
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT: s_xor_b64 s[6:7], vcc, -1
+; CHECK-NEXT: s_mov_b64 s[8:9], 0
+; CHECK-NEXT: .LBB0_1: ; %BB1
+; CHECK: s_and_b64 s[10:11], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_1
+; CHECK-NEXT: %bb.2: ; %BB2
+; CHECK: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 s[8:9], 0
+; CHECK-NEXT: .LBB0_3: ; %BB4
+; CHECK: s_and_b64 s[10:11], exec, s[4:5]
+; CHECK-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_3
+; CHECK-NEXT: %bb.4: ; %loop.exit.guard
+; CHECK: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 vcc, 0
+; CHECK-NEXT: s_mov_b64 s[8:9], 0
+; CHECK-NEXT: s_branch .LBB0_1
+; CHECK-NEXT: %bb.5: ; %DummyReturnBlock
+; CHECK-NEXT: s_setpc_b64 s[30:31]
BB:
br label %BB1
More information about the llvm-commits mailing list