[llvm] dbebebf - [AMDGPU] Use UniformityAnalysis in CodeGenPrepare
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 6 04:26:56 PST 2023
Author: pvanhout
Date: 2023-03-06T13:26:51+01:00
New Revision: dbebebf6f6838109aab71f062f53a0bf1ffbf7a9
URL: https://github.com/llvm/llvm-project/commit/dbebebf6f6838109aab71f062f53a0bf1ffbf7a9
DIFF: https://github.com/llvm/llvm-project/commit/dbebebf6f6838109aab71f062f53a0bf1ffbf7a9.diff
LOG: [AMDGPU] Use UniformityAnalysis in CodeGenPrepare
A small extra change was needed in UA: its initialize() asserted that a
source of divergence is never a terminator, but an InvokeInst is both a
call and a terminator, which made call-constexpr.ll assert.
Reviewed By: sameerds, arsenm
Differential Revision: https://reviews.llvm.org/D145358
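
As a reader's aid (not part of the patch), here is a minimal sketch of the
legacy-PM usage pattern this commit switches to. The pass itself is
hypothetical; the analysis names and calls are the ones appearing in the
diff below:

  #include "llvm/Analysis/UniformityAnalysis.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  struct ExamplePass : FunctionPass {      // illustrative pass, not in this commit
    static char ID;
    ExamplePass() : FunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      // Replaces the old AU.addRequired<LegacyDivergenceAnalysis>().
      AU.addRequired<UniformityInfoWrapperPass>();
    }

    bool runOnFunction(Function &F) override {
      // Replaces getAnalysis<LegacyDivergenceAnalysis>().
      UniformityInfo &UA =
          getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
      for (Instruction &I : instructions(F))
        if (UA.isUniform(&I))  // same query the pass made via DA->isUniform(&I)
          ;                    // e.g. eligible for scalar (SALU) treatment
      return false;
    }
  };
  char ExamplePass::ID = 0;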
Added:
Modified:
llvm/lib/Analysis/UniformityAnalysis.cpp
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index bf1d3a61f75d7..f5693f2e37e7a 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -32,12 +32,10 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
- if (TTI->isSourceOfDivergence(&I)) {
- assert(!I.isTerminator());
+ if (TTI->isSourceOfDivergence(&I))
markDivergent(I);
- } else if (TTI->isAlwaysUniform(&I)) {
+ else if (TTI->isAlwaysUniform(&I))
addUniformOverride(I);
- }
}
for (auto &Arg : F.args()) {
if (TTI->isSourceOfDivergence(&Arg)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 346f201333c5d..b79ea7f2b5e22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -17,7 +17,7 @@
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
@@ -73,7 +73,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
const GCNSubtarget *ST = nullptr;
AssumptionCache *AC = nullptr;
DominatorTree *DT = nullptr;
- LegacyDivergenceAnalysis *DA = nullptr;
+ UniformityInfo *UA = nullptr;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
bool HasUnsafeFPMath = false;
@@ -224,7 +224,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
// FIXME: Division expansion needs to preserve the dominator tree.
if (!ExpandDiv64InIR)
@@ -314,7 +314,7 @@ bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && DA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
}
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -519,7 +519,7 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
return false;
// Prefer scalar if this could be s_mul_i32
- if (DA->isUniform(&I))
+ if (UA->isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -1237,7 +1237,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
return true;
if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- DA->isUniform(&I) && promoteUniformOpToI32(I))
+ UA->isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1367,7 +1367,7 @@ bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
bool Changed = false;
if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- DA->isUniform(&I))
+ UA->isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1377,7 +1377,7 @@ bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
bool Changed = false;
if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- DA->isUniform(&I))
+ UA->isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1396,7 +1396,7 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- DA->isUniform(&I))
+ UA->isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
@@ -1419,7 +1419,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
ST = &TM.getSubtarget<GCNSubtarget>(F);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -1459,7 +1459,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 2094a9ad9c58f..294231ded7d76 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -200,11 +200,11 @@
; GCN-O1-NEXT: AMDGPU Promote Alloca
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: SROA
-; GCN-O1-NEXT: Post-Dominator Tree Construction
-; GCN-O1-NEXT: Natural Loop Information
-; GCN-O1-NEXT: Legacy Divergence Analysis
+; GCN-O1-NEXT: Cycle Info Analysis
+; GCN-O1-NEXT: Uniformity Analysis
; GCN-O1-NEXT: AMDGPU IR optimizations
; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
+; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: Canonicalize natural loops
; GCN-O1-NEXT: Scalar Evolution Analysis
; GCN-O1-NEXT: Loop Pass Manager
@@ -494,8 +494,8 @@
; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis
; GCN-O1-OPTS-NEXT: Nary reassociation
; GCN-O1-OPTS-NEXT: Early CSE
-; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction
-; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis
+; GCN-O1-OPTS-NEXT: Cycle Info Analysis
+; GCN-O1-OPTS-NEXT: Uniformity Analysis
; GCN-O1-OPTS-NEXT: AMDGPU IR optimizations
; GCN-O1-OPTS-NEXT: Canonicalize natural loops
; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis
@@ -801,8 +801,8 @@
; GCN-O2-NEXT: Scalar Evolution Analysis
; GCN-O2-NEXT: Nary reassociation
; GCN-O2-NEXT: Early CSE
-; GCN-O2-NEXT: Post-Dominator Tree Construction
-; GCN-O2-NEXT: Legacy Divergence Analysis
+; GCN-O2-NEXT: Cycle Info Analysis
+; GCN-O2-NEXT: Uniformity Analysis
; GCN-O2-NEXT: AMDGPU IR optimizations
; GCN-O2-NEXT: Canonicalize natural loops
; GCN-O2-NEXT: Scalar Evolution Analysis
@@ -1114,8 +1114,8 @@
; GCN-O3-NEXT: Scalar Evolution Analysis
; GCN-O3-NEXT: Nary reassociation
; GCN-O3-NEXT: Early CSE
-; GCN-O3-NEXT: Post-Dominator Tree Construction
-; GCN-O3-NEXT: Legacy Divergence Analysis
+; GCN-O3-NEXT: Cycle Info Analysis
+; GCN-O3-NEXT: Uniformity Analysis
; GCN-O3-NEXT: AMDGPU IR optimizations
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O3-NEXT: Canonicalize natural loops
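For the record, a minimal sketch (illustrative, not from the patch) of the
property that made the removed assert fire: an invoke is both a call and a
block terminator, so a target whose TTI reports calls as sources of
divergence trips assert(!I.isTerminator()) in initialize():

  #include "llvm/IR/Instructions.h"
  // Both properties hold at once for an invoke, which the old assert ruled out.
  bool isCallThatTerminates(const llvm::Instruction &I) {
    return llvm::isa<llvm::InvokeInst>(I) && I.isTerminator(); // true for invoke
  }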