[llvm] [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. (PR #118461)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 02:22:26 PST 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/118461
Use references instead of pointers for most state, initialize it all in
the constructor, and common up some of the initialization between the
legacy and new pass manager paths.
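For readers less familiar with the pattern being applied here, below is a minimal
standalone C++ sketch of the same idea: an Impl class that holds its state as
references, derives everything once in its constructor, and is constructed the
same way from both the legacy and the new pass manager entry points. This is
not the actual LLVM code; all names (FunctionLike, SubtargetLike, MyPrepareImpl,
runLegacy, runNewPM) are invented for illustration.

    #include <cassert>
    #include <string>

    struct FunctionLike { std::string Name; };      // stand-in for a Function
    struct SubtargetLike { bool Has16Bit = true; }; // stand-in for a subtarget
    struct DomTreeLike {};                          // optional cached analysis

    class MyPrepareImpl {
    public:
      // References for state that must exist; a pointer only for the
      // analysis that may legitimately be absent.
      FunctionLike &F;
      const SubtargetLike &ST;
      const DomTreeLike *DT;  // may be null
      const bool Has16Bit;    // derived once, in the constructor

      MyPrepareImpl(FunctionLike &F, const SubtargetLike &ST,
                    const DomTreeLike *DT)
          : F(F), ST(ST), DT(DT), Has16Bit(ST.Has16Bit) {}

      bool run() {
        // ... visit instructions, using F, ST, DT directly ...
        return Has16Bit; // pretend we changed something iff the feature exists
      }
    };

    // Legacy-PM style entry point: gather analyses, then construct and run.
    bool runLegacy(FunctionLike &F, const SubtargetLike &ST,
                   const DomTreeLike *DT) {
      return MyPrepareImpl(F, ST, DT).run();
    }

    // New-PM style entry point: identical construction, so there is no
    // duplicated initialization code between the two paths.
    bool runNewPM(FunctionLike &F, const SubtargetLike &ST,
                  const DomTreeLike &DT) {
      return MyPrepareImpl(F, ST, &DT).run();
    }

    int main() {
      FunctionLike F{"foo"};
      SubtargetLike ST;
      assert(runLegacy(F, ST, nullptr) == runNewPM(F, ST, DomTreeLike{}));
    }

The payoff, as in the patch below, is that there is no separate
doInitialization step and no half-initialized pointer members: once the Impl
object exists, all of its state is valid.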
From 318ebd5d46c5545c5223db796eeb32547fe81e08 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 3 Dec 2024 07:40:17 +0000
Subject: [PATCH] [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC.
Use references instead of pointers for most state, initialize it all in
the constructor, and common up some of the initialization between the
legacy and new pass manager paths.
---
.../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 198 ++++++++----------
1 file changed, 91 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 7257b53afe69d0..5c92428c5f1859 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,32 +95,45 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
+static bool hasUnsafeFPMath(const Function &F) {
+ return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+}
+
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
- const GCNSubtarget *ST = nullptr;
- const AMDGPUTargetMachine *TM = nullptr;
- const TargetLibraryInfo *TLInfo = nullptr;
- AssumptionCache *AC = nullptr;
- DominatorTree *DT = nullptr;
- UniformityInfo *UA = nullptr;
- Module *Mod = nullptr;
- const DataLayout *DL = nullptr;
- bool HasUnsafeFPMath = false;
- bool HasFP32DenormalFlush = false;
+ Function &F;
+ const GCNSubtarget &ST;
+ const AMDGPUTargetMachine &TM;
+ const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
+ const DominatorTree *DT;
+ const UniformityInfo &UA;
+ const DataLayout &DL;
+ bool HasUnsafeFPMath;
+ bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
mutable Function *LdexpF32 = nullptr;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+ AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const DominatorTree *DT, const UniformityInfo &UA)
+ : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
+ DT(DT), UA(UA), DL(F.getDataLayout()),
+ HasUnsafeFPMath(hasUnsafeFPMath(F)),
+ HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
+ DenormalMode::getPreserveSign()) {}
+
Function *getSqrtF32() const {
if (SqrtF32)
return SqrtF32;
- LLVMContext &Ctx = Mod->getContext();
- SqrtF32 = Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::amdgcn_sqrt,
- {Type::getFloatTy(Ctx)});
+ LLVMContext &Ctx = F.getContext();
+ SqrtF32 = Intrinsic::getOrInsertDeclaration(
+ F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});
return SqrtF32;
}
@@ -128,9 +141,10 @@ class AMDGPUCodeGenPrepareImpl
if (LdexpF32)
return LdexpF32;
- LLVMContext &Ctx = Mod->getContext();
+ LLVMContext &Ctx = F.getContext();
LdexpF32 = Intrinsic::getOrInsertDeclaration(
- Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
+ F.getParent(), Intrinsic::ldexp,
+ {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
return LdexpF32;
}
@@ -166,8 +180,7 @@ class AMDGPUCodeGenPrepareImpl
/// Wrapper to pass all the arguments to computeKnownFPClass
KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested,
const Instruction *CtxI) const {
- return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI,
- DT);
+ return llvm::computeKnownFPClass(V, DL, Interested, 0, TLI, AC, CtxI, DT);
}
bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
@@ -317,13 +330,10 @@ class AMDGPUCodeGenPrepareImpl
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitMinNum(IntrinsicInst &I);
bool visitSqrt(IntrinsicInst &I);
- bool run(Function &F);
+ bool run();
};
class AMDGPUCodeGenPrepare : public FunctionPass {
-private:
- AMDGPUCodeGenPrepareImpl Impl;
-
public:
static char ID;
AMDGPUCodeGenPrepare() : FunctionPass(ID) {
@@ -339,13 +349,12 @@ class AMDGPUCodeGenPrepare : public FunctionPass {
AU.setPreservesAll();
}
bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
};
} // end anonymous namespace
-bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+bool AMDGPUCodeGenPrepareImpl::run() {
BreakPhiNodesCache.clear();
bool MadeChange = false;
@@ -411,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
if (const VectorType *VT = dyn_cast<VectorType>(T)) {
// TODO: The set of packed operations is more limited, so may want to
// promote some anyway.
- if (ST->hasVOP3PInsts())
+ if (ST.hasVOP3PInsts())
return false;
return needsPromotionToI32(VT->getElementType());
@@ -422,7 +431,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
return Ty->isFloatTy() || Ty->isDoubleTy() ||
- (Ty->isHalfTy() && ST->has16BitInsts());
+ (Ty->isHalfTy() && ST.has16BitInsts());
}
// Return true if the op promoted to i32 should have nsw set.
@@ -455,11 +464,10 @@ static bool promotedOpIsNUW(const Instruction &I) {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
- const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -591,11 +599,11 @@ bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
- return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
+ return computeKnownBits(Op, DL, 0, AC).countMaxActiveBits();
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
- return ComputeMaxSignificantBits(Op, *DL, 0, AC);
+ return ComputeMaxSignificantBits(Op, DL, 0, AC);
}
static void extractValues(IRBuilder<> &Builder,
@@ -631,11 +639,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
Type *Ty = I.getType();
unsigned Size = Ty->getScalarSizeInBits();
- if (Size <= 16 && ST->has16BitInsts())
+ if (Size <= 16 && ST.has16BitInsts())
return false;
// Prefer scalar if this could be s_mul_i32
- if (UA->isUniform(&I))
+ if (UA.isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -646,11 +654,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
unsigned LHSBits = 0, RHSBits = 0;
bool IsSigned = false;
- if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
+ if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
(RHSBits = numBitsUnsigned(RHS)) <= 24) {
IsSigned = false;
- } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
+ } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
(RHSBits = numBitsSigned(RHS)) <= 24) {
IsSigned = true;
@@ -730,21 +738,21 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
if (CastOp) {
if (!CastOp->hasOneUse())
return false;
- CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
- CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
+ CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), DL);
+ CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), DL);
}
// TODO: Handle special 0/-1 cases DAG combine does, although we only really
// need to handle divisions here.
- Constant *FoldedT = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
+ Constant *FoldedT =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, DL);
if (!FoldedT || isa<ConstantExpr>(FoldedT))
return false;
- Constant *FoldedF = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
+ Constant *FoldedF =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, DL);
if (!FoldedF || isa<ConstantExpr>(FoldedF))
return false;
@@ -777,7 +785,7 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
// result? It's unspecified by the spec.
Value *FrexpExp =
- ST->hasFractBug()
+ ST.hasFractBug()
? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
{Builder.getInt32Ty(), Ty}, Src)
: Builder.CreateExtractValue(Frexp, {1});
@@ -815,7 +823,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// If we have to work around the fract/frexp bug, we're worse off than
// using the fdiv.fast expansion. The full safe expansion is faster if we have
// fast FMA.
- if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() &&
+ if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() &&
(!FMF.noNaNs() || !FMF.noInfs()))
return nullptr;
@@ -1157,17 +1165,12 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
if (NewVal) {
FDiv.replaceAllUsesWith(NewVal);
NewVal->takeName(&FDiv);
- RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI);
}
return true;
}
-static bool hasUnsafeFPMath(const Function &F) {
- Attribute Attr = F.getFnAttribute("unsafe-fp-math");
- return Attr.getValueAsBool();
-}
-
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
Value *LHS, Value *RHS) {
Type *I32Ty = Builder.getInt32Ty();
@@ -1192,7 +1195,6 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
Value *Den, unsigned AtLeast,
bool IsSigned) const {
- const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
if (LHSSignBits < AtLeast)
return -1;
@@ -1271,7 +1273,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
Value *FQNeg = Builder.CreateFNeg(FQ);
// float fr = mad(fqneg, fb, fa);
- auto FMAD = !ST->hasMadMacF32Insts()
+ auto FMAD = !ST.hasMadMacF32Insts()
? Intrinsic::fma
: (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
Value *FR = Builder.CreateIntrinsic(FMAD,
@@ -1338,7 +1340,7 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// If there's no wider mulhi, there's only a better expansion for powers of
// two.
// TODO: Should really know for each vector element.
- if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT))
+ if (isKnownToBeAPowerOfTwo(C, DL, true, 0, AC, &I, DT))
return true;
return false;
@@ -1348,8 +1350,8 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (BinOpDen->getOpcode() == Instruction::Shl &&
isa<Constant>(BinOpDen->getOperand(0)) &&
- isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true,
- 0, AC, &I, DT)) {
+ isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), DL, true, 0, AC, &I,
+ DT)) {
return true;
}
}
@@ -1357,9 +1359,9 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
return false;
}
-static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout &DL) {
// Check whether the sign can be determined statically.
- KnownBits Known = computeKnownBits(V, *DL);
+ KnownBits Known = computeKnownBits(V, DL);
if (Known.isNegative())
return Constant::getAllOnesValue(V->getType());
if (Known.isNonNegative())
@@ -1542,8 +1544,8 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I) && promoteUniformOpToI32(I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1655,11 +1657,11 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
};
WidenLoad->setMetadata(LLVMContext::MD_range,
- MDNode::get(Mod->getContext(), LowAndHigh));
+ MDNode::get(F.getContext(), LowAndHigh));
}
}
- int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
+ int TySize = DL.getTypeSizeInBits(I.getType());
Type *IntNTy = Builder.getIntNTy(TySize);
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
@@ -1674,8 +1676,8 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1688,8 +1690,8 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *CmpVal;
FCmpInst::Predicate Pred;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
- if (UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
+ if (UA.isUniform(&I))
return promoteUniformOpToI32(I);
return false;
}
@@ -1722,7 +1724,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -1947,7 +1949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
if (!FVT || FVT->getNumElements() == 1 ||
- DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
+ DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
return false;
if (!ForceBreakLargePHIs && !canBreakPHINode(I))
@@ -1960,7 +1962,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
unsigned Idx = 0;
// For 8/16 bits type, don't scalarize fully but break it up into as many
// 32-bit slices as we can, and scalarize the tail.
- const unsigned EltSize = DL->getTypeSizeInBits(EltTy);
+ const unsigned EltSize = DL.getTypeSizeInBits(EltTy);
const unsigned NumElts = FVT->getNumElements();
if (EltSize == 8 || EltSize == 16) {
const unsigned SubVecSize = (32 / EltSize);
@@ -2079,7 +2081,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
SmallVector<const Value *, 4> WorkList;
getUnderlyingObjects(I.getOperand(0), WorkList);
if (!all_of(WorkList, [&](const Value *V) {
- return isPtrKnownNeverNull(V, *DL, *TM, SrcAS);
+ return isPtrKnownNeverNull(V, DL, TM, SrcAS);
}))
return false;
@@ -2107,8 +2109,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
@@ -2120,7 +2122,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
/// If fract is a useful instruction for the subtarget. Does not account for the
/// nan handling; the instruction has a nan check on the input value.
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
- if (ST->hasFractBug())
+ if (ST.hasFractBug())
return nullptr;
if (I.getIntrinsicID() != Intrinsic::minnum)
@@ -2177,7 +2179,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
// Match pattern for fract intrinsic in contexts where the nan check has been
// optimized out (and hope the knowledge the source can't be nan wasn't lost).
if (!I.hasNoNaNs() &&
- !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(*DL, TLInfo)))
+ !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(DL, TLI)))
return false;
IRBuilder<> Builder(&I);
@@ -2189,7 +2191,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -2201,7 +2203,7 @@ static bool isOneOrNegOne(const Value *Val) {
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Type *Ty = Sqrt.getType()->getScalarType();
- if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts()))
+ if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST.has16BitInsts()))
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&Sqrt);
@@ -2257,14 +2259,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
return true;
}
-bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
- Impl.Mod = &M;
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.SqrtF32 = nullptr;
- Impl.LdexpF32 = nullptr;
- return false;
-}
-
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -2274,36 +2268,26 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return false;
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
- Impl.TM = &TM;
- Impl.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Impl.UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- return Impl.run(F);
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ const UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
FunctionAnalysisManager &FAM) {
- AMDGPUCodeGenPrepareImpl Impl;
- Impl.Mod = F.getParent();
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.TM = static_cast<const AMDGPUTargetMachine *>(&TM);
- Impl.TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &FAM.getResult<AssumptionAnalysis>(F);
- Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
- Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- if (!Impl.run(F))
+ const AMDGPUTargetMachine &ATM = static_cast<const AMDGPUTargetMachine &>(TM);
+ const TargetLibraryInfo *TLI = &FAM.getResult<TargetLibraryAnalysis>(F);
+ AssumptionCache *AC = &FAM.getResult<AssumptionAnalysis>(F);
+ const DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ const UniformityInfo &UA = FAM.getResult<UniformityInfoAnalysis>(F);
+ AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);
+ if (!Impl.run())
return PreservedAnalyses::all();
PreservedAnalyses PA = PreservedAnalyses::none();
if (!Impl.FlowChanged)