[llvm] 9ad09b2 - [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. (#118461)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 07:31:29 PST 2024
Author: Jay Foad
Date: 2024-12-03T15:31:25Z
New Revision: 9ad09b2930ef2e95bf8772c91f623881d1c14733
URL: https://github.com/llvm/llvm-project/commit/9ad09b2930ef2e95bf8772c91f623881d1c14733
DIFF: https://github.com/llvm/llvm-project/commit/9ad09b2930ef2e95bf8772c91f623881d1c14733.diff
LOG: [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. (#118461)
Use references instead of pointers for most state, initialize it all in
the constructor, and common up some of the initialization between the
legacy and new pass manager paths.
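
For readers skimming the diff below, here is a rough, self-contained sketch of the pattern the message above describes. It is not code from the patch and the names (PassImpl, Analysis, FuncName) are invented for illustration: state that was previously held as nullable pointer members and filled in separately by the legacy and new pass manager entry points is now bound once in a constructor, as references where it is always present and as pointers only where it may be missing (the cached DominatorTree case).

#include <iostream>
#include <string>

// Stand-in for analysis results such as TargetLibraryInfo or UniformityInfo.
struct Analysis {
  std::string Name;
};

class PassImpl {
public:
  // Required inputs are references; optional ones (like a cached
  // DominatorTree) remain pointers that may be null.
  PassImpl(const std::string &FuncName, const Analysis &Required,
           const Analysis *Optional)
      : FuncName(FuncName), Required(Required), Optional(Optional) {}

  bool run() const {
    std::cout << "running on " << FuncName << " with " << Required.Name;
    if (Optional)
      std::cout << " and " << Optional->Name;
    std::cout << '\n';
    return false; // report whether anything was changed
  }

private:
  const std::string &FuncName;
  const Analysis &Required;
  const Analysis *Optional;
};

int main() {
  std::string FuncName = "foo";
  Analysis UA{"UniformityInfo"};
  Analysis DT{"DominatorTree"};
  // Both pass manager entry points now reduce to a single call like this:
  PassImpl(FuncName, UA, &DT).run();
  return 0;
}

In the patch itself the same shape appears as the new AMDGPUCodeGenPrepareImpl constructor and the one-line AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run() calls in both runOnFunction and the new pass manager run method.
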
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 7257b53afe69d0..75e20c79301681 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,32 +95,45 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
+static bool hasUnsafeFPMath(const Function &F) {
+ return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+}
+
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
- const GCNSubtarget *ST = nullptr;
- const AMDGPUTargetMachine *TM = nullptr;
- const TargetLibraryInfo *TLInfo = nullptr;
- AssumptionCache *AC = nullptr;
- DominatorTree *DT = nullptr;
- UniformityInfo *UA = nullptr;
- Module *Mod = nullptr;
- const DataLayout *DL = nullptr;
- bool HasUnsafeFPMath = false;
- bool HasFP32DenormalFlush = false;
+ Function &F;
+ const GCNSubtarget &ST;
+ const AMDGPUTargetMachine &TM;
+ const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
+ const DominatorTree *DT;
+ const UniformityInfo &UA;
+ const DataLayout &DL;
+ const bool HasUnsafeFPMath;
+ const bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
mutable Function *LdexpF32 = nullptr;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+ AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const DominatorTree *DT, const UniformityInfo &UA)
+ : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
+ DT(DT), UA(UA), DL(F.getDataLayout()),
+ HasUnsafeFPMath(hasUnsafeFPMath(F)),
+ HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
+ DenormalMode::getPreserveSign()) {}
+
Function *getSqrtF32() const {
if (SqrtF32)
return SqrtF32;
- LLVMContext &Ctx = Mod->getContext();
- SqrtF32 = Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::amdgcn_sqrt,
- {Type::getFloatTy(Ctx)});
+ LLVMContext &Ctx = F.getContext();
+ SqrtF32 = Intrinsic::getOrInsertDeclaration(
+ F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});
return SqrtF32;
}
@@ -128,9 +141,10 @@ class AMDGPUCodeGenPrepareImpl
if (LdexpF32)
return LdexpF32;
- LLVMContext &Ctx = Mod->getContext();
+ LLVMContext &Ctx = F.getContext();
LdexpF32 = Intrinsic::getOrInsertDeclaration(
- Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
+ F.getParent(), Intrinsic::ldexp,
+ {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
return LdexpF32;
}
@@ -166,8 +180,7 @@ class AMDGPUCodeGenPrepareImpl
/// Wrapper to pass all the arguments to computeKnownFPClass
KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested,
const Instruction *CtxI) const {
- return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI,
- DT);
+ return llvm::computeKnownFPClass(V, DL, Interested, 0, TLI, AC, CtxI, DT);
}
bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
@@ -317,13 +330,10 @@ class AMDGPUCodeGenPrepareImpl
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitMinNum(IntrinsicInst &I);
bool visitSqrt(IntrinsicInst &I);
- bool run(Function &F);
+ bool run();
};
class AMDGPUCodeGenPrepare : public FunctionPass {
-private:
- AMDGPUCodeGenPrepareImpl Impl;
-
public:
static char ID;
AMDGPUCodeGenPrepare() : FunctionPass(ID) {
@@ -339,13 +349,12 @@ class AMDGPUCodeGenPrepare : public FunctionPass {
AU.setPreservesAll();
}
bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
};
} // end anonymous namespace
-bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+bool AMDGPUCodeGenPrepareImpl::run() {
BreakPhiNodesCache.clear();
bool MadeChange = false;
@@ -411,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
if (const VectorType *VT = dyn_cast<VectorType>(T)) {
// TODO: The set of packed operations is more limited, so may want to
// promote some anyway.
- if (ST->hasVOP3PInsts())
+ if (ST.hasVOP3PInsts())
return false;
return needsPromotionToI32(VT->getElementType());
@@ -422,7 +431,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
return Ty->isFloatTy() || Ty->isDoubleTy() ||
- (Ty->isHalfTy() && ST->has16BitInsts());
+ (Ty->isHalfTy() && ST.has16BitInsts());
}
// Return true if the op promoted to i32 should have nsw set.
@@ -455,11 +464,10 @@ static bool promotedOpIsNUW(const Instruction &I) {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
- const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -591,11 +599,11 @@ bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
- return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
+ return computeKnownBits(Op, DL, 0, AC).countMaxActiveBits();
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
- return ComputeMaxSignificantBits(Op, *DL, 0, AC);
+ return ComputeMaxSignificantBits(Op, DL, 0, AC);
}
static void extractValues(IRBuilder<> &Builder,
@@ -631,11 +639,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
Type *Ty = I.getType();
unsigned Size = Ty->getScalarSizeInBits();
- if (Size <= 16 && ST->has16BitInsts())
+ if (Size <= 16 && ST.has16BitInsts())
return false;
// Prefer scalar if this could be s_mul_i32
- if (UA->isUniform(&I))
+ if (UA.isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -646,11 +654,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
unsigned LHSBits = 0, RHSBits = 0;
bool IsSigned = false;
- if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
+ if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
(RHSBits = numBitsUnsigned(RHS)) <= 24) {
IsSigned = false;
- } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
+ } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
(RHSBits = numBitsSigned(RHS)) <= 24) {
IsSigned = true;
@@ -730,21 +738,21 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
if (CastOp) {
if (!CastOp->hasOneUse())
return false;
- CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
- CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
+ CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), DL);
+ CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), DL);
}
// TODO: Handle special 0/-1 cases DAG combine does, although we only really
// need to handle divisions here.
- Constant *FoldedT = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
+ Constant *FoldedT =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, DL);
if (!FoldedT || isa<ConstantExpr>(FoldedT))
return false;
- Constant *FoldedF = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
+ Constant *FoldedF =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, DL);
if (!FoldedF || isa<ConstantExpr>(FoldedF))
return false;
@@ -777,7 +785,7 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
// result? It's unspecified by the spec.
Value *FrexpExp =
- ST->hasFractBug()
+ ST.hasFractBug()
? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
{Builder.getInt32Ty(), Ty}, Src)
: Builder.CreateExtractValue(Frexp, {1});
@@ -815,7 +823,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// If we have have to work around the fract/frexp bug, we're worse off than
// using the fdiv.fast expansion. The full safe expansion is faster if we have
// fast FMA.
- if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() &&
+ if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() &&
(!FMF.noNaNs() || !FMF.noInfs()))
return nullptr;
@@ -1157,17 +1165,12 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
if (NewVal) {
FDiv.replaceAllUsesWith(NewVal);
NewVal->takeName(&FDiv);
- RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI);
}
return true;
}
-static bool hasUnsafeFPMath(const Function &F) {
- Attribute Attr = F.getFnAttribute("unsafe-fp-math");
- return Attr.getValueAsBool();
-}
-
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
Value *LHS, Value *RHS) {
Type *I32Ty = Builder.getInt32Ty();
@@ -1192,7 +1195,6 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
Value *Den, unsigned AtLeast,
bool IsSigned) const {
- const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
if (LHSSignBits < AtLeast)
return -1;
@@ -1271,7 +1273,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
Value *FQNeg = Builder.CreateFNeg(FQ);
// float fr = mad(fqneg, fb, fa);
- auto FMAD = !ST->hasMadMacF32Insts()
+ auto FMAD = !ST.hasMadMacF32Insts()
? Intrinsic::fma
: (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
Value *FR = Builder.CreateIntrinsic(FMAD,
@@ -1338,7 +1340,7 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// If there's no wider mulhi, there's only a better expansion for powers of
// two.
// TODO: Should really know for each vector element.
- if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT))
+ if (isKnownToBeAPowerOfTwo(C, DL, true, 0, AC, &I, DT))
return true;
return false;
@@ -1348,8 +1350,8 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (BinOpDen->getOpcode() == Instruction::Shl &&
isa<Constant>(BinOpDen->getOperand(0)) &&
- isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true,
- 0, AC, &I, DT)) {
+ isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), DL, true, 0, AC, &I,
+ DT)) {
return true;
}
}
@@ -1357,9 +1359,9 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
return false;
}
-static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL) {
// Check whether the sign can be determined statically.
- KnownBits Known = computeKnownBits(V, *DL);
+ KnownBits Known = computeKnownBits(V, DL);
if (Known.isNegative())
return Constant::getAllOnesValue(V->getType());
if (Known.isNonNegative())
@@ -1542,8 +1544,8 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I) && promoteUniformOpToI32(I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1655,11 +1657,11 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
};
WidenLoad->setMetadata(LLVMContext::MD_range,
- MDNode::get(Mod->getContext(), LowAndHigh));
+ MDNode::get(F.getContext(), LowAndHigh));
}
}
- int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
+ int TySize = DL.getTypeSizeInBits(I.getType());
Type *IntNTy = Builder.getIntNTy(TySize);
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
@@ -1674,8 +1676,8 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1688,8 +1690,8 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *CmpVal;
FCmpInst::Predicate Pred;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
- if (UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
+ if (UA.isUniform(&I))
return promoteUniformOpToI32(I);
return false;
}
@@ -1722,7 +1724,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -1947,7 +1949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
if (!FVT || FVT->getNumElements() == 1 ||
- DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
+ DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
return false;
if (!ForceBreakLargePHIs && !canBreakPHINode(I))
@@ -1960,7 +1962,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
unsigned Idx = 0;
// For 8/16 bits type, don't scalarize fully but break it up into as many
// 32-bit slices as we can, and scalarize the tail.
- const unsigned EltSize = DL->getTypeSizeInBits(EltTy);
+ const unsigned EltSize = DL.getTypeSizeInBits(EltTy);
const unsigned NumElts = FVT->getNumElements();
if (EltSize == 8 || EltSize == 16) {
const unsigned SubVecSize = (32 / EltSize);
@@ -2079,7 +2081,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
SmallVector<const Value *, 4> WorkList;
getUnderlyingObjects(I.getOperand(0), WorkList);
if (!all_of(WorkList, [&](const Value *V) {
- return isPtrKnownNeverNull(V, *DL, *TM, SrcAS);
+ return isPtrKnownNeverNull(V, DL, TM, SrcAS);
}))
return false;
@@ -2107,8 +2109,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
@@ -2120,7 +2122,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
/// If fract is a useful instruction for the subtarget. Does not account for the
/// nan handling; the instruction has a nan check on the input value.
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
- if (ST->hasFractBug())
+ if (ST.hasFractBug())
return nullptr;
if (I.getIntrinsicID() != Intrinsic::minnum)
@@ -2177,7 +2179,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
// Match pattern for fract intrinsic in contexts where the nan check has been
// optimized out (and hope the knowledge the source can't be nan wasn't lost).
if (!I.hasNoNaNs() &&
- !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(*DL, TLInfo)))
+ !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(DL, TLI)))
return false;
IRBuilder<> Builder(&I);
@@ -2189,7 +2191,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -2201,7 +2203,7 @@ static bool isOneOrNegOne(const Value *Val) {
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Type *Ty = Sqrt.getType()->getScalarType();
- if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts()))
+ if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST.has16BitInsts()))
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&Sqrt);
@@ -2257,14 +2259,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
return true;
}
-bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
- Impl.Mod = &M;
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.SqrtF32 = nullptr;
- Impl.LdexpF32 = nullptr;
- return false;
-}
-
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -2274,36 +2268,26 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return false;
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
- Impl.TM = &TM;
- Impl.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Impl.UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- return Impl.run(F);
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ const UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
FunctionAnalysisManager &FAM) {
- AMDGPUCodeGenPrepareImpl Impl;
- Impl.Mod = F.getParent();
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.TM = static_cast<const AMDGPUTargetMachine *>(&TM);
- Impl.TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &FAM.getResult<AssumptionAnalysis>(F);
- Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
- Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- if (!Impl.run(F))
+ const AMDGPUTargetMachine &ATM = static_cast<const AMDGPUTargetMachine &>(TM);
+ const TargetLibraryInfo *TLI = &FAM.getResult<TargetLibraryAnalysis>(F);
+ AssumptionCache *AC = &FAM.getResult<AssumptionAnalysis>(F);
+ const DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ const UniformityInfo &UA = FAM.getResult<UniformityInfoAnalysis>(F);
+ AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);
+ if (!Impl.run())
return PreservedAnalyses::all();
PreservedAnalyses PA = PreservedAnalyses::none();
if (!Impl.FlowChanged)