[llvm] 9ad09b2 - [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. (#118461)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 07:31:29 PST 2024
Author: Jay Foad
Date: 2024-12-03T15:31:25Z
New Revision: 9ad09b2930ef2e95bf8772c91f623881d1c14733
URL: https://github.com/llvm/llvm-project/commit/9ad09b2930ef2e95bf8772c91f623881d1c14733
DIFF: https://github.com/llvm/llvm-project/commit/9ad09b2930ef2e95bf8772c91f623881d1c14733.diff
LOG: [AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. (#118461)
Use references instead of pointers for most state, initialize it all in
the constructor, and common up some of the initialization between the
legacy and new pass manager paths.
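
For readers skimming the diff below, here is a rough, self-contained sketch of the pattern the message above describes. It is not code from the patch and the names (PassImpl, Analysis, FuncName) are invented for illustration: state that was previously held as nullable pointer members and filled in separately by the legacy and new pass manager entry points is now bound once in a constructor, as references where it is always present and as pointers only where it may be missing (the cached DominatorTree case).

#include <iostream>
#include <string>

// Stand-in for analysis results such as TargetLibraryInfo or UniformityInfo.
struct Analysis {
  std::string Name;
};

class PassImpl {
public:
  // Required inputs are references; optional ones (like a cached
  // DominatorTree) remain pointers that may be null.
  PassImpl(const std::string &FuncName, const Analysis &Required,
           const Analysis *Optional)
      : FuncName(FuncName), Required(Required), Optional(Optional) {}

  bool run() const {
    std::cout << "running on " << FuncName << " with " << Required.Name;
    if (Optional)
      std::cout << " and " << Optional->Name;
    std::cout << '\n';
    return false; // report whether anything was changed
  }

private:
  const std::string &FuncName;
  const Analysis &Required;
  const Analysis *Optional;
};

int main() {
  std::string FuncName = "foo";
  Analysis UA{"UniformityInfo"};
  Analysis DT{"DominatorTree"};
  // Both pass manager entry points now reduce to a single call like this:
  PassImpl(FuncName, UA, &DT).run();
  return 0;
}

In the patch itself the same shape appears as the new AMDGPUCodeGenPrepareImpl constructor and the one-line AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run() calls in both runOnFunction and the new pass manager run method.
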
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 7257b53afe69d0..75e20c79301681 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,32 +95,45 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
+static bool hasUnsafeFPMath(const Function &F) {
+ return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+}
+
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
- const GCNSubtarget *ST = nullptr;
- const AMDGPUTargetMachine *TM = nullptr;
- const TargetLibraryInfo *TLInfo = nullptr;
- AssumptionCache *AC = nullptr;
- DominatorTree *DT = nullptr;
- UniformityInfo *UA = nullptr;
- Module *Mod = nullptr;
- const DataLayout *DL = nullptr;
- bool HasUnsafeFPMath = false;
- bool HasFP32DenormalFlush = false;
+ Function &F;
+ const GCNSubtarget &ST;
+ const AMDGPUTargetMachine &TM;
+ const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
+ const DominatorTree *DT;
+ const UniformityInfo &UA;
+ const DataLayout &DL;
+ const bool HasUnsafeFPMath;
+ const bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
mutable Function *LdexpF32 = nullptr;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+ AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const DominatorTree *DT, const UniformityInfo &UA)
+ : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
+ DT(DT), UA(UA), DL(F.getDataLayout()),
+ HasUnsafeFPMath(hasUnsafeFPMath(F)),
+ HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
+ DenormalMode::getPreserveSign()) {}
+
Function *getSqrtF32() const {
if (SqrtF32)
return SqrtF32;
- LLVMContext &Ctx = Mod->getContext();
- SqrtF32 = Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::amdgcn_sqrt,
- {Type::getFloatTy(Ctx)});
+ LLVMContext &Ctx = F.getContext();
+ SqrtF32 = Intrinsic::getOrInsertDeclaration(
+ F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});
return SqrtF32;
}
@@ -128,9 +141,10 @@ class AMDGPUCodeGenPrepareImpl
if (LdexpF32)
return LdexpF32;
- LLVMContext &Ctx = Mod->getContext();
+ LLVMContext &Ctx = F.getContext();
LdexpF32 = Intrinsic::getOrInsertDeclaration(
- Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
+ F.getParent(), Intrinsic::ldexp,
+ {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
return LdexpF32;
}
@@ -166,8 +180,7 @@ class AMDGPUCodeGenPrepareImpl
/// Wrapper to pass all the arguments to computeKnownFPClass
KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested,
const Instruction *CtxI) const {
- return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI,
- DT);
+ return llvm::computeKnownFPClass(V, DL, Interested, 0, TLI, AC, CtxI, DT);
}
bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
@@ -317,13 +330,10 @@ class AMDGPUCodeGenPrepareImpl
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitMinNum(IntrinsicInst &I);
bool visitSqrt(IntrinsicInst &I);
- bool run(Function &F);
+ bool run();
};
class AMDGPUCodeGenPrepare : public FunctionPass {
-private:
- AMDGPUCodeGenPrepareImpl Impl;
-
public:
static char ID;
AMDGPUCodeGenPrepare() : FunctionPass(ID) {
@@ -339,13 +349,12 @@ class AMDGPUCodeGenPrepare : public FunctionPass {
AU.setPreservesAll();
}
bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
};
} // end anonymous namespace
-bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+bool AMDGPUCodeGenPrepareImpl::run() {
BreakPhiNodesCache.clear();
bool MadeChange = false;
@@ -411,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
if (const VectorType *VT = dyn_cast<VectorType>(T)) {
// TODO: The set of packed operations is more limited, so may want to
// promote some anyway.
- if (ST->hasVOP3PInsts())
+ if (ST.hasVOP3PInsts())
return false;
return needsPromotionToI32(VT->getElementType());
@@ -422,7 +431,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
return Ty->isFloatTy() || Ty->isDoubleTy() ||
- (Ty->isHalfTy() && ST->has16BitInsts());
+ (Ty->isHalfTy() && ST.has16BitInsts());
}
// Return true if the op promoted to i32 should have nsw set.
@@ -455,11 +464,10 @@ static bool promotedOpIsNUW(const Instruction &I) {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
- const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -591,11 +599,11 @@ bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
- return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
+ return computeKnownBits(Op, DL, 0, AC).countMaxActiveBits();
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
- return ComputeMaxSignificantBits(Op, *DL, 0, AC);
+ return ComputeMaxSignificantBits(Op, DL, 0, AC);
}
static void extractValues(IRBuilder<> &Builder,
@@ -631,11 +639,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
Type *Ty = I.getType();
unsigned Size = Ty->getScalarSizeInBits();
- if (Size <= 16 && ST->has16BitInsts())
+ if (Size <= 16 && ST.has16BitInsts())
return false;
// Prefer scalar if this could be s_mul_i32
- if (UA->isUniform(&I))
+ if (UA.isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -646,11 +654,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
unsigned LHSBits = 0, RHSBits = 0;
bool IsSigned = false;
- if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
+ if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
(RHSBits = numBitsUnsigned(RHS)) <= 24) {
IsSigned = false;
- } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
+ } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
(RHSBits = numBitsSigned(RHS)) <= 24) {
IsSigned = true;
@@ -730,21 +738,21 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
if (CastOp) {
if (!CastOp->hasOneUse())
return false;
- CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
- CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
+ CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), DL);
+ CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), DL);
}
// TODO: Handle special 0/-1 cases DAG combine does, although we only really
// need to handle divisions here.
- Constant *FoldedT = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
+ Constant *FoldedT =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, DL);
if (!FoldedT || isa<ConstantExpr>(FoldedT))
return false;
- Constant *FoldedF = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
+ Constant *FoldedF =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, DL);
if (!FoldedF || isa<ConstantExpr>(FoldedF))
return false;
@@ -777,7 +785,7 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
// result? It's unspecified by the spec.
Value *FrexpExp =
- ST->hasFractBug()
+ ST.hasFractBug()
? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
{Builder.getInt32Ty(), Ty}, Src)
: Builder.CreateExtractValue(Frexp, {1});
@@ -815,7 +823,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// If we have have to work around the fract/frexp bug, we're worse off than
// using the fdiv.fast expansion. The full safe expansion is faster if we have
// fast FMA.
- if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() &&
+ if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() &&
(!FMF.noNaNs() || !FMF.noInfs()))
return nullptr;
@@ -1157,17 +1165,12 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
if (NewVal) {
FDiv.replaceAllUsesWith(NewVal);
NewVal->takeName(&FDiv);
- RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI);
}
return true;
}
-static bool hasUnsafeFPMath(const Function &F) {
- Attribute Attr = F.getFnAttribute("unsafe-fp-math");
- return Attr.getValueAsBool();
-}
-
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
Value *LHS, Value *RHS) {
Type *I32Ty = Builder.getInt32Ty();
@@ -1192,7 +1195,6 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
Value *Den, unsigned AtLeast,
bool IsSigned) const {
- const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
if (LHSSignBits < AtLeast)
return -1;
@@ -1271,7 +1273,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
Value *FQNeg = Builder.CreateFNeg(FQ);
// float fr = mad(fqneg, fb, fa);
- auto FMAD = !ST->hasMadMacF32Insts()
+ auto FMAD = !ST.hasMadMacF32Insts()
? Intrinsic::fma
: (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
Value *FR = Builder.CreateIntrinsic(FMAD,
@@ -1338,7 +1340,7 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// If there's no wider mulhi, there's only a better expansion for powers of
// two.
// TODO: Should really know for each vector element.
- if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT))
+ if (isKnownToBeAPowerOfTwo(C, DL, true, 0, AC, &I, DT))
return true;
return false;
@@ -1348,8 +1350,8 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (BinOpDen->getOpcode() == Instruction::Shl &&
isa<Constant>(BinOpDen->getOperand(0)) &&
- isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true,
- 0, AC, &I, DT)) {
+ isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), DL, true, 0, AC, &I,
+ DT)) {
return true;
}
}
@@ -1357,9 +1359,9 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
return false;
}
-static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL) {
// Check whether the sign can be determined statically.
- KnownBits Known = computeKnownBits(V, *DL);
+ KnownBits Known = computeKnownBits(V, DL);
if (Known.isNegative())
return Constant::getAllOnesValue(V->getType());
if (Known.isNonNegative())
@@ -1542,8 +1544,8 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I) && promoteUniformOpToI32(I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1655,11 +1657,11 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
};
WidenLoad->setMetadata(LLVMContext::MD_range,
- MDNode::get(Mod->getContext(), LowAndHigh));
+ MDNode::get(F.getContext(), LowAndHigh));
}
}
- int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
+ int TySize = DL.getTypeSizeInBits(I.getType());
Type *IntNTy = Builder.getIntNTy(TySize);
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
@@ -1674,8 +1676,8 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1688,8 +1690,8 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *CmpVal;
FCmpInst::Predicate Pred;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
- if (UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
+ if (UA.isUniform(&I))
return promoteUniformOpToI32(I);
return false;
}
@@ -1722,7 +1724,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -1947,7 +1949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
if (!FVT || FVT->getNumElements() == 1 ||
- DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
+ DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
return false;
if (!ForceBreakLargePHIs && !canBreakPHINode(I))
@@ -1960,7 +1962,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
unsigned Idx = 0;
// For 8/16 bits type, don't scalarize fully but break it up into as many
// 32-bit slices as we can, and scalarize the tail.
- const unsigned EltSize = DL->getTypeSizeInBits(EltTy);
+ const unsigned EltSize = DL.getTypeSizeInBits(EltTy);
const unsigned NumElts = FVT->getNumElements();
if (EltSize == 8 || EltSize == 16) {
const unsigned SubVecSize = (32 / EltSize);
@@ -2079,7 +2081,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
SmallVector<const Value *, 4> WorkList;
getUnderlyingObjects(I.getOperand(0), WorkList);
if (!all_of(WorkList, [&](const Value *V) {
- return isPtrKnownNeverNull(V, *DL, *TM, SrcAS);
+ return isPtrKnownNeverNull(V, DL, TM, SrcAS);
}))
return false;
@@ -2107,8 +2109,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
@@ -2120,7 +2122,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
/// If fract is a useful instruction for the subtarget. Does not account for the
/// nan handling; the instruction has a nan check on the input value.
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
- if (ST->hasFractBug())
+ if (ST.hasFractBug())
return nullptr;
if (I.getIntrinsicID() != Intrinsic::minnum)
@@ -2177,7 +2179,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
// Match pattern for fract intrinsic in contexts where the nan check has been
// optimized out (and hope the knowledge the source can't be nan wasn't lost).
if (!I.hasNoNaNs() &&
- !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(*DL, TLInfo)))
+ !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(DL, TLI)))
return false;
IRBuilder<> Builder(&I);
@@ -2189,7 +2191,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -2201,7 +2203,7 @@ static bool isOneOrNegOne(const Value *Val) {
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Type *Ty = Sqrt.getType()->getScalarType();
- if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts()))
+ if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST.has16BitInsts()))
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&Sqrt);
@@ -2257,14 +2259,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
return true;
}
-bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
- Impl.Mod = &M;
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.SqrtF32 = nullptr;
- Impl.LdexpF32 = nullptr;
- return false;
-}
-
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -2274,36 +2268,26 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return false;
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
- Impl.TM = &TM;
- Impl.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Impl.UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- return Impl.run(F);
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ const UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
FunctionAnalysisManager &FAM) {
- AMDGPUCodeGenPrepareImpl Impl;
- Impl.Mod = F.getParent();
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.TM = static_cast<const AMDGPUTargetMachine *>(&TM);
- Impl.TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &FAM.getResult<AssumptionAnalysis>(F);
- Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
- Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- if (!Impl.run(F))
+ const AMDGPUTargetMachine &ATM = static_cast<const AMDGPUTargetMachine &>(TM);
+ const TargetLibraryInfo *TLI = &FAM.getResult<TargetLibraryAnalysis>(F);
+ AssumptionCache *AC = &FAM.getResult<AssumptionAnalysis>(F);
+ const DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ const UniformityInfo &UA = FAM.getResult<UniformityInfoAnalysis>(F);
+ AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);
+ if (!Impl.run())
return PreservedAnalyses::all();
PreservedAnalyses PA = PreservedAnalyses::none();
if (!Impl.FlowChanged)