[llvm] [AMDGPU] Add IR LiveReg type-based optimization (PR #66838)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 12:22:37 PDT 2024
================
@@ -102,14 +184,261 @@ bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ // "Optimize" the virtual regs that cross basic block boundaries. When
+ // building the SelectionDAG, vectors of illegal types that cross basic blocks
+ // will be scalarized and widened, with each scalar living in its
+ // own physical register. To work around this, this optimization converts the
+ // vectors to equivalent vectors of legal type (which are converted back
+ // before uses in subsequent blocks), to pack the bits into fewer physical
+ // registers (used in CopyToReg/CopyFromReg pairs).
+ LiveRegOptimizer LRO(Mod);
+
bool Changed = false;
for (auto &BB : F)
- for (Instruction &I : llvm::make_early_inc_range(BB))
+ for (Instruction &I : make_early_inc_range(BB)) {
Changed |= visit(I);
+ if (!LRO.shouldReplaceUses(I))
+ continue;
+ Changed |= LRO.replaceUses(I);
+ }
+ Changed |= LRO.replacePHIs();
return Changed;
}
+bool LiveRegOptimizer::replaceUses(Instruction &I) { // Redirects users of I outside its block to a converted-back copy; PHI users are only queued in PHIUpdater. Returns true if any use was rewritten here.
+ bool MadeChange = false;
+
+ struct ConvertUseInfo {
+ Instruction *Converted; // result of convertFromOptType inserted in the user block
+ SmallVector<Instruction *, 4> Users; // users in that block that will be pointed at Converted
+ };
+ DenseMap<BasicBlock *, ConvertUseInfo> InsertedConversionMap; // one conversion-back sequence per user block
+
+ ConversionCandidateInfo FromCCI(&I, I.getParent(),
+ std::next(I.getIterator()));
+ FromCCI.setNewType(getCompatibleType(FromCCI.getLiveRegDef()));
+ for (auto IUser = I.user_begin(); IUser != I.user_end(); IUser++) {
+
+ Instruction *UserInst = cast<Instruction>(*IUser);
+ if (UserInst->getParent() != I.getParent() || isa<PHINode>(UserInst)) { // only cross-block users and PHI users are handled
+ LLVM_DEBUG(dbgs() << *UserInst << "\n\tUses "
+ << *FromCCI.getOriginalType()
+ << " from previous block. Needs conversion\n");
+ convertToOptType(FromCCI);
+ if (!FromCCI.hasConverted())
+ continue;
+ // If it is a PHI node, just create and collect the new operand. We can
+ // only replace the PHI node once we have converted all the operands
+ if (auto PHI = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); Idx++) {
+ Value *IncVal = PHI->getIncomingValue(Idx);
+ if (&I == dyn_cast<Instruction>(IncVal)) {
+ BasicBlock *IncBlock = PHI->getIncomingBlock(Idx);
+ auto PHIOps =
+ find_if(PHIUpdater, [&UserInst](PHIUpdateInfo &Entry) {
+ return Entry.first == UserInst;
+ });
+
+ if (PHIOps == PHIUpdater.end())
+ PHIUpdater.push_back(
+ {UserInst, {{FromCCI.getConverted(), IncBlock}}});
+ else
+ PHIOps->second.push_back({FromCCI.getConverted(), IncBlock});
+
+ break; // NOTE(review): records only the first incoming slot that matches I on each visit - confirm this is intended when I reaches the PHI from several predecessors
+ }
+ }
+ continue;
+ }
+
+ // Do not create multiple conversion sequences if there are multiple
+ // uses in the same block
+ if (InsertedConversionMap.contains(UserInst->getParent())) {
+ InsertedConversionMap[UserInst->getParent()].Users.push_back(UserInst);
+ LLVM_DEBUG(dbgs() << "\tUser already has access to converted def\n");
+ continue;
+ }
+
+ ConversionCandidateInfo ToCCI(FromCCI.getConverted(), I.getType(),
+ UserInst->getParent(),
+
+ UserInst->getParent()->getFirstNonPHIIt());
+ convertFromOptType(ToCCI);
+ assert(ToCCI.hasConverted());
+ InsertedConversionMap[UserInst->getParent()] = {ToCCI.getConverted(),
+ {UserInst}};
+ }
+ }
+
+ // Replace the uses of I with the converted values in a separate loop that
+ // is not dependent upon the state of the uses
+ for (auto &Entry : InsertedConversionMap) {
+ for (auto &UserInst : Entry.second.Users) {
+ LLVM_DEBUG(dbgs() << *UserInst
+ << "\n\tNow uses: " << *Entry.second.Converted << '\n');
+ UserInst->replaceUsesOfWith(&I, Entry.second.Converted);
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+bool LiveRegOptimizer::replacePHIs() { // Rebuilds each queued PHI in the converted type once every incoming value has a converted operand. Returns true if any PHI was replaced.
+ bool MadeChange = false;
+ for (auto Ele : PHIUpdater) {
+ auto [ThePHIInst, NewPHINodeOps] = Ele;
+ LLVM_DEBUG(dbgs() << "Attempting to replace: " << *ThePHIInst << '\n');
+ // If we have converted all the required operands, then do the replacement
+ if (cast<PHINode>(ThePHIInst)->getNumIncomingValues() ==
+ NewPHINodeOps.size()) {
+ IRBuilder<> Builder(Ele.first); // the new PHI is created at the old PHI's position
+ auto NPHI = Builder.CreatePHI(NewPHINodeOps[0].first->getType(),
+ NewPHINodeOps.size());
+ for (auto IncVals : NewPHINodeOps) {
+ NPHI->addIncoming(IncVals.first, IncVals.second);
+ LLVM_DEBUG(dbgs() << " Using: " << *IncVals.first
+ << " For: " << IncVals.second->getName() << '\n');
+ }
+ LLVM_DEBUG(dbgs() << "Sucessfully replaced with " << *NPHI << '\n');
+ ConversionCandidateInfo ToCCI(
+ NPHI, ThePHIInst->getType(), ThePHIInst->getParent(),
+
+ ThePHIInst->getParent()->getFirstNonPHIIt());
+ convertFromOptType(ToCCI); // convert the new PHI back to the old PHI's type at the first non-PHI position
+ assert(ToCCI.hasConverted());
+ Ele.first->replaceAllUsesWith(ToCCI.getConverted());
+ // The old PHI is no longer used
+ ThePHIInst->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+Type *LiveRegOptimizer::getCompatibleType(Instruction *InstToConvert) {
+ Type *OriginalType = InstToConvert->getType();
+ assert(OriginalType->getScalarSizeInBits() <=
+ ConvertToScalar->getScalarSizeInBits());
+ VectorType *VTy = dyn_cast<VectorType>(OriginalType);
----------------
arsenm wrote:
Probably should just use FixedVectorType; the code below already assumes a fixed vector.
https://github.com/llvm/llvm-project/pull/66838
More information about the llvm-commits
mailing list