[llvm] [Transforms] Vectorizing Mem2Reg Pass (PR #107688)
William Junda Huang via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 10 16:09:40 PDT 2024
================
@@ -1060,6 +1349,194 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
}
}
+bool VectorizedMem2Reg::GatherAlloca(AllocaInst *AI, const AllocaInfo &Info) {
+ assert(Allocas.size() < MaxAllocaNum && "Allocas vector is full.");
+ // Add AI to the current batch and record its DEF and live-in blocks.
+ size_t Index = Allocas.size(); // Bit that stands for AI in each state.
+ Allocas.push_back(AI);
+
+ // Populate DEF states: set bit Index on every block that defines AI.
+ for (BasicBlock* Def : Info.DefiningBlocks) {
+ // IDF is computed from every DEF block: queue it on PQ only while its DEF
+ // state is still empty (once per BB at most), and only if it has a DT node.
+ if (BlockStates.get<DEF_STATE>(Def->getNumber()).none())
+ if (DomTreeNodeBase<BasicBlock> *Node = DT->getNode(Def))
+ PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
+ BlockStates.get<DEF_STATE>(Def->getNumber()).set(Index);
+ }
+
+ // Seed Worklist for the ALIVE computation with every block that uses the
+ // alloca before (or without) defining it, i.e. where it is live-in.
+ for (BasicBlock *Use : Info.UsingBlocks) {
+ BBNumberTy BN = Use->getNumber();
+
+ // A block that uses the alloca but does not define it is live-in. An
+ // earlier alloca of the batch may already be live in this block, so their
+ // UPDATE states should be merged (presumably done by PushWorkList).
+ if (!BlockStates.get<DEF_STATE>(BN)[Index]) {
+ PushWorkList(BN, AllocaState().set(Index));
+ continue;
+ }
+
+ // Use and def share this block: scan it from the top (the loop has no end
+ // test; it stops at the first store to or load from the alloca).
+ for (BasicBlock::iterator I = Use->begin();; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI) // Operand 1 is the stored-to address.
+ continue;
+
+ // A store to the alloca comes before any load of it, so the alloca is
+ // not live-in here.
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ // A load of the alloca comes before any store to it, so the alloca is
+ // live into this block. Add the block to the worklist.
+ if (LI->getOperand(0) == AI) { // Operand 0 is the loaded-from address.
+ PushWorkList(BN, AllocaState().set(Index));
+ break;
+ }
+ }
+ }
+
+ return Allocas.size() == MaxAllocaNum; // True once the batch is full.
+}
+
+void VectorizedMem2Reg::Calculate() {
+ // @TODO: Assign BB number in a way such that the BB deepest in the CFG has
+ // the largest number, and traverse it first using a priority queue. This
+ // allows faster convergence to the fixed-point.
+
+ // Compute ALIVE state for every block.
+ // Now Worklist is initialized with blocks containing any live-in allocas. We
+ // recursively add their predecessors to Worklist until we find the full
+ // region where the value is alive. Whenever a block's ALIVE state is updated,
+ // we need to check if the state value is actually modified, if so, we need
+ // to iterate its predecessors again to propagate the new state until reaching
+ // a fixed point.
+ while (!Worklist.empty()) {
+ auto [BN, State] = PopWorkList();
+
+ // Update the ALIVE state of this block. If the state remains unchanged, we
+ // have reached a fixed point, there is no more new liveness info to be
+ // propagated to its predecessors.
+ AllocaState OldState = BlockStates.get<ALIVE_STATE>(BN);
+ AllocaState NewState = (BlockStates.get<ALIVE_STATE>(BN) |= State);
+ if (NewState == OldState)
+ continue;
+
+ // If a fixed point is not reached, this is because either block BN is
+ // visited for the first time, or a loop in the CFG brings new liveness info
+ // back to this block. Either case, we add its predecessors to the worklist.
+ for (BBNumberTy Pred : Predecessors[BN]) {
+ // The value is not ALIVE in a predecessor if it contains a DEF, so we
+ // need to exclude such values, and only find those values not defined in
+ // this block.
+ AllocaState UpdateState = NewState & ~BlockStates.get<DEF_STATE>(Pred);
+ // Only add to Worklist if there is any value ALIVE at Pred.
+ if (UpdateState.any())
+ PushWorkList(Pred, UpdateState);
+ }
+ }
+
+ // Initialize UPDATE states of blocks in PQ to maintain the invariant. We
+ // calculate IDF of every DEF, so the initial UPDATE state is DEF state.
+ for (auto &Node : GetPQContainer()) {
+ unsigned BN = Node.first->getBlock()->getNumber();
+ BlockStates.get<UPDATE_STATE>(BN) = BlockStates.get<DEF_STATE>(BN);
+ }
+
+ // Compute IDF for every block containing alloca. Visiting blocks from the
+ // largest to the smallest DT level number.
+ while (!PQ.empty()) {
+ // RootState is the values available at Root, which will be propagated to
+ // the successors of its dominatees per the algorithm of IDF.
+ auto [Root, RootState] = PopPQ();
+ unsigned RootLevel = Root->getLevel();
+ BBNumberTy RootBN = Root->getBlock()->getNumber();
+
+ // Perform one iteration of dominance frontier computation on all blocks
+ // dominated by root. Here Worklist is not associated with UPDATE state
+ // because visited nodes are updated with the same RootState instead.
+ Worklist.push_back(RootBN);
+ while (!Worklist.empty()) {
+ unsigned BN = Worklist.back();
+ Worklist.pop_back();
+
+ for (BBNumberTy Succ : Successors[BN]) {
+ auto SuccNode = DT->getNode(Succ);
+ unsigned SuccLevel = SuccNode->getLevel();
+
+ // Successor node Succ with higher level in DT must be dominated by
+ // current node BN, so PHI will not be placed in it.
+ if (SuccLevel > RootLevel)
+ continue;
+
+ // Update IDF state of Succ by merging its previous state with available
+ // values from Root. Values no longer alive need not to be propagated,
+ // so that the algorithm converges faster.
+ AllocaState AliveState = BlockStates.get<ALIVE_STATE>(Succ);
+ AllocaState OldState = BlockStates.get<IDF_STATE>(Succ);
+ AllocaState NewState =
+ (BlockStates.get<IDF_STATE>(Succ) |= (RootState & AliveState));
+ // If IDF state is unchanged, we reached a fixed point, and there will
+ // be no more new value to propagate. This includes the case that no
+ // value from Root is alive at Succ ((RootState & AliveState) == 0).
+ if (NewState == OldState)
+ continue;
+
+ // We always filter UPDATE state with ALIVE state, so it is an invariant
+ // that IDF values are a subset of ALIVE values.
+ assert((AliveState | OldState) == AliveState);
+ assert((AliveState | NewState) == AliveState);
+
+ // Any newly set bit in IDF state represents inserted PHI, add it to the
+ // output.
+ AllocaState Inserted = NewState ^ OldState;
+ do {
+ size_t Index = 0;
+ if constexpr (MaxAllocaNum <= sizeof(unsigned long long) * CHAR_BIT) {
+ Index = llvm::countr_zero(Inserted.to_ullong());
+ } else {
+ while (!Inserted.test(Index))
+ ++Index;
+ }
+ PHIBlocks[Index].push_back(Succ);
+ Inserted.reset(Index);
+ } while (Inserted.any());
+
+ // If any new PHI is inserted at Succ, we need to iterate it too since
+ // it will propagate the PHI to blocks it does not dominate. An existing
+ // value is killed by DEF so the UPDATE state should exclude it.
+ AllocaState UpdateState = NewState & ~BlockStates.get<DEF_STATE>(Succ);
+ if (UpdateState.any())
+ PushPQ(SuccNode, UpdateState);
+ }
+
+ // Visit every node in DT subtree.
+ for (auto DomChild : *(DT->getNode(BN))) {
+ BBNumberTy DomChildBN = DomChild->getBlock()->getNumber();
+ // Since any value available at the dominator is available at the child
+ // node, we merge the dominator's IDF state into it. If the child's IDF
+ // state is unchanged, we reached a fixed point, so we do not need to
+ // visit it. Values no longer alive need not to be propagated, so that
+ // the algorithm converges faster.
+ AllocaState OldState = BlockStates.get<IDF_STATE>(DomChildBN);
+ AllocaState NewState = BlockStates.get<IDF_STATE>(DomChildBN) |=
+ (RootState & BlockStates.get<ALIVE_STATE>(DomChildBN));
+ // Since DT is a tree, there will be no dups in Worklist.
+ if (OldState != NewState)
+ Worklist.push_back(DomChildBN);
+ }
+ }
+ }
+
+ // Order inserted PHI nodes in a deterministic way.
+ for (size_t I = 0; I < Allocas.size(); ++I)
+ llvm::sort(PHIBlocks[I]);
----------------
huangjd wrote:
This is to be consistent with whatever sort is used by the scalar algorithm
https://github.com/llvm/llvm-project/pull/107688
More information about the llvm-commits
mailing list