[llvm] [X86][SimplifyCFG] Support hoisting load/store with conditional faulting (PR #96878)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 3 01:40:46 PDT 2024


================
@@ -2978,6 +2987,218 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }
 
+/// Hoist load/store instructions from the conditional successor blocks up into
+/// the block.
+///
+/// \param BI conditional branch instruction for the successor blocks.
+/// \param ThenBB the successor block to hoist from. If NULL, both of the
+///        successors are considered.
+///
+/// We are looking for code like the following:
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     store i32 1, ptr %q, align 4
+///     ...
+///   TrueBB:
+///     %0 = load i32, ptr %b, align 4
+///     store i32 %0, ptr %p, align 4
+///     ...
+/// \endcode
+//
+/// We are going to transform this into:
+///
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     %0 = cload i32, ptr %b, %cond
+///     cstore i32 %0, ptr %p, %cond
+///     cstore i32 1, ptr %q, ~%cond
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     ...
+///   TrueBB:
+///     ...
+/// \endcode
+///
+/// where cload/cstore is represented by intrinsic like llvm.masked.load/store,
+/// e.g.
+///
+/// \code
+///   %vcond = bitcast i1 %cond to <1 x i1>
+///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+///                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+///   %0 = bitcast <1 x i32> %v0 to i32
+///   call void @llvm.masked.store.v1i32.p0
+//                          (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+///   %cond.not = xor i1 %cond, true
+///   %vcond.not = bitcast i1 %cond.not to <1 x i>
+///   call void @llvm.masked.store.v1i32.p0
+///              (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
+/// \endcode
+///
+/// \returns true if any load/store is hosited.
+///
+/// Note that this tranform should be run
+/// * before SpeculativelyExecuteBB so that the latter can have more chance.
+/// * after hoistCommonCodeFromSuccessors to ensure unconditional loads/stores
+///   are handled first.
+bool SimplifyCFGOpt::hoistLoadStoreWithCondFaultingFromSuccessors(
+    BranchInst *BI, BasicBlock *ThenBB) {
+  if (!HoistLoadsStoresWithCondFaulting ||
+      !Options.HoistLoadsStoresWithCondFaulting ||
+      !TTI.hasConditionalLoadStoreForType())
+    return false;
+
+  if (!BI || !BI->isConditional())
+    return false;
+
+  if (!isProfitableToSpeculate(BI, ThenBB != BI->getSuccessor(0), TTI))
+    return false;
+
+  SmallVector<BasicBlock *, 2> Successors;
+  if (ThenBB)
+    Successors.push_back(ThenBB);
+  else
+    Successors.append({BI->getSuccessor(0), BI->getSuccessor(1)});
+
+  // If either of the blocks has it's address taken, then we can't do this fold,
+  // because the code we'd hoist would no longer run when we jump into the block
+  // by it's address.
+  for (auto *Succ : Successors)
+    if (Succ->hasAddressTaken())
+      return false;
+
+  // Not use isa<AllocaInst>(getUnderlyingObject(I.getOperand(0)) to avoid
+  // checking all intermediate operands dominate the branch.
+  auto IsLoadFromAlloca = [](const Instruction &I) {
+    return isa<LoadInst>(I) && isa<AllocaInst>((I.getOperand(0)));
+  };
+
+  // Collect hoisted loads/stores.
+  SmallSetVector<Instruction *, 4> HoistedInsts;
+  // Not hoist load/store if
+  // 1. target does not have corresponding conditional faulting load/store.
+  // 2. it's volatile or atomic.
+  // 3. there is a load/store that can not be hoisted in the same bb.
+  // 4. there is a non-load/store that's not safe to speculatively execute
+  //    in the same bb.
+  // 5. any operand of it does not dominate the branch.
+  // 6. it's a store and a memory read is skipped.
+  auto HoistInstsInBB = [&](BasicBlock *BB) {
+    bool SkipMemoryRead = false;
+    // A more efficient way to check domination. An operand dominates the
+    // BranchInst if
+    // 1. it's not defined in the same bb as the instruction.
+    // 2. it's to be hoisted.
+    //
+    // b/c BB is only predecessor and BranchInst does not define any value.
+    auto OpsDominatesBranch = [&](Instruction &I) {
+      return llvm::all_of(I.operands(), [&](Value *Op) {
+        if (auto *J = dyn_cast<Instruction>(Op)) {
+          if (HoistedInsts.contains(J))
+            return true;
+          if (J->getParent() == I.getParent())
+            return false;
+        }
+        return true;
+      });
+    };
+    for (auto &I : *BB) {
+      auto *LI = dyn_cast<LoadInst>(&I);
+      auto *SI = dyn_cast<StoreInst>(&I);
+      if (LI || SI) {
+        bool IsSimple = (LI && LI->isSimple()) || (SI && SI->isSimple());
+        if (!IsSimple || !OpsDominatesBranch(I))
+          return false;
+        auto *Type = getLoadStoreType(&I);
+        // a load from alloca is always safe.
+        if (!IsLoadFromAlloca(I) && !TTI.hasConditionalLoadStoreForType(Type))
+          return false;
+        // Conservative aliasing check.
+        if (SI && SkipMemoryRead)
+          return false;
+        HoistedInsts.insert(&I);
+      } else if (!I.isTerminator() && !isSafeToSpeculativelyExecute(&I))
+        return false;
+      else if (I.mayReadFromMemory())
+        SkipMemoryRead = true;
+    }
+    return true;
+  };
+
+  for (auto *Succ : Successors)
+    if (!HoistInstsInBB(Succ))
+      return false;
+
+  if (HoistedInsts.empty())
+    return false;
+
+  // Put newly added instructions before the BranchInst.
+  IRBuilder<> Builder(BI);
+  auto &Context = BI->getParent()->getContext();
+  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+  auto *Cond = BI->getOperand(0);
+  auto *VCond = Builder.CreateBitCast(Cond, VCondTy);
+  Value *VCondNot = nullptr;
+  for (auto *I : HoistedInsts) {
+    // Only need to move the position for load from alloca.
+    if (IsLoadFromAlloca(*I)) {
+      I->moveBefore(BI);
+      continue;
+    }
+
+    bool InvertCond = I->getParent() == BI->getSuccessor(1);
+    // Construct the inverted condition if need.
+    if (InvertCond && !VCondNot)
+      VCondNot = Builder.CreateBitCast(
+          Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+
+    auto *Mask = InvertCond ? VCondNot : VCond;
+    auto *Op0 = I->getOperand(0);
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      // Load
+      auto *Ty = I->getType();
+      // NOTE: Now we assume conditional faulting load/store is supported for
+      // scalar only when creating new instructions, but it's easy to extend it
+      // for vector types in the future.
+      assert(!Ty->isVectorTy() && "not implemented");
+      auto *V0 = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1), Op0,
+                                          LI->getAlign(), Mask);
+      auto *S0 = Builder.CreateBitCast(V0, Ty);
+      V0->copyMetadata(*I);
+      I->replaceAllUsesWith(S0);
+    } else {
+      // Store
+      assert(!Op0->getType()->isVectorTy() && "not implemented");
+      auto *StoredVal =
+          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      auto *VStore = Builder.CreateMaskedStore(
+          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+      VStore->copyMetadata(*I);
+      // FIXME: DIAssignID is not supported for masked store yet.
+      // (Verifier::visitDIAssignIDMetadata)
+      at::deleteAssignmentMarkers(VStore);
+      VStore->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+        return Node->getMetadataID() == Metadata::DIAssignIDKind;
+      });
+    }
+  }
+
+  // Erase the hoisted instrutions in reverse order to avoid use-w/o-define
+  // error.
+  std::for_each(HoistedInsts.rbegin(), HoistedInsts.rend(), [&](auto I) {
+    if (!IsLoadFromAlloca(*I))
+      I->eraseFromParent();
+  });
----------------
antoniofrighetto wrote:

```suggestion
  for (auto *Inst : reverse(HoistedInsts))
    if (!IsLoadFromAlloca(*Inst))
      Inst->eraseFromParent();
```

https://github.com/llvm/llvm-project/pull/96878


More information about the llvm-commits mailing list