[llvm] [X86][SimplifyCFG] Support hoisting load/store with conditional faulting (PR #96878)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 28 11:17:32 PDT 2024


================
@@ -2960,6 +2967,199 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
   return HaveRewritablePHIs;
 }
 
+/// Hoist load/store instructions from the conditional successor blocks up into
+/// the block.
+///
+/// We are looking for code like the following:
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     store i32 1, ptr %q, align 4
+///     ...
+///   TrueBB:
+///     %0 = load i32, ptr %b, align 4
+///     store i32 %0, ptr %p, align 4
+///     ...
+/// \endcode
+//
+/// We are going to transform this into:
+///
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     %0 = cload i32, ptr %b, %cond
+///     cstore i32 %0, ptr %p, %cond
+///     cstore i32 1, ptr %q, ~%cond
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     ...
+///   TrueBB:
+///     ...
+/// \endcode
+///
+/// where cload/cstore is represented by intrinsic like llvm.masked.load/store,
+/// e.g.
+///
+/// \code
+///   %vcond = bitcast i1 %cond to <1 x i1>
+///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+///                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+///   %0 = bitcast <1 x i32> %v0 to i32
+///   call void @llvm.masked.store.v1i32.p0
+//                          (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+///   %cond.not = xor i1 %cond, true
+///   %vcond.not = bitcast i1 %cond.not to <1 x i>
+///   call void @llvm.masked.store.v1i32.p0
+///              (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
+/// \endcode
+///
+/// \returns true if any load/store is hosited.
+///
+/// Note that this tranform should be run
+/// * before SpeculativelyExecuteBB so that the latter can have more chance.
+/// * after hoistCommonCodeFromSuccessors to ensure unconditional loads/stores
+///   are handled first.
+bool SimplifyCFGOpt::hoistLoadStoreWithCondFaultingFromSuccessors(
+    BasicBlock *BB) {
+  if (!HoistLoadsStoresWithCondFaulting ||
+      !TTI.hasConditionalLoadStoreForType())
+    return false;
+
+  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!BI || !BI->isConditional())
+    return false;
+
+  BasicBlock *IfTrueBB = BI->getSuccessor(0);
+  BasicBlock *IfFalseBB = BI->getSuccessor(1);
+
+  // If either of the blocks has it's address taken, then we can't do this fold,
+  // because the code we'd hoist would no longer run when we jump into the block
+  // by it's address.
+  for (auto *Succ : {IfTrueBB, IfFalseBB})
+    if (Succ->hasAddressTaken())
+      return false;
+
+  // Not use isa<AllocaInst>(getUnderlyingObject(I.getOperand(0)) to avoid
+  // checking all intermediate operands dominate the branch.
+  auto IsLoadFromAlloca = [](const Instruction &I) {
+    return isa<LoadInst>(I) && isa<AllocaInst>((I.getOperand(0)));
+  };
+
+  // Collect hoisted loads/stores.
+  SmallSetVector<Instruction *, 4> HoistedInsts;
+  // Not hoist load/store if
+  // 1. target does not have corresponding conditional faulting load/store.
+  // 2. it's volatile or atomic.
+  // 3. there is a load/store that can not be hoisted in the same bb.
+  // 4. there is a non-load/store that's not safe to speculatively execute
+  //    in the same bb.
+  // 5. any operand of it does not dominate the branch.
+  // 6. it's a store and a memory read is skipped.
+  auto HoistInstsInBB = [&](BasicBlock *BB) {
+    bool SkipMemoryRead = false;
+    // A more efficient way to check domination. An operand dominates the
+    // BranchInst if
+    // 1. it's not defined in the same bb as the instruction.
+    // 2. it's to be hoisted.
+    //
+    // b/c BB is only predecessor and BranchInst does not define any value.
+    auto OpsDominatesBranch = [&](Instruction &I) {
+      return llvm::all_of(I.operands(), [&](Value *Op) {
+        if (auto *J = dyn_cast<Instruction>(Op)) {
+          if (HoistedInsts.contains(J))
+            return true;
+          if (J->getParent() == I.getParent())
+            return false;
+        }
+        return true;
+      });
+    };
+    for (auto &I : *BB) {
+      auto *LI = dyn_cast<LoadInst>(&I);
+      auto *SI = dyn_cast<StoreInst>(&I);
+      if (LI || SI) {
+        bool IsSimple = (LI && LI->isSimple()) || (SI && SI->isSimple());
+        if (!IsSimple || !OpsDominatesBranch(I))
+          return false;
+        auto *Type = getLoadStoreType(&I);
+        // a load from alloca is always safe.
+        if (!IsLoadFromAlloca(I) && !TTI.hasConditionalLoadStoreForType(Type))
+          return false;
+        // Conservative aliasing check.
+        if (SI && SkipMemoryRead)
+          return false;
+        HoistedInsts.insert(&I);
+      } else if (!I.isTerminator() && !isSafeToSpeculativelyExecute(&I))
+        return false;
----------------
dtcxzyw wrote:

This patch breaks our internal build:
Reproducer:
```
; bin/opt -mtriple=x86_64 -mattr=+cf -passes=simplifycfg reduced.ll -S
define void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(i1 %cmp2.not, i1 %cmp7, ptr %p) {
entry:
  br i1 %cmp2.not, label %if.end11, label %land.lhs.true

land.lhs.true:                                    ; preds = %entry
  br i1 %cmp7, label %if.then8, label %if.end11

if.then8:                                         ; preds = %land.lhs.true
  br label %if.end11

if.end11:                                         ; preds = %if.then8, %land.lhs.true, %entry
  %0 = load i64, ptr %p, align 8
  %add = or i64 %0, 0
  ret void
}
```
```
Instruction does not dominate all uses!
  %4 = bitcast <1 x i64> %3 to i64
  %add = or i64 %4, 0
LLVM ERROR: Broken module found, compilation aborted!
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: bin/opt -mtriple=x86_64 -mattr=+cf -passes=simplifycfg reduced.ll -S
1.      Running pass "verify" on module "reduced.ll"
 #0 0x00007ff8e3e14410 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.19.0git+0x214410)
 #1 0x00007ff8e3e1141f llvm::sys::RunSignalHandlers() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.19.0git+0x21141f)
 #2 0x00007ff8e3e11575 SignalHandler(int) Signals.cpp:0:0
 #3 0x00007ff8e3842520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x00007ff8e38969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007ff8e38969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007ff8e38969fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x00007ff8e3842476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007ff8e38287f3 abort ./stdlib/abort.c:81:7
 #9 0x00007ff8e3c786a0 llvm::report_bad_alloc_error(char const*, bool) (.cold) ErrorHandling.cpp:0:0
#10 0x00007ff8e3d277fe (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.19.0git+0x1277fe)
#11 0x00007ff8ddb894e2 (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.19.0git+0x3894e2)
#12 0x00007ff8e42494f6 llvm::detail::PassModel<llvm::Module, llvm::VerifierPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.19.0git+0x1f4f6)
#13 0x00007ff8ddb48d0b llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.19.0git+0x348d0b)
#14 0x00007ff8e42576b4 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.19.0git+0x2d6b4)
#15 0x00007ff8e426478a optMain (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.19.0git+0x3a78a)
#16 0x00007ff8e3829d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#17 0x00007ff8e3829e40 call_init ./csu/../csu/libc-start.c:128:20
#18 0x00007ff8e3829e40 __libc_start_main ./csu/../csu/libc-start.c:379:5
#19 0x0000594a0b98e095 _start (bin/opt+0x1095)
Aborted (core dumped)
```

https://github.com/llvm/llvm-project/pull/96878


More information about the llvm-commits mailing list