[llvm] [X86][SimplifyCFG] Support hoisting load/store with conditional faulting (PR #96878)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 11:17:32 PDT 2024
================
@@ -2960,6 +2967,199 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
return HaveRewritablePHIs;
}
+/// Hoist load/store instructions from the conditional successor blocks up into
+/// the block.
+///
+/// We are looking for code like the following:
+/// \code
+/// BB:
+/// ...
+/// %cond = icmp ult %x, %y
+/// br i1 %cond, label %TrueBB, label %FalseBB
+/// FalseBB:
+/// store i32 1, ptr %q, align 4
+/// ...
+/// TrueBB:
+/// %0 = load i32, ptr %b, align 4
+/// store i32 %0, ptr %p, align 4
+/// ...
+/// \endcode
+///
+/// We are going to transform this into:
+///
+/// \code
+/// BB:
+/// ...
+/// %cond = icmp ult %x, %y
+/// %0 = cload i32, ptr %b, %cond
+/// cstore i32 %0, ptr %p, %cond
+/// cstore i32 1, ptr %q, ~%cond
+/// br i1 %cond, label %TrueBB, label %FalseBB
+/// FalseBB:
+/// ...
+/// TrueBB:
+/// ...
+/// \endcode
+///
+/// where cload/cstore is represented by intrinsic like llvm.masked.load/store,
+/// e.g.
+///
+/// \code
+/// %vcond = bitcast i1 %cond to <1 x i1>
+/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+/// %0 = bitcast <1 x i32> %v0 to i32
+/// call void @llvm.masked.store.v1i32.p0
+/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+/// %cond.not = xor i1 %cond, true
+/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
+/// call void @llvm.masked.store.v1i32.p0
+/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+/// \endcode
+///
+/// \returns true if any load/store is hoisted.
+///
+/// Note that this transform should be run
+/// * before SpeculativelyExecuteBB so that the latter can have more chance.
+/// * after hoistCommonCodeFromSuccessors to ensure unconditional loads/stores
+/// are handled first.
+bool SimplifyCFGOpt::hoistLoadStoreWithCondFaultingFromSuccessors(
+ BasicBlock *BB) {
+ if (!HoistLoadsStoresWithCondFaulting ||
+ !TTI.hasConditionalLoadStoreForType())
+ return false;
+
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ BasicBlock *IfTrueBB = BI->getSuccessor(0);
+ BasicBlock *IfFalseBB = BI->getSuccessor(1);
+
+ // If either of the blocks has it's address taken, then we can't do this fold,
+ // because the code we'd hoist would no longer run when we jump into the block
+ // by it's address.
+ for (auto *Succ : {IfTrueBB, IfFalseBB})
+ if (Succ->hasAddressTaken())
+ return false;
+
+ // Not use isa<AllocaInst>(getUnderlyingObject(I.getOperand(0)) to avoid
+ // checking all intermediate operands dominate the branch.
+ auto IsLoadFromAlloca = [](const Instruction &I) {
+ return isa<LoadInst>(I) && isa<AllocaInst>((I.getOperand(0)));
+ };
+
+ // Collect hoisted loads/stores.
+ SmallSetVector<Instruction *, 4> HoistedInsts;
+ // Not hoist load/store if
+ // 1. target does not have corresponding conditional faulting load/store.
+ // 2. it's volatile or atomic.
+ // 3. there is a load/store that can not be hoisted in the same bb.
+ // 4. there is a non-load/store that's not safe to speculatively execute
+ // in the same bb.
+ // 5. any operand of it does not dominate the branch.
+ // 6. it's a store and a memory read is skipped.
+ auto HoistInstsInBB = [&](BasicBlock *BB) {
+ bool SkipMemoryRead = false;
+ // A more efficient way to check domination. An operand dominates the
+ // BranchInst if
+ // 1. it's not defined in the same bb as the instruction.
+ // 2. it's to be hoisted.
+ //
+ // b/c BB is only predecessor and BranchInst does not define any value.
+ auto OpsDominatesBranch = [&](Instruction &I) {
+ return llvm::all_of(I.operands(), [&](Value *Op) {
+ if (auto *J = dyn_cast<Instruction>(Op)) {
+ if (HoistedInsts.contains(J))
+ return true;
+ if (J->getParent() == I.getParent())
+ return false;
+ }
+ return true;
+ });
+ };
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (LI || SI) {
+ bool IsSimple = (LI && LI->isSimple()) || (SI && SI->isSimple());
+ if (!IsSimple || !OpsDominatesBranch(I))
+ return false;
+ auto *Type = getLoadStoreType(&I);
+ // a load from alloca is always safe.
+ if (!IsLoadFromAlloca(I) && !TTI.hasConditionalLoadStoreForType(Type))
+ return false;
+ // Conservative aliasing check.
+ if (SI && SkipMemoryRead)
+ return false;
+ HoistedInsts.insert(&I);
+ } else if (!I.isTerminator() && !isSafeToSpeculativelyExecute(&I))
+ return false;
----------------
dtcxzyw wrote:
This patch breaks our internal build:
Reproducer:
```
; bin/opt -mtriple=x86_64 -mattr=+cf -passes=simplifycfg reduced.ll -S
define void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm(i1 %cmp2.not, i1 %cmp7, ptr %p) {
entry:
br i1 %cmp2.not, label %if.end11, label %land.lhs.true
land.lhs.true: ; preds = %entry
br i1 %cmp7, label %if.then8, label %if.end11
if.then8: ; preds = %land.lhs.true
br label %if.end11
if.end11: ; preds = %if.then8, %land.lhs.true, %entry
%0 = load i64, ptr %p, align 8
%add = or i64 %0, 0
ret void
}
```
```
Instruction does not dominate all uses!
%4 = bitcast <1 x i64> %3 to i64
%add = or i64 %4, 0
LLVM ERROR: Broken module found, compilation aborted!
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: bin/opt -mtriple=x86_64 -mattr=+cf -passes=simplifycfg reduced.ll -S
1. Running pass "verify" on module "reduced.ll"
#0 0x00007ff8e3e14410 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.19.0git+0x214410)
#1 0x00007ff8e3e1141f llvm::sys::RunSignalHandlers() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.19.0git+0x21141f)
#2 0x00007ff8e3e11575 SignalHandler(int) Signals.cpp:0:0
#3 0x00007ff8e3842520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
#4 0x00007ff8e38969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
#5 0x00007ff8e38969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
#6 0x00007ff8e38969fc pthread_kill ./nptl/pthread_kill.c:89:10
#7 0x00007ff8e3842476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
#8 0x00007ff8e38287f3 abort ./stdlib/abort.c:81:7
#9 0x00007ff8e3c786a0 llvm::report_bad_alloc_error(char const*, bool) (.cold) ErrorHandling.cpp:0:0
#10 0x00007ff8e3d277fe (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.19.0git+0x1277fe)
#11 0x00007ff8ddb894e2 (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.19.0git+0x3894e2)
#12 0x00007ff8e42494f6 llvm::detail::PassModel<llvm::Module, llvm::VerifierPass, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.19.0git+0x1f4f6)
#13 0x00007ff8ddb48d0b llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.19.0git+0x348d0b)
#14 0x00007ff8e42576b4 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.19.0git+0x2d6b4)
#15 0x00007ff8e426478a optMain (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.19.0git+0x3a78a)
#16 0x00007ff8e3829d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#17 0x00007ff8e3829e40 call_init ./csu/../csu/libc-start.c:128:20
#18 0x00007ff8e3829e40 __libc_start_main ./csu/../csu/libc-start.c:379:5
#19 0x0000594a0b98e095 _start (bin/opt+0x1095)
Aborted (core dumped)
```
https://github.com/llvm/llvm-project/pull/96878
More information about the llvm-commits
mailing list