[llvm] 89a6106 - [SROA] Rewrite store-into-selected-address into predicated stores
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 10 10:08:27 PST 2022
Author: Roman Lebedev
Date: 2022-12-10T21:07:03+03:00
New Revision: 89a6106ce50689c733be13aaef4be5f3f73708a2
URL: https://github.com/llvm/llvm-project/commit/89a6106ce50689c733be13aaef4be5f3f73708a2
DIFF: https://github.com/llvm/llvm-project/commit/89a6106ce50689c733be13aaef4be5f3f73708a2.diff
LOG: [SROA] Rewrite store-into-selected-address into predicated stores
Same basic idea as with unfolding loads into predicated loads,
but we obviously can't have speculative stores.
Added:
Modified:
llvm/include/llvm/Transforms/Scalar/SROA.h
llvm/lib/Transforms/Scalar/SROA.cpp
llvm/test/Transforms/SROA/select-store.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h
index 85b75c4d4640..26348da22021 100644
--- a/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -27,6 +27,7 @@ namespace llvm {
class AllocaInst;
class LoadInst;
+class StoreInst;
class AssumptionCache;
class DominatorTree;
class DomTreeUpdater;
@@ -46,7 +47,7 @@ class Partition;
class SROALegacyPass;
class SelectHandSpeculativity {
- unsigned char Storage = 0;
+ unsigned char Storage = 0; // None are speculatable by default.
using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
public:
@@ -64,7 +65,10 @@ static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
using PossiblySpeculatableLoad =
PointerIntPair<LoadInst *, 2, sroa::SelectHandSpeculativity>;
-using PossiblySpeculatableLoads = SmallVector<PossiblySpeculatableLoad, 2>;
+using UnspeculatableStore = StoreInst *;
+using RewriteableMemOp =
+ std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
+using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
} // end namespace sroa
@@ -130,8 +134,7 @@ class SROAPass : public PassInfoMixin<SROAPass> {
/// A worklist of select instructions to rewrite prior to promoting
/// allocas.
- SmallMapVector<SelectInst *, sroa::PossiblySpeculatableLoads, 8>
- SelectsToRewrite;
+ SmallMapVector<SelectInst *, sroa::RewriteableMemOps, 8> SelectsToRewrite;
/// Select instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers and then select between the result,
@@ -149,7 +152,7 @@ class SROAPass : public PassInfoMixin<SROAPass> {
/// or if we are allowed to perform CFG modifications.
/// If found an intervening bitcast with a single use of the load,
/// allow the promotion.
- static std::optional<sroa::PossiblySpeculatableLoads>
+ static std::optional<sroa::RewriteableMemOps>
isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
public:
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index cf988e02beb2..96afc1af7beb 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -108,6 +108,9 @@ STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
"Number of loads rewritten into predicated loads to allow promotion");
+STATISTIC(
+ NumStoresPredicated,
+ "Number of stores rewritten into predicated loads to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
@@ -1353,17 +1356,25 @@ isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
return Spec;
}
-std::optional<sroa::PossiblySpeculatableLoads>
+std::optional<sroa::RewriteableMemOps>
SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
- PossiblySpeculatableLoads Loads;
+ RewriteableMemOps Ops;
for (User *U : SI.users()) {
- LoadInst *LI;
- BitCastInst *BC = dyn_cast<BitCastInst>(U);
- if (BC && BC->hasOneUse())
- LI = dyn_cast<LoadInst>(*BC->user_begin());
- else
- LI = dyn_cast<LoadInst>(U);
+ if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
+ U = *BC->user_begin();
+
+ if (auto *Store = dyn_cast<StoreInst>(U)) {
+ // Note that atomic stores can be transformed; atomic semantics do not
+ // have any meaning for a local alloca. Stores are not speculatable,
+ // however, so if we can't turn it into a predicated store, we are done.
+ if (Store->isVolatile() || PreserveCFG)
+ return {}; // Give up on this `select`.
+ Ops.emplace_back(Store);
+ continue;
+ }
+
+ auto *LI = dyn_cast<LoadInst>(U);
// Note that atomic loads can be transformed;
// atomic semantics do not have any meaning for a local alloca.
@@ -1371,13 +1382,12 @@ SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
return {}; // Give up on this `select`.
PossiblySpeculatableLoad Load(LI);
-
if (!LI->isSimple()) {
// If the `load` is not simple, we can't speculatively execute it,
// but we could handle this via a CFG modification. But can we?
if (PreserveCFG)
return {}; // Give up on this `select`.
- Loads.emplace_back(Load);
+ Ops.emplace_back(Load);
continue;
}
@@ -1387,10 +1397,10 @@ SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
return {}; // Give up on this `select`.
Load.setInt(Spec);
- Loads.emplace_back(Load);
+ Ops.emplace_back(Load);
}
- return Loads;
+ return Ops;
}
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
@@ -1430,18 +1440,20 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
LI.replaceAllUsesWith(V);
}
-static void rewriteLoadOfSelect(SelectInst &SI, LoadInst &LI,
- sroa::SelectHandSpeculativity Spec,
- DomTreeUpdater &DTU) {
- LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
- BasicBlock *Head = LI.getParent();
+template <typename T>
+static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
+ sroa::SelectHandSpeculativity Spec,
+ DomTreeUpdater &DTU) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
+ LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
+ BasicBlock *Head = I.getParent();
Instruction *ThenTerm = nullptr;
Instruction *ElseTerm = nullptr;
if (Spec.areNoneSpeculatable())
- SplitBlockAndInsertIfThenElse(SI.getCondition(), &LI, &ThenTerm, &ElseTerm,
+ SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
SI.getMetadata(LLVMContext::MD_prof), &DTU);
else {
- SplitBlockAndInsertIfThen(SI.getCondition(), &LI, /*Unreachable=*/false,
+ SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
SI.getMetadata(LLVMContext::MD_prof), &DTU,
/*LI=*/nullptr, /*ThenBlock=*/nullptr);
if (Spec.isSpeculatable(/*isTrueVal=*/true))
@@ -1449,46 +1461,75 @@ static void rewriteLoadOfSelect(SelectInst &SI, LoadInst &LI,
}
auto *HeadBI = cast<BranchInst>(Head->getTerminator());
Spec = {}; // Do not use `Spec` beyond this point.
- BasicBlock *Tail = LI.getParent();
+ BasicBlock *Tail = I.getParent();
Tail->setName(Head->getName() + ".cont");
- auto *PN = PHINode::Create(LI.getType(), 2, "", &LI);
+ PHINode *PN;
+ if (isa<LoadInst>(I))
+ PN = PHINode::Create(I.getType(), 2, "", &I);
for (BasicBlock *SuccBB : successors(Head)) {
bool IsThen = SuccBB == HeadBI->getSuccessor(0);
int SuccIdx = IsThen ? 0 : 1;
- auto *NewLoadBB = SuccBB == Tail ? Head : SuccBB;
- if (NewLoadBB != Head) {
- NewLoadBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
- ++NumLoadsPredicated;
+ auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
+ if (NewMemOpBB != Head) {
+ NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
+ if (isa<LoadInst>(I))
+ ++NumLoadsPredicated;
+ else
+ ++NumStoresPredicated;
} else
++NumLoadsSpeculated;
- auto *CondLoad = cast<LoadInst>(LI.clone());
- CondLoad->insertBefore(NewLoadBB->getTerminator());
- CondLoad->setOperand(0, SI.getOperand(1 + SuccIdx));
- CondLoad->setName(LI.getName() + (IsThen ? ".then" : ".else") + ".val");
- PN->addIncoming(CondLoad, NewLoadBB);
+ auto &CondMemOp = cast<T>(*I.clone());
+ CondMemOp.insertBefore(NewMemOpBB->getTerminator());
+ CondMemOp.setOperand(I.getPointerOperandIndex(),
+ SI.getOperand(1 + SuccIdx));
+ if (isa<LoadInst>(I)) {
+ CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
+ PN->addIncoming(&CondMemOp, NewMemOpBB);
+ } else
+ LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
}
- PN->takeName(&LI);
- LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
- LI.replaceAllUsesWith(PN);
+ if (isa<LoadInst>(I)) {
+ PN->takeName(&I);
+ LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
+ I.replaceAllUsesWith(PN);
+ }
+}
+
+static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
+ sroa::SelectHandSpeculativity Spec,
+ DomTreeUpdater &DTU) {
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
+ else if (auto *SI = dyn_cast<StoreInst>(&I))
+ rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
+ else
+ llvm_unreachable_internal("Only for load and store.");
}
-static bool rewriteSelectInstLoads(SelectInst &SI,
- const sroa::PossiblySpeculatableLoads &Loads,
- IRBuilderTy &IRB, DomTreeUpdater *DTU) {
+static bool rewriteSelectInstMemOps(SelectInst &SI,
+ const sroa::RewriteableMemOps &Ops,
+ IRBuilderTy &IRB, DomTreeUpdater *DTU) {
bool CFGChanged = false;
LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
- for (const PossiblySpeculatableLoad &Load : Loads) {
- LoadInst *LI = Load.getPointer();
- sroa::SelectHandSpeculativity Spec = Load.getInt();
+ for (const RewriteableMemOp &Op : Ops) {
+ sroa::SelectHandSpeculativity Spec;
+ Instruction *I;
+ if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
+ I = *US;
+ } else {
+ auto PSL = std::get<PossiblySpeculatableLoad>(Op);
+ I = PSL.getPointer();
+ Spec = PSL.getInt();
+ }
if (Spec.areAllSpeculatable()) {
- speculateSelectInstLoads(SI, *LI, IRB);
+ speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
} else {
assert(DTU && "Should not get here when not allowed to modify the CFG!");
- rewriteLoadOfSelect(SI, *LI, Spec, *DTU);
+ rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
CFGChanged = true;
}
- LI->eraseFromParent();
+ I->eraseFromParent();
}
for (User *U : make_early_inc_range(SI.users()))
@@ -4490,20 +4531,20 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
break;
}
- SmallVector<std::pair<SelectInst *, PossiblySpeculatableLoads>, 2>
+ SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
NewSelectsToRewrite;
NewSelectsToRewrite.reserve(SelectUsers.size());
for (SelectInst *Sel : SelectUsers) {
- std::optional<PossiblySpeculatableLoads> Loads =
+ std::optional<RewriteableMemOps> Ops =
isSafeSelectToSpeculate(*Sel, PreserveCFG);
- if (!Loads) {
+ if (!Ops) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
NewSelectsToRewrite.clear();
break;
}
- NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Loads));
+ NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
}
if (Promotable) {
@@ -4809,7 +4850,7 @@ SROAPass::runOnAlloca(AllocaInst &AI) {
while (!RemainingSelectsToRewrite.empty()) {
const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
CFGChanged |=
- rewriteSelectInstLoads(*K, V, IRB, PreserveCFG ? nullptr : DTU);
+ rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
}
return {Changed, CFGChanged};
diff --git a/llvm/test/Transforms/SROA/select-store.ll b/llvm/test/Transforms/SROA/select-store.ll
index 830b0cf67924..33090c1d3b24 100644
--- a/llvm/test/Transforms/SROA/select-store.ll
+++ b/llvm/test/Transforms/SROA/select-store.ll
@@ -6,15 +6,28 @@ declare i8 @gen.i8()
declare ptr @gen.ptr()
define i8 @store(i8 %init, i1 %cond, ptr dereferenceable(4) %escape) {
-; CHECK-LABEL: @store(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP:%.*]] = alloca i8, align 4
-; CHECK-NEXT: store i8 [[INIT:%.*]], ptr [[TMP]], align 4
-; CHECK-NEXT: [[REINIT:%.*]] = call i8 @gen.i8()
-; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[TMP]], ptr [[ESCAPE:%.*]]
-; CHECK-NEXT: store i8 [[REINIT]], ptr [[ADDR]], align 4
-; CHECK-NEXT: [[TMP_0_RES:%.*]] = load i8, ptr [[TMP]], align 4
-; CHECK-NEXT: ret i8 [[TMP_0_RES]]
+; CHECK-PRESERVE-CFG-LABEL: @store(
+; CHECK-PRESERVE-CFG-NEXT: entry:
+; CHECK-PRESERVE-CFG-NEXT: [[TMP:%.*]] = alloca i8, align 4
+; CHECK-PRESERVE-CFG-NEXT: store i8 [[INIT:%.*]], ptr [[TMP]], align 4
+; CHECK-PRESERVE-CFG-NEXT: [[REINIT:%.*]] = call i8 @gen.i8()
+; CHECK-PRESERVE-CFG-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[TMP]], ptr [[ESCAPE:%.*]]
+; CHECK-PRESERVE-CFG-NEXT: store i8 [[REINIT]], ptr [[ADDR]], align 4
+; CHECK-PRESERVE-CFG-NEXT: [[TMP_0_RES:%.*]] = load i8, ptr [[TMP]], align 4
+; CHECK-PRESERVE-CFG-NEXT: ret i8 [[TMP_0_RES]]
+;
+; CHECK-MODIFY-CFG-LABEL: @store(
+; CHECK-MODIFY-CFG-NEXT: entry:
+; CHECK-MODIFY-CFG-NEXT: [[REINIT:%.*]] = call i8 @gen.i8()
+; CHECK-MODIFY-CFG-NEXT: br i1 [[COND:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_ELSE:%.*]]
+; CHECK-MODIFY-CFG: entry.then:
+; CHECK-MODIFY-CFG-NEXT: br label [[ENTRY_CONT:%.*]]
+; CHECK-MODIFY-CFG: entry.else:
+; CHECK-MODIFY-CFG-NEXT: store i8 [[REINIT]], ptr [[ESCAPE:%.*]], align 4
+; CHECK-MODIFY-CFG-NEXT: br label [[ENTRY_CONT]]
+; CHECK-MODIFY-CFG: entry.cont:
+; CHECK-MODIFY-CFG-NEXT: [[TMP_0:%.*]] = phi i8 [ [[REINIT]], [[ENTRY_THEN]] ], [ [[INIT:%.*]], [[ENTRY_ELSE]] ]
+; CHECK-MODIFY-CFG-NEXT: ret i8 [[TMP_0]]
;
entry:
%tmp = alloca i8, align 4
@@ -48,15 +61,28 @@ entry:
}
define i8 @store_atomic_unord(i8 %init, i1 %cond, ptr dereferenceable(4) %escape) {
-; CHECK-LABEL: @store_atomic_unord(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP:%.*]] = alloca i8, align 4
-; CHECK-NEXT: store i8 [[INIT:%.*]], ptr [[TMP]], align 4
-; CHECK-NEXT: [[REINIT:%.*]] = call i8 @gen.i8()
-; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[TMP]], ptr [[ESCAPE:%.*]]
-; CHECK-NEXT: store atomic i8 [[REINIT]], ptr [[ADDR]] unordered, align 4
-; CHECK-NEXT: [[TMP_0_RES:%.*]] = load i8, ptr [[TMP]], align 4
-; CHECK-NEXT: ret i8 [[TMP_0_RES]]
+; CHECK-PRESERVE-CFG-LABEL: @store_atomic_unord(
+; CHECK-PRESERVE-CFG-NEXT: entry:
+; CHECK-PRESERVE-CFG-NEXT: [[TMP:%.*]] = alloca i8, align 4
+; CHECK-PRESERVE-CFG-NEXT: store i8 [[INIT:%.*]], ptr [[TMP]], align 4
+; CHECK-PRESERVE-CFG-NEXT: [[REINIT:%.*]] = call i8 @gen.i8()
+; CHECK-PRESERVE-CFG-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[TMP]], ptr [[ESCAPE:%.*]]
+; CHECK-PRESERVE-CFG-NEXT: store atomic i8 [[REINIT]], ptr [[ADDR]] unordered, align 4
+; CHECK-PRESERVE-CFG-NEXT: [[TMP_0_RES:%.*]] = load i8, ptr [[TMP]], align 4
+; CHECK-PRESERVE-CFG-NEXT: ret i8 [[TMP_0_RES]]
+;
+; CHECK-MODIFY-CFG-LABEL: @store_atomic_unord(
+; CHECK-MODIFY-CFG-NEXT: entry:
+; CHECK-MODIFY-CFG-NEXT: [[REINIT:%.*]] = call i8 @gen.i8()
+; CHECK-MODIFY-CFG-NEXT: br i1 [[COND:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_ELSE:%.*]]
+; CHECK-MODIFY-CFG: entry.then:
+; CHECK-MODIFY-CFG-NEXT: br label [[ENTRY_CONT:%.*]]
+; CHECK-MODIFY-CFG: entry.else:
+; CHECK-MODIFY-CFG-NEXT: store atomic i8 [[REINIT]], ptr [[ESCAPE:%.*]] unordered, align 4
+; CHECK-MODIFY-CFG-NEXT: br label [[ENTRY_CONT]]
+; CHECK-MODIFY-CFG: entry.cont:
+; CHECK-MODIFY-CFG-NEXT: [[TMP_0:%.*]] = phi i8 [ [[REINIT]], [[ENTRY_THEN]] ], [ [[INIT:%.*]], [[ENTRY_ELSE]] ]
+; CHECK-MODIFY-CFG-NEXT: ret i8 [[TMP_0]]
;
entry:
%tmp = alloca i8, align 4
@@ -88,6 +114,3 @@ entry:
%res = load i8, ptr %tmp, align 4
ret i8 %res
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-MODIFY-CFG: {{.*}}
-; CHECK-PRESERVE-CFG: {{.*}}
More information about the llvm-commits
mailing list