[llvm] r261122 - [WinEH] Optimize WinEH state stores
David Majnemer via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 17 10:37:12 PST 2016
Author: majnemer
Date: Wed Feb 17 12:37:11 2016
New Revision: 261122
URL: http://llvm.org/viewvc/llvm-project?rev=261122&view=rev
Log:
[WinEH] Optimize WinEH state stores
32-bit x86 Windows targets use a linked-list of nodes allocated on the
stack, referenced to via thread-local storage. The personality routine
interprets one of the fields in the node as a 'state number' which
indicates where the personality routine should transfer control.
State transitions are possible only before call-sites which may throw
exceptions. Our previous scheme had us update the state number before
all call-sites which may throw.
Instead, we can try to minimize the number of times we need to store by
reasoning about the nearest store which dominates the current call-site.
If the last store agrees with the current call-site, then we know that
the state-update is redundant and can be elided.
This is largely straightforward: an RPO walk of the blocks allows us to
correctly forward propagate the information when the function is a DAG.
Currently, loops are not handled optimally and may trigger superfluous
state stores.
Differential Revision: http://reviews.llvm.org/D16763
Modified:
llvm/trunk/lib/Target/X86/X86WinEHState.cpp
llvm/trunk/test/CodeGen/WinEH/wineh-statenumbering.ll
llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll
Modified: llvm/trunk/lib/Target/X86/X86WinEHState.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86WinEHState.cpp?rev=261122&r1=261121&r2=261122&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86WinEHState.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86WinEHState.cpp Wed Feb 17 12:37:11 2016
@@ -15,14 +15,20 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include <deque>
using namespace llvm;
@@ -33,6 +39,8 @@ void initializeWinEHStatePassPass(PassRe
}
namespace {
+const int OverdefinedState = INT_MIN;
+
class WinEHStatePass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
@@ -82,6 +90,8 @@ private:
// Per-function state
EHPersonality Personality = EHPersonality::Unknown;
Function *PersonalityFn = nullptr;
+ bool UseStackGuard = false;
+ int ParentBaseState;
/// The stack allocation containing all EH data, including the link in the
/// fs:00 chain and the current state.
@@ -170,6 +180,7 @@ bool WinEHStatePass::runOnFunction(Funct
// Reset per-function state.
PersonalityFn = nullptr;
Personality = EHPersonality::Unknown;
+ UseStackGuard = false;
return true;
}
@@ -247,7 +258,6 @@ void WinEHStatePass::emitExceptionRegist
// Struct type of RegNode. Used for GEPing.
Type *RegNodeTy;
- StringRef PersonalityName = PersonalityFn->getName();
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
Type *Int8PtrType = Builder.getInt8PtrTy();
if (Personality == EHPersonality::MSVC_CXX) {
@@ -259,7 +269,8 @@ void WinEHStatePass::emitExceptionRegist
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
StateFieldIndex = 2;
- insertStateNumberStore(&*Builder.GetInsertPoint(), -1);
+ ParentBaseState = -1;
+ insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
// Handler = __ehhandler$F
Function *Trampoline = generateLSDAInEAXThunk(F);
Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
@@ -267,7 +278,6 @@ void WinEHStatePass::emitExceptionRegist
} else if (Personality == EHPersonality::MSVC_X86SEH) {
// If _except_handler4 is in use, some additional guard checks and prologue
// stuff is required.
- bool UseStackGuard = (PersonalityName == "_except_handler4");
RegNodeTy = getSEHRegistrationType();
RegNode = Builder.CreateAlloca(RegNodeTy);
// SavedESP = llvm.stacksave()
@@ -276,7 +286,10 @@ void WinEHStatePass::emitExceptionRegist
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -2 / -1
StateFieldIndex = 4;
- insertStateNumberStore(&*Builder.GetInsertPoint(), UseStackGuard ? -2 : -1);
+ StringRef PersonalityName = PersonalityFn->getName();
+ UseStackGuard = (PersonalityName == "_except_handler4");
+ ParentBaseState = UseStackGuard ? -2 : -1;
+ insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
// ScopeTable = llvm.x86.seh.lsda(F)
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
Value *LSDA = Builder.CreateCall(
@@ -388,6 +401,88 @@ void WinEHStatePass::unlinkExceptionRegi
Builder.CreateStore(Next, FSZero);
}
+// Figure out what state we should assign calls in this block.
+static int getBaseStateForBB(DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ WinEHFuncInfo &FuncInfo, BasicBlock *BB) {
+ int BaseState = -1;
+ auto &BBColors = BlockColors[BB];
+
+ assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+ if (auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
+ }
+
+ return BaseState;
+}
+
+// Calculate the state a call-site is in.
+static int getStateForCallSite(DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ WinEHFuncInfo &FuncInfo, CallSite CS) {
+ if (auto *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ // Look up the state number of the EH pad this unwinds to.
+ assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
+ return FuncInfo.InvokeStateMap[II];
+ }
+ // Possibly throwing call instructions have no actions to take after
+ // an unwind. Ensure they are in the -1 state.
+ return getBaseStateForBB(BlockColors, FuncInfo, CS.getParent());
+}
+
+// Calculate the intersection of all the FinalStates for a BasicBlock's
+// predecessor.
+static int getPredState(DenseMap<BasicBlock *, int> &FinalStates, Function &F,
+ int ParentBaseState, BasicBlock *BB) {
+ // The entry block has no predecessors but we know that the prologue always
+ // sets us up with a fixed state.
+ if (&F.getEntryBlock() == BB)
+ return ParentBaseState;
+
+ // This is an EH Pad, conservatively report this basic block as overdefined.
+ if (BB->isEHPad())
+ return OverdefinedState;
+
+ int CommonState = OverdefinedState;
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ // We didn't manage to get a state for one of these predecessors,
+ // conservatively report this basic block as overdefined.
+ auto PredEndState = FinalStates.find(PredBB);
+ if (PredEndState == FinalStates.end())
+ return OverdefinedState;
+
+ // This code is reachable via exceptional control flow,
+ // conservatively report this basic block as overdefined.
+ if (isa<CatchReturnInst>(PredBB->getTerminator()))
+ return OverdefinedState;
+
+ int PredState = PredEndState->second;
+ assert(PredState != OverdefinedState &&
+ "overdefined BBs shouldn't be in FinalStates");
+ if (CommonState == OverdefinedState)
+ CommonState = PredState;
+
+ // At least two predecessors have different FinalStates,
+ // conservatively report this basic block as overdefined.
+ if (CommonState != PredState)
+ return OverdefinedState;
+ }
+
+ return CommonState;
+};
+
+static bool isStateStoreNeeded(EHPersonality Personality, CallSite CS) {
+ if (!CS)
+ return false;
+
+ if (isAsynchronousEHPersonality(Personality))
+ return !CS.doesNotAccessMemory();
+
+ return !CS.doesNotThrow();
+}
+
void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
// Mark the registration node. The backend needs to know which alloca it is so
// that it can recover the original frame pointer.
@@ -405,38 +500,86 @@ void WinEHStatePass::addStateStores(Func
// Iterate all the instructions and emit state number stores.
DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F);
- for (BasicBlock &BB : F) {
- // Figure out what state we should assign calls in this block.
- int BaseState = -1;
- auto &BBColors = BlockColors[&BB];
+ ReversePostOrderTraversal<Function *> RPOT(&F);
- assert(BBColors.size() == 1 &&
- "multi-color BB not removed by preparation");
- BasicBlock *FuncletEntryBB = BBColors.front();
- if (auto *FuncletPad =
- dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
- // We do not support nesting funclets within cleanuppads.
- if (isa<CleanupPadInst>(FuncletPad))
+ // InitialStates yields the state of the first call-site for a BasicBlock.
+ DenseMap<BasicBlock *, int> InitialStates;
+ // FinalStates yields the state of the last call-site for a BasicBlock.
+ DenseMap<BasicBlock *, int> FinalStates;
+ // Worklist used to revisit BasicBlocks with indeterminate
+ // Initial/Final-States.
+ std::deque<BasicBlock *> Worklist;
+ // Fill in InitialStates and FinalStates for BasicBlocks with call-sites.
+ for (BasicBlock *BB : RPOT) {
+ int InitialState = OverdefinedState;
+ int FinalState;
+ if (&F.getEntryBlock() == BB)
+ InitialState = FinalState = ParentBaseState;
+ for (Instruction &I : *BB) {
+ CallSite CS(&I);
+ if (!isStateStoreNeeded(Personality, CS))
continue;
- auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
- if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
- BaseState = BaseStateI->second;
+ int State = getStateForCallSite(BlockColors, FuncInfo, CS);
+ if (InitialState == OverdefinedState)
+ InitialState = State;
+ FinalState = State;
+ }
+ // No call-sites in this basic block? That's OK, we will come back to these
+ // in a later pass.
+ if (InitialState == OverdefinedState) {
+ Worklist.push_back(BB);
+ continue;
}
+ DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
+ << " InitialState=" << InitialState << '\n');
+ DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
+ << " FinalState=" << FinalState << '\n');
+ InitialStates.insert({BB, InitialState});
+ FinalStates.insert({BB, FinalState});
+ }
+
+ // Try to fill-in InitialStates and FinalStates which have no call-sites.
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.front();
+ Worklist.pop_front();
+ // This BasicBlock has already been figured out, nothing more we can do.
+ if (InitialStates.count(BB) != 0)
+ continue;
+
+ int PredState = getPredState(FinalStates, F, ParentBaseState, BB);
+ if (PredState == OverdefinedState)
+ continue;
+
+ // We successfully inferred this BasicBlock's state via it's predecessors;
+ // enqueue it's successors to see if we can infer their states.
+ InitialStates.insert({BB, PredState});
+ FinalStates.insert({BB, PredState});
+ for (BasicBlock *SuccBB : successors(BB))
+ Worklist.push_back(SuccBB);
+ }
+
+ // Finally, insert state stores before call-sites which transition us to a new
+ // state.
+ for (BasicBlock *BB : RPOT) {
+ auto &BBColors = BlockColors[BB];
+ BasicBlock *FuncletEntryBB = BBColors.front();
+ if (isa<CleanupPadInst>(FuncletEntryBB->getFirstNonPHI()))
+ continue;
+
+ int PrevState = getPredState(FinalStates, F, ParentBaseState, BB);
+ DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
+ << " PrevState=" << PrevState << '\n');
+
+ for (Instruction &I : *BB) {
+ CallSite CS(&I);
+ if (!isStateStoreNeeded(Personality, CS))
+ continue;
- for (Instruction &I : BB) {
- if (auto *CI = dyn_cast<CallInst>(&I)) {
- // Possibly throwing call instructions have no actions to take after
- // an unwind. Ensure they are in the -1 state.
- if (CI->doesNotThrow())
- continue;
- insertStateNumberStore(CI, BaseState);
- } else if (auto *II = dyn_cast<InvokeInst>(&I)) {
- // Look up the state number of the landingpad this unwinds to.
- assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
- int State = FuncInfo.InvokeStateMap[II];
- insertStateNumberStore(II, State);
- }
+ int State = getStateForCallSite(BlockColors, FuncInfo, CS);
+ if (State != PrevState)
+ insertStateNumberStore(&I, State);
+ PrevState = State;
}
}
}
Modified: llvm/trunk/test/CodeGen/WinEH/wineh-statenumbering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WinEH/wineh-statenumbering.ll?rev=261122&r1=261121&r2=261122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WinEH/wineh-statenumbering.ll (original)
+++ llvm/trunk/test/CodeGen/WinEH/wineh-statenumbering.ll Wed Feb 17 12:37:11 2016
@@ -28,7 +28,11 @@ entry:
; CHECK: entry:
; CHECK: store i32 -1
; CHECK: call void @g(i32 3)
+ ; CHECK-NEXT: call void @g(i32 4)
+ ; CHECK-NEXT: call void @g(i32 5)
call void @g(i32 3)
+ call void @g(i32 4)
+ call void @g(i32 5)
store i32 0, i32* %tmp, align 4
%0 = bitcast i32* %tmp to i8*
; CHECK: store i32 0
@@ -54,14 +58,22 @@ catch.3:
; CHECK: catch.3:
; CHECK: store i32 3
; CHECK: call void @g(i32 1)
+ ; CHECK-NEXT: call void @g(i32 2)
+ ; CHECK-NEXT: call void @g(i32 3)
call void @g(i32 1)
+ call void @g(i32 2)
+ call void @g(i32 3)
catchret from %2 to label %try.cont
try.cont: ; preds = %catch.3
; CHECK: try.cont:
; CHECK: store i32 1
; CHECK: call void @g(i32 2)
+ ; CHECK-NEXT: call void @g(i32 3)
+ ; CHECK-NEXT: call void @g(i32 4)
call void @g(i32 2)
+ call void @g(i32 3)
+ call void @g(i32 4)
unreachable
unreachable: ; preds = %catch
@@ -111,6 +123,10 @@ try.cont:
; CHECK: try.cont:
; CHECK: store i32 1
; CHECK: call void @dtor()
+ ; CHECK-NEXT: call void @dtor()
+ ; CHECK-NEXT: call void @dtor()
+ call void @dtor() #3 [ "funclet"(token %1) ]
+ call void @dtor() #3 [ "funclet"(token %1) ]
call void @dtor() #3 [ "funclet"(token %1) ]
catchret from %1 to label %try.cont4
@@ -131,6 +147,52 @@ unreachable1:
unreachable
}
+; CHECK-LABEL: define void @required_state_store(
+define void @required_state_store(i1 zeroext %cond) personality i32 (...)* @_except_handler3 {
+entry:
+ %__exception_code = alloca i32, align 4
+ call void (...) @llvm.localescape(i32* nonnull %__exception_code)
+; CHECK: store i32 -1
+; CHECK: call void @g(i32 0)
+ call void @g(i32 0)
+ br i1 %cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+; CHECK: store i32 0
+; CHECK-NEXT: invoke void @g(i32 1)
+ invoke void @g(i32 1)
+ to label %if.end unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %if.then
+ %0 = catchswitch within none [label %__except.ret] unwind to caller
+
+__except.ret: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* bitcast (i32 ()* @"\01?filt$0 at 0@required_state_store@@" to i8*)]
+ catchret from %1 to label %if.end
+
+if.end: ; preds = %if.then, %__except.ret, %entry
+; CHECK: store i32 -1
+; CHECK-NEXT: call void @dtor()
+ call void @dtor()
+ ret void
+}
+
+define internal i32 @"\01?filt$0 at 0@required_state_store@@"() {
+entry:
+ %0 = tail call i8* @llvm.frameaddress(i32 1)
+ %1 = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %0)
+ %2 = tail call i8* @llvm.localrecover(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %1, i32 0)
+ %__exception_code = bitcast i8* %2 to i32*
+ %3 = getelementptr inbounds i8, i8* %0, i32 -20
+ %4 = bitcast i8* %3 to { i32*, i8* }**
+ %5 = load { i32*, i8* }*, { i32*, i8* }** %4, align 4
+ %6 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %5, i32 0, i32 0
+ %7 = load i32*, i32** %6, align 4
+ %8 = load i32, i32* %7, align 4
+ store i32 %8, i32* %__exception_code, align 4
+ ret i32 1
+}
+
declare void @g(i32) #0
declare void @dtor()
@@ -139,6 +201,16 @@ declare x86_stdcallcc void @_CxxThrowExc
declare i32 @__CxxFrameHandler3(...)
+declare i8* @llvm.frameaddress(i32)
+
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+
+declare void @llvm.localescape(...)
+
+declare i32 @_except_handler3(...)
+
attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn }
Modified: llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll?rev=261122&r1=261121&r2=261122&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cleanuppad-inalloca.ll Wed Feb 17 12:37:11 2016
@@ -51,7 +51,7 @@ ehcleanup:
; CHECK: "?dtor$2@?0?passes_two at 4HA":
; CHECK: pushl %ebp
; CHECK: subl $8, %esp
-; CHECK: addl $12, %ebp
+; CHECK: addl $16, %ebp
; CHECK: {{movl|leal}} -{{[0-9]+}}(%ebp), %ecx
; CHECK: calll "??1A@@QAE at XZ"
; CHECK: addl $8, %esp
More information about the llvm-commits
mailing list