[llvm-commits] [llvm] r163302 - in /llvm/trunk: lib/Transforms/Utils/SimplifyCFG.cpp test/Transforms/SimplifyCFG/switch_create.ll test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
Hans Wennborg
hans at hanshq.net
Thu Sep 6 02:43:28 PDT 2012
Author: hans
Date: Thu Sep 6 04:43:28 2012
New Revision: 163302
URL: http://llvm.org/viewvc/llvm-project?rev=163302&view=rev
Log:
Build lookup tables for switches (PR884)
This adds a transformation to SimplifyCFG that attemps to turn switch
instructions into loads from lookup tables. It works on switches that
are only used to initialize one or more phi nodes in a common successor
basic block, for example:
int f(int x) {
switch (x) {
case 0: return 5;
case 1: return 4;
case 2: return -2;
case 5: return 7;
case 6: return 9;
default: return 42;
}
This speeds up the code by removing the hard-to-predict jump, and
reduces code size by removing the code for the jump targets.
Added:
llvm/trunk/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
Modified:
llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=163302&r1=163301&r2=163302&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Thu Sep 6 04:43:28 2012
@@ -22,6 +22,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@@ -54,6 +55,7 @@
cl::desc("Duplicate return instructions into unconditional branches"));
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
namespace {
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -2977,6 +2979,287 @@
return Changed;
}
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+bool ValidLookupTableConstant(Constant *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return CE->isGEPWithNoNotionalOverIndexing();
+
+ return isa<ConstantFP>(C) ||
+ isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) ||
+ isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
+}
+
+/// GetCaseResulsts - Try to determine the resulting constant values in phi
+/// nodes at the common destination basic block for one of the case
+/// destinations of a switch instruction.
+static bool GetCaseResults(SwitchInst *SI,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty, continue to its successor.
+ if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() &&
+ !isa<PHINode>(CaseDest->begin())) {
+
+ TerminatorInst *Terminator = CaseDest->getTerminator();
+ if (Terminator->getNumSuccessors() != 1)
+ return false;
+
+ Pred = CaseDest;
+ CaseDest = Terminator->getSuccessor(0);
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx));
+ if (!ConstVal)
+ return false;
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+}
+
+/// BuildLookupTable - Build a lookup table with the contents of Results, using
+/// DefaultResult to fill the holes in the table. If the table ends up
+/// containing the same result in each element, set *SingleResult to that value
+/// and return NULL.
+static GlobalVariable *BuildLookupTable(
+ Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const std::vector<std::pair<ConstantInt*,Constant*> >& Results,
+ Constant *DefaultResult,
+ Constant **SingleResult) {
+ assert(Results.size() && "Need values to build lookup table");
+ assert(TableSize >= Results.size() && "Table needs to hold all values");
+
+ // If all values in the table are equal, this is that value.
+ Constant *SameResult = Results.begin()->second;
+
+ // Build up the table contents.
+ std::vector<Constant*> TableContents(TableSize);
+ for (size_t I = 0, E = Results.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Results[I].first;
+ Constant *CaseRes = Results[I].second;
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SameResult)
+ SameResult = NULL;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Results.size() < TableSize) {
+ for (unsigned i = 0; i < TableSize; ++i) {
+ if (!TableContents[i])
+ TableContents[i] = DefaultResult;
+ }
+
+ if (DefaultResult != SameResult)
+ SameResult = NULL;
+ }
+
+ // Same result was used in the entire table; just return that.
+ if (SameResult) {
+ *SingleResult = SameResult;
+ return NULL;
+ }
+
+ ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+ // FIXME: Handle unreachable cases.
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: If the results are all integers and the lookup table would fit in a
+ // target-legal register, we should store them as a bitmap and use shift/mask
+ // to look up the result.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore the switch if the number of cases are too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = NULL;
+ typedef std::vector<std::pair<ConstantInt*, Constant*> > ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ {
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+ }
+
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ // The table density should be at lest 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ // Be careful to avoid overlow in the density computation.
+ if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
+ return false;
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (SI->getNumCases() * 10 < TableSize * 4)
+ return false;
+
+ // Build the lookup tables.
+ SmallDenseMap<PHINode*, GlobalVariable*> LookupTables;
+ SmallDenseMap<PHINode*, Constant*> SingleResults;
+
+ Module &Mod = *CommonDest->getParent()->getParent();
+ for (SmallDenseMap<PHINode*, ResultListTy>::iterator I = ResultLists.begin(),
+ E = ResultLists.end(); I != E; ++I) {
+ PHINode *PHI = I->first;
+
+ Constant *SingleResult = NULL;
+ LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, I->second,
+ DefaultResults[PHI], &SingleResult);
+ SingleResults[PHI] = SingleResult;
+ }
+
+ // Create the BB that does the lookups.
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Check whether the condition value is within the case range, and branch to
+ // the new BB.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
+ I != E; ++I) {
+ PHINode *PHI = *I;
+ // There was a single result for this phi; just use that.
+ if (Constant *SingleResult = SingleResults[PHI]) {
+ PHI->addIncoming(SingleResult, LookupBB);
+ continue;
+ }
+
+ Value *GEPIndices[] = { Builder.getInt32(0), TableIndex };
+ Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices,
+ "switch.gep");
+ Value *Result = Builder.CreateLoad(GEP, "switch.load");
+
+ // If the result is only going to be used to return from the function,
+ // we want to do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) {
+ if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ }
+ }
+
+ if (!ReturnedEarly)
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == SI->getDefaultDest()) continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+}
+
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If this switch is too complex to want to look at, ignore it.
if (!isValueEqualityComparison(SI))
@@ -3016,6 +3299,9 @@
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
+ if (SwitchToLookupTable(SI, Builder))
+ return SimplifyCFG(BB) | true;
+
return false;
}
Modified: llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll?rev=163302&r1=163301&r2=163302&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/switch_create.ll Thu Sep 6 04:43:28 2012
@@ -141,8 +141,9 @@
ret i1 %UnifiedRetVal
; CHECK: @test6
-; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
+; CHECK: %switch.tableidx = sub i32 %tmp.2.i, 14
+; CHECK: %0 = icmp ult i32 %switch.tableidx, 6
+; CHECK: select i1 %0, i1 true, i1 false
}
define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
Added: llvm/trunk/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll?rev=163302&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll Thu Sep 6 04:43:28 2012
@@ -0,0 +1,140 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The table for @f
+; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
+
+; The float table for @h
+; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+
+; The int table for @h
+; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8] c"*\09X\05"
+
+; The table for @foostring
+; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+
+; A simple int-to-int selection switch.
+; It is dense enough to be replaced by table lookup.
+; The result is directly by a ret from an otherwise empty bb,
+; so we return early, directly from the lookup bb.
+
+define i32 @f(i32 %c) nounwind uwtable readnone {
+entry:
+ switch i32 %c, label %sw.default [
+ i32 42, label %return
+ i32 43, label %sw.bb1
+ i32 44, label %sw.bb2
+ i32 45, label %sw.bb3
+ i32 46, label %sw.bb4
+ i32 47, label %sw.bb5
+ i32 48, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+
+; CHECK: @f
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
+; CHECK: return:
+; CHECK-NEXT: ret i32 15
+}
+
+; A switch used to initialize two variables, an i8 and a float.
+
+declare void @dummy(i8 signext, float)
+define void @h(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %sw.epilog
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+
+sw.epilog:
+ %a.0 = phi i8 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ]
+ %b.0 = phi float [ 0x4023FAE140000000, %sw.default ], [ 0x4001AE1480000000, %sw.bb3 ], [ 0x4012449BA0000000, %sw.bb2 ], [ 0x3FF3BE76C0000000, %sw.bb1 ], [ 0x40091EB860000000, %entry ]
+ call void @dummy(i8 signext %a.0, float %b.0)
+ ret void
+
+; CHECK: @h
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table2, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i8* %switch.gep
+; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load2 = load float* %switch.gep1
+; CHECK-NEXT: br label %sw.epilog
+; CHECK: sw.epilog:
+; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
+; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
+; CHECK-NEXT: ret void
+}
+
+
+; Switch used to return a string.
+
+ at .str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
+ at .str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
+ at .str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
+ at .str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
+ at .str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
+
+define i8* @foostring(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %return
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+
+return:
+ %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0), %sw.default ],
+ [ getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+ [ getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+ [ getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+ [ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ]
+ ret i8* %retval.0
+
+; CHECK: @foostring
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i8** %switch.gep
+; CHECK-NEXT: ret i8* %switch.load
+}
More information about the llvm-commits
mailing list