[LLVMdev] Switch containing holes via table lookup
Jasper Neumann
jn at sirrida.de
Tue Feb 11 14:06:14 PST 2014
Hi folks!
In /lib/Transforms/Utils/SimplifyCFG.cpp is optimization code which
converts a switch statement to a table lookup but has problems when
there are holes in the cases list and the default case can not be served
with the table.
My first attempt to fix this is done by additionally testing a small set.
As an example the function
==>
unsigned test(unsigned x) {
switch (x) {
case 100: return 0;
case 101: return 1;
case 103: return 2;
case 105: return 3;
case 107: return 4;
case 109: return 5;
case 110: return 6;
default: return x*3;
}
}
<==
can be converted to
==>
.file "t.cpp"
.text
.globl _Z4testj
.align 16, 0x90
.type _Z4testj, at function
_Z4testj: # @_Z4testj
.cfi_startproc
# BB#0: # %entry
# kill: EDI<def> EDI<kill>
RDI<def>
leal -100(%rdi), %eax
cmpl $11, %eax
jae .LBB0_3
# BB#1: # %switch.lookup
movl $1707, %ecx # imm = 0x6AB
btl %eax, %ecx
jae .LBB0_3
# BB#2: # %switch.lookup2
cltq
movl .Lswitch.table(,%rax,4), %eax
retq
.LBB0_3: # %sw.default
leal (%rdi,%rdi,2), %eax
retq
.Ltmp0:
.size _Z4testj, .Ltmp0-_Z4testj
.cfi_endproc
.type .Lswitch.table, at object # @switch.table
.section .rodata,"a", at progbits
.align 16
.Lswitch.table:
.long 0 # 0x0
.long 1 # 0x1
.long 0 # 0x0
.long 2 # 0x2
.long 0 # 0x0
.long 3 # 0x3
.long 0 # 0x0
.long 4 # 0x4
.long 0 # 0x0
.long 5 # 0x5
.long 6 # 0x6
.size .Lswitch.table, 44
.ident "clang version 3.5 (trunk 200617)"
.section ".note.GNU-stack","", at progbits
<==
By the way, what for is "cltq" just before accessing the value table?
The upper dword of rax should be zero.
Unfortunately my patch does not always work and the compiler crashes.
What have I overlooked? Please help.
Best regards
Jasper
PS: I posted a very similar message to llvm-commits at cs.uiuc.edu but got
no response; perhaps this was the wrong place to discuss it.
-------------- next part --------------
Index: SimplifyCFG.cpp
===================================================================
--- SimplifyCFG.cpp (revision 200617)
+++ SimplifyCFG.cpp (working copy)
@@ -66,6 +66,10 @@
"simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
cl::desc("Hoist conditional stores if an unconditional store precedes"));
+static cl::opt<bool> CheckHoles(
+ "simplifycfg-check-holes", cl::Hidden, cl::init(true),
+ cl::desc("Allow holes in a value table by testing a mask"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -3745,12 +3749,33 @@
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
bool TableHasHoles = (NumResults < TableSize);
- // If the table has holes, we need a constant result for the default case.
+ unsigned NeededMaskSize = 0;
+ if (CheckHoles) {
+ if (TableSize <= 32 && TD->fitsInLegalInteger(32))
+ NeededMaskSize = 32;
+ else if (TableSize <= 64 && TD->fitsInLegalInteger(64))
+ NeededMaskSize = 64;
+ }
+
+ // If the table has holes, we need a constant result for the default case...
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
- if (TableHasHoles && !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList, TD))
+ bool NeedMask = (TableHasHoles &&
+ !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, TD));
+ // ...or need to check for valid values with a sufficiently small bit set.
+ if (NeedMask && NeededMaskSize == 0)
return false;
+ if (NeedMask) {
+ // As an extra penalty for the validity test we require more cases.
+ if (SI->getNumCases() < 4) // TODO: Make threshold configurable.
+ return false;
+ // We need any value to fill the table; the first one suffices.
+ SwitchInst::CaseIt CI = SI->case_begin();
+ GetCaseResults(SI, CI.getCaseValue(), CI.getCaseSuccessor(),
+ &CommonDest, DefaultResultsList, TD);
+ }
+
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
Constant *Result = DefaultResultsList[I].second;
@@ -3796,6 +3821,43 @@
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
+
+ if (NeedMask) {
+ uint64_t UseMask = 0;
+ for (SwitchInst::CaseIt CI = SI->case_begin(), E = SI->case_end();
+ CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ UseMask |= 1ULL <<
+ (CaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue();
+ }
+
+ BasicBlock *MaskBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup2",
+ CommonDest->getParent(),
+ CommonDest);
+ ConstantInt *Zero;
+ ConstantInt *One;
+ ConstantInt *Mask;
+ Value *CmpVal;
+ if (NeededMaskSize == 32) {
+ Zero = Builder.getInt32(0);
+ One = Builder.getInt32(1);
+ Mask = Builder.getInt32(UseMask);
+ CmpVal = Builder.CreateZExtOrTrunc(TableIndex, Builder.getInt32Ty());
+ } else {
+ Zero = Builder.getInt64(0);
+ One = Builder.getInt64(1);
+ Mask = Builder.getInt64(UseMask);
+ CmpVal = Builder.CreateZExtOrTrunc(TableIndex, Builder.getInt64Ty());
+ }
+ Value *Shr = Builder.CreateLShr(Mask, CmpVal);
+ Value *And = Builder.CreateAnd(Shr, One);
+ Value *CmpZ = Builder.CreateICmpNE(And, Zero);
+ Builder.CreateCondBr(CmpZ, MaskBB, SI->getDefaultDest());
+ Builder.SetInsertPoint(MaskBB);
+ LookupBB = MaskBB;
+ }
+
bool ReturnedEarly = false;
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
PHINode *PHI = PHIs[I];
More information about the llvm-dev
mailing list