Switch containing holes via table lookup, version 3

Thu Mar 6 13:55:17 PST 2014

Hi Jasper,

Apologies for the long delay. Comments below:

> Index: SimplifyCFG.cpp
> ===================================================================
> --- SimplifyCFG.cpp (revision 202101)
> +++ SimplifyCFG.cpp (working copy)
> @@ -68,6 +68,7 @@
>
>  STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
>  STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
> +STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)");
>  STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
>  STATISTIC(NumSpeculations, "Number of speculative executed instructions");
>
> @@ -3453,7 +3454,8 @@
>                        ConstantInt *Offset,
>               const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
>                        Constant *DefaultValue,
> -                      const DataLayout *DL);
> +                      const DataLayout *DL,
> +                      bool IgnoreHoles);

We're not really ignoring the holes though, we're just going to fill
them with TableContents[0]. How about we rename "DefaultValue" to
"HoleValue" or something like that, and just pass in TableContents[0]
to it directly when we're using a bitmask?

>
>      /// BuildLookup - Build instructions with Builder to retrieve the value at
>      /// the position given by Index in the lookup table.
> @@ -3500,7 +3502,8 @@
>                                       ConstantInt *Offset,
>               const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
>                                       Constant *DefaultValue,
> -                                     const DataLayout *DL)
> +                                     const DataLayout *DL,
> +                                     bool IgnoreHoles)
>      : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
>    assert(Values.size() && "Can't build lookup table without values!");
>    assert(TableSize >= Values.size() && "Can't fit values in table!");
> @@ -3527,14 +3530,19 @@
>
>    // Fill in any holes in the table with the default result.
>    if (Values.size() < TableSize) {
> -    assert(DefaultValue && "Need a default value to fill the lookup table holes.");
> -    assert(DefaultValue->getType() == ValueType);
> +    Constant *DV;
> +    if (DefaultValue && !IgnoreHoles)
> +      DV = DefaultValue;
> +    else
> +      DV = TableContents[0];  // The first entry is not a hole.

If we'd simply pass in TableContents[0] to DefaultValue (renamed to
HoleValue), we wouldn't have to do this little dance.

> +    assert(DV && "Need a default value to fill the lookup table holes.");
> +    assert(DV->getType() == ValueType);
>      for (uint64_t I = 0; I < TableSize; ++I) {
>        if (!TableContents[I])
> -        TableContents[I] = DefaultValue;
> +        TableContents[I] = DV;
>      }
>
> -    if (DefaultValue != SingleValue)
> +    if (DV != SingleValue)
>        SingleValue = 0;
>    }
>
> @@ -3745,12 +3753,26 @@
>    uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
>    bool TableHasHoles = (NumResults < TableSize);
>
> -  // If the table has holes, we need a constant result for the default case.
> +  // If the table has holes, we need a constant result for the default case...
>    SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
> -  if (TableHasHoles && !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
> -                                       DefaultResultsList, DL))
> +  bool GotCaseResults = false;
> +  if (TableHasHoles) {
> +    GotCaseResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
> +                                    DefaultResultsList, DL);
> +  }
> +  // ...or need to check for valid values with a sufficiently small bit set.
> +  bool NeedMask = (TableHasHoles && !GotCaseResults);

How about renmaing GotCaseResults to HasDefaultResults? I think that's
easier to understand.

> +  if (NeedMask && !DL->fitsInLegalInteger(TableSize))

DL can be NULL here. This is causing the segfaults in the lit tests
you mentioned

>      return false;
>
> +  // Now we either can fill all holes (if present) with the default value
> +  // or we can check for the holes with a suitable bitmask.
> +  if (NeedMask) {
> +    // As an extra penalty for the validity test we require more cases.
> +    if (SI->getNumCases() < 4)  // TODO: Make threshold configurable.
> +      return false;

I think this extra getNumCases() check should come right after we've
defined NeedMask above.

> +  }
> +
>    for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
>      PHINode *PHI = DefaultResultsList[I].first;
>      Constant *Result = DefaultResultsList[I].second;
> @@ -3796,12 +3818,53 @@
>
>    // Populate the BB that does the lookups.
>    Builder.SetInsertPoint(LookupBB);
> +
> +  if (NeedMask) {
> +    // Insert code which checks a bitmask to check for valid table indices.
> +    BasicBlock *MaskBB = LookupBB;
> +    MaskBB->setName("hole_check");

It feels a little odd that we kind of take LookupBB here, rename it,
and then create a new one :) The code above which creates branches to
LookupBB is actually branching to this hole_check bb.

I wonder if it would be easier to follow if we first created LookupBB,
then if NeedMask is true we create HoleCheckBB. When we create the
branches above we branch to HoleCheckBB if it's non-NULL, and LookupBB
otherwise.

> +    LookupBB = BasicBlock::Create(Mod.getContext(),
> +                                  "switch.lookup",
> +                                  CommonDest->getParent(),
> +                                  CommonDest);
> +
> +    // Build bitmask; fill in a 1 bit for every case.
> +    APInt MaskInt(TableSize, 0);
> +    APInt One(TableSize, 1);
> +    APInt Zero(TableSize, 0);
> +    const ResultListTy &ResultList = ResultLists[PHIs[0]];
> +    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
> +      uint64_t Idx = (ResultList[I].first->getValue() -
> +                      MinCaseVal->getValue()).getLimitedValue();
> +      MaskInt |= One << Idx;
> +    }
> +    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
> +
> +    // Get the TableIndex'th bit of the bitmask.
> +    // If this bit is 0 (meaning hole) jump to the default destination,
> +    // else continue with table lookup.
> +    IntegerType *MapTy = TableMask->getType();
> +    Value *CmpVal = Builder.CreateZExtOrTrunc(TableIndex, MapTy,
> +                                              "switch.cmpval");
> +    Value *Shifted = Builder.CreateLShr(TableMask, CmpVal,
> +                                        "switch.shifted");
> +    Value *Masked = Builder.CreateAnd(Shifted,
> +                                      ConstantInt::get(Mod.getContext(), One),
> +                                      "switch.masked");
> +    Value *CmpZ = Builder.CreateICmpNE(Masked,
> +                                       ConstantInt::get(Mod.getContext(), Zero),
> +                                       "switch.cmpz");
> +    Builder.CreateCondBr(CmpZ, LookupBB, SI->getDefaultDest());

Like I said before, I wonder if this could be simplified. To read the
n'th least significant bit from x, we should be able to just do (x >>
n), then truncate that to i1, and stick that in the CondBr directly.
What do you think? (We'd have to check that it still gets lowere to a
bt instruction of course.)

On Wed, Feb 26, 2014 at 9:23 AM, Jasper Neumann <jn at sirrida.de> wrote:
> Hi folks!
>
> There is code which converts a switch statement to a table lookup but has
> problems when there are holes in the cases and the default case can not be
> served with the table.
>
> The affected file is /lib/Transforms/Utils/SimplifyCFG.cpp .
> This is done by additionally testing a small set.
>
> As an example the function
> ==>
> unsigned test(unsigned x) {
>   switch (x) {
>     case 100: return 0;
>     case 101: return 1;
>     case 103: return 2;
>     case 105: return 3;
>     case 107: return 4;
>     case 109: return 5;
>     case 110: return 6;
>     default: return x*3;
>     }
>   }
> <==
> will be converted with clang -O3 -S test.cpp to
> ==>
>         .file   "test.cpp"
>         .text
>         .globl  _Z4testj
>         .align  16, 0x90
>         .type   _Z4testj, at function
> _Z4testj:                               # @_Z4testj
>         .cfi_startproc
> # BB#0:                                 # %entry
>                                         # kill: EDI<def> EDI<kill> RDI<def>
>         leal    -100(%rdi), %eax
>         cmpl    $11, %eax
>         jae     .LBB0_3
> # BB#1:                                 # %hole_check
>         movl    $1707, %ecx             # imm = 0x6AB
>         btl     %eax, %ecx
>         jae     .LBB0_3
> # BB#2:                                 # %switch.lookup
>         cltq
>         movl    .Lswitch.table(,%rax,4), %eax
>         retq
> .LBB0_3:                                # %sw.default
>         leal    (%rdi,%rdi,2), %eax
>         retq
> .Ltmp0:
>         .size   _Z4testj, .Ltmp0-_Z4testj
>         .cfi_endproc
>
>         .type   .Lswitch.table, at object  # @switch.table
>         .section        .rodata,"a", at progbits
>         .align  16
> .Lswitch.table:
>         .long   0                       # 0x0
>         .long   1                       # 0x1
>         .long   0                       # 0x0
>         .long   2                       # 0x2
>         .long   0                       # 0x0
>         .long   3                       # 0x3
>         .long   0                       # 0x0
>         .long   4                       # 0x4
>         .long   0                       # 0x0
>         .long   5                       # 0x5
>         .long   6                       # 0x6
>         .size   .Lswitch.table, 44
>
>
>         .ident  "clang version 3.5 (trunk 202101)"
>         .section        ".note.GNU-stack","", at progbits
> <==
>
> I applied this patch to compile the clang/llvm project. There were 1290
> switches converted without needing a mask and additional 156 ones needing a
> mask.
> Probably many more could be converted using perfect hashing, but this is a
> different story...
>
> Best regards
> Jasper
> PS: Many thanks to Hans Wennborg to getting it run!