[LLVMdev] Switch containing holes via table lookup

Jasper Neumann jn at sirrida.de
Tue Feb 11 14:06:14 PST 2014


Hi folks!

In /lib/Transforms/Utils/SimplifyCFG.cpp is optimization code which 
converts a switch statement to a table lookup but has problems when 
there are holes in the cases list and the default case can not be served 
with the table.

My first attempt to fix this is done by additionally testing a small set.

As an example the function
==>
unsigned test(unsigned x) {
    switch (x) {
      case 100: return 0;
      case 101: return 1;
      case 103: return 2;
      case 105: return 3;
      case 107: return 4;
      case 109: return 5;
      case 110: return 6;
      default: return x*3;
      }
    }
<==
can be converted to
==>
	.file	"t.cpp"
	.text
	.globl	_Z4testj
	.align	16, 0x90
	.type	_Z4testj, at function
_Z4testj:                               # @_Z4testj
	.cfi_startproc
# BB#0:                                 # %entry
                                          # kill: EDI<def> EDI<kill> 
RDI<def>
	leal	-100(%rdi), %eax
	cmpl	$11, %eax
	jae	.LBB0_3
# BB#1:                                 # %switch.lookup
	movl	$1707, %ecx             # imm = 0x6AB
	btl	%eax, %ecx
	jae	.LBB0_3
# BB#2:                                 # %switch.lookup2
	cltq
	movl	.Lswitch.table(,%rax,4), %eax
	retq
.LBB0_3:                                # %sw.default
	leal	(%rdi,%rdi,2), %eax
	retq
.Ltmp0:
	.size	_Z4testj, .Ltmp0-_Z4testj
	.cfi_endproc

	.type	.Lswitch.table, at object  # @switch.table
	.section	.rodata,"a", at progbits
	.align	16
.Lswitch.table:
	.long	0                       # 0x0
	.long	1                       # 0x1
	.long	0                       # 0x0
	.long	2                       # 0x2
	.long	0                       # 0x0
	.long	3                       # 0x3
	.long	0                       # 0x0
	.long	4                       # 0x4
	.long	0                       # 0x0
	.long	5                       # 0x5
	.long	6                       # 0x6
	.size	.Lswitch.table, 44


	.ident	"clang version 3.5 (trunk 200617)"
	.section	".note.GNU-stack","", at progbits
<==

By the way, what for is "cltq" just before accessing the value table?
The upper dword of rax should be zero.

Unfortunately my patch does not always work and the compiler crashes.
What have I overlooked? Please help.

Best regards
Jasper
PS: I posted a very similar message to llvm-commits at cs.uiuc.edu but got 
no response; perhaps this was the wrong place to discuss it.
-------------- next part --------------
Index: SimplifyCFG.cpp
===================================================================
--- SimplifyCFG.cpp	(revision 200617)
+++ SimplifyCFG.cpp	(working copy)
@@ -66,6 +66,10 @@
     "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
     cl::desc("Hoist conditional stores if an unconditional store precedes"));
 
+static cl::opt<bool> CheckHoles(
+    "simplifycfg-check-holes", cl::Hidden, cl::init(true),
+    cl::desc("Allow holes in a value table by testing a mask"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
 STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -3745,12 +3749,33 @@
   uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
   bool TableHasHoles = (NumResults < TableSize);
 
-  // If the table has holes, we need a constant result for the default case.
+  unsigned NeededMaskSize = 0;
+  if (CheckHoles) {
+    if (TableSize <= 32 && TD->fitsInLegalInteger(32))
+      NeededMaskSize = 32;
+    else if (TableSize <= 64 && TD->fitsInLegalInteger(64))
+      NeededMaskSize = 64;
+  }
+
+  // If the table has holes, we need a constant result for the default case...
   SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
-  if (TableHasHoles && !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
-                                       DefaultResultsList, TD))
+  bool NeedMask = (TableHasHoles &&
+                  !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+                                  DefaultResultsList, TD));
+  // ...or need to check for valid values with a sufficiently small bit set.
+  if (NeedMask && NeededMaskSize == 0)
     return false;
 
+  if (NeedMask) {
+    // As an extra penalty for the validity test we require more cases.
+    if (SI->getNumCases() < 4)  // TODO: Make threshold configurable.
+      return false;
+    // We need any value to fill the table; the first one suffices.
+    SwitchInst::CaseIt CI = SI->case_begin();
+    GetCaseResults(SI, CI.getCaseValue(), CI.getCaseSuccessor(),
+                   &CommonDest, DefaultResultsList, TD);
+  }
+
   for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
     PHINode *PHI = DefaultResultsList[I].first;
     Constant *Result = DefaultResultsList[I].second;
@@ -3796,6 +3821,43 @@
 
   // Populate the BB that does the lookups.
   Builder.SetInsertPoint(LookupBB);
+
+  if (NeedMask) {
+    uint64_t UseMask = 0;
+    for (SwitchInst::CaseIt CI = SI->case_begin(), E = SI->case_end();
+         CI != E; ++CI) {
+      ConstantInt *CaseVal = CI.getCaseValue();
+      UseMask |= 1ULL <<
+        (CaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue();
+    }
+
+    BasicBlock *MaskBB = BasicBlock::Create(Mod.getContext(),
+                                            "switch.lookup2",
+                                            CommonDest->getParent(),
+                                            CommonDest);
+    ConstantInt *Zero;
+    ConstantInt *One;
+    ConstantInt *Mask;
+    Value *CmpVal;
+    if (NeededMaskSize == 32) {
+      Zero = Builder.getInt32(0);
+      One = Builder.getInt32(1);
+      Mask = Builder.getInt32(UseMask);
+      CmpVal = Builder.CreateZExtOrTrunc(TableIndex, Builder.getInt32Ty());
+    } else {
+      Zero = Builder.getInt64(0);
+      One = Builder.getInt64(1);
+      Mask = Builder.getInt64(UseMask);
+      CmpVal = Builder.CreateZExtOrTrunc(TableIndex, Builder.getInt64Ty());
+    }
+    Value *Shr = Builder.CreateLShr(Mask, CmpVal);
+    Value *And = Builder.CreateAnd(Shr, One);
+    Value *CmpZ = Builder.CreateICmpNE(And, Zero);
+    Builder.CreateCondBr(CmpZ, MaskBB, SI->getDefaultDest());
+    Builder.SetInsertPoint(MaskBB);
+    LookupBB = MaskBB;
+  }
+
   bool ReturnedEarly = false;
   for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
     PHINode *PHI = PHIs[I];


More information about the llvm-dev mailing list