[cfe-commits] r74919 - /cfe/trunk/lib/Lex/Lexer.cpp

Chris Lattner sabre at nondot.org
Tue Jul 7 10:09:55 PDT 2009


Author: lattner
Date: Tue Jul  7 12:09:54 2009
New Revision: 74919

URL: http://llvm.org/viewvc/llvm-project?rev=74919&view=rev
Log:
Convert the CharInfo table to be statically initialized, instead of dynamically initialized.  Patch by Ryan Flynn!

Modified:
    cfe/trunk/lib/Lex/Lexer.cpp

Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=74919&r1=74918&r2=74919&view=diff

==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Tue Jul  7 12:09:54 2009
@@ -242,8 +242,6 @@
 // Character information.
 //===----------------------------------------------------------------------===//
 
-static unsigned char CharInfo[256];
-
 enum {
   CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
   CHAR_VERT_WS  = 0x02,  // '\r', '\n'
@@ -253,25 +251,98 @@
   CHAR_PERIOD   = 0x20   // .
 };
 
+// Statically initialize CharInfo table based on ASCII character set
+// Reference: FreeBSD 7.2 /usr/share/misc/ascii
+static const unsigned char CharInfo[256] =
+{
+// 0 NUL         1 SOH         2 STX         3 ETX
+// 4 EOT         5 ENQ         6 ACK         7 BEL
+   0           , 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+// 8 BS          9 HT         10 NL         11 VT
+//12 NP         13 CR         14 SO         15 SI
+   0           , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
+   CHAR_HORZ_WS, CHAR_VERT_WS, 0           , 0           ,
+//16 DLE        17 DC1        18 DC2        19 DC3
+//20 DC4        21 NAK        22 SYN        23 ETB
+   0           , 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//24 CAN        25 EM         26 SUB        27 ESC
+//28 FS         29 GS         30 RS         31 US
+   0           , 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//32 SP         33  !         34  "         35  #
+//36  $         37  %         38  &         39  '
+   CHAR_HORZ_WS, 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//40  (         41  )         42  *         43  +
+//44  ,         45  -         46  .         47  /
+   0           , 0           , 0           , 0           ,
+   0           , 0           , CHAR_PERIOD , 0           ,
+//48  0         49  1         50  2         51  3
+//52  4         53  5         54  6         55  7
+   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
+   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
+//56  8         57  9         58  :         59  ;
+//60  <         61  =         62  >         63  ?
+   CHAR_NUMBER , CHAR_NUMBER , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//64  @         65  A         66  B         67  C
+//68  D         69  E         70  F         71  G
+   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//72  H         73  I         74  J         75  K
+//76  L         77  M         78  N         79  O
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//80  P         81  Q         82  R         83  S
+//84  T         85  U         86  V         87  W
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//88  X         89  Y         90  Z         91  [
+//92  \         93  ]         94  ^         95  _
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
+   0           , 0           , 0           , CHAR_UNDER  ,
+//96  `         97  a         98  b         99  c
+//100  d       101  e        102  f        103  g
+   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//104  h       105  i        106  j        107  k
+//108  l       109  m        110  n        111  o
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//112  p       113  q        114  r        115  s
+//116  t       117  u        118  v        119  w
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//120  x       121  y        122  z        123  {
+//124  |        125  }        126  ~        127 DEL
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
+   0           , 0           , 0           , 0
+};
+
 static void InitCharacterInfo() {
   static bool isInited = false;
   if (isInited) return;
-  isInited = true;
-  
-  // Intiialize the CharInfo table.
-  // TODO: statically initialize this.
-  CharInfo[(int)' '] = CharInfo[(int)'\t'] = 
-  CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
-  CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
-  
-  CharInfo[(int)'_'] = CHAR_UNDER;
-  CharInfo[(int)'.'] = CHAR_PERIOD;
-  for (unsigned i = 'a'; i <= 'z'; ++i)
-    CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
+  // check the statically-initialized CharInfo table
+  assert(CHAR_HORZ_WS == CharInfo[(int)' ']);
+  assert(CHAR_HORZ_WS == CharInfo[(int)'\t']);
+  assert(CHAR_HORZ_WS == CharInfo[(int)'\f']);
+  assert(CHAR_HORZ_WS == CharInfo[(int)'\v']);
+  assert(CHAR_VERT_WS == CharInfo[(int)'\n']);
+  assert(CHAR_VERT_WS == CharInfo[(int)'\r']);
+  assert(CHAR_UNDER   == CharInfo[(int)'_']);
+  assert(CHAR_PERIOD  == CharInfo[(int)'.']);
+  for (unsigned i = 'a'; i <= 'z'; ++i) {
+    assert(CHAR_LETTER == CharInfo[i]);
+    assert(CHAR_LETTER == CharInfo[i+'A'-'a']);
+  }
   for (unsigned i = '0'; i <= '9'; ++i)
-    CharInfo[i] = CHAR_NUMBER;
+    assert(CHAR_NUMBER == CharInfo[i]);
+  isInited = true;
 }
 
+
 /// isIdentifierBody - Return true if this is the body character of an
 /// identifier, which is [a-zA-Z0-9_].
 static inline bool isIdentifierBody(unsigned char c) {





More information about the cfe-commits mailing list