[cfe-commits] r39150 - in /cfe/cfe/trunk: AST/Sema.cpp AST/Sema.h AST/SemaDecl.cpp Parse/ParseDecl.cpp Parse/ParseExpr.cpp Parse/Parser.cpp Sema/Sema.cpp Sema/Sema.h Sema/SemaDecl.cpp clang.xcodeproj/project.pbxproj include/clang/Parse/Action.h include/clang/Parse/Parser.h

sabre at cs.uiuc.edu sabre at cs.uiuc.edu
Wed Jul 11 09:39:57 PDT 2007


Author: sabre
Date: Wed Jul 11 11:39:57 2007
New Revision: 39150

URL: http://llvm.org/viewvc/llvm-project?rev=39150&view=rev
Log:
Change course on how we do semantic analysis.  Semantic analysis
fundamentally requires having an AST around, so move all sema to the
AST library.  This is the first step; later steps will be needed to
clean up libast.

Added:
    cfe/cfe/trunk/AST/SemaDecl.cpp   (with props)
    cfe/cfe/trunk/Sema/SemaDecl.cpp   (with props)
Modified:
    cfe/cfe/trunk/AST/Sema.cpp
    cfe/cfe/trunk/AST/Sema.h
    cfe/cfe/trunk/Parse/ParseDecl.cpp
    cfe/cfe/trunk/Parse/ParseExpr.cpp
    cfe/cfe/trunk/Parse/Parser.cpp
    cfe/cfe/trunk/Sema/Sema.cpp
    cfe/cfe/trunk/Sema/Sema.h
    cfe/cfe/trunk/clang.xcodeproj/project.pbxproj
    cfe/cfe/trunk/include/clang/Parse/Action.h
    cfe/cfe/trunk/include/clang/Parse/Parser.h

Modified: cfe/cfe/trunk/AST/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/Sema.cpp?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/AST/Sema.cpp (original)
+++ cfe/cfe/trunk/AST/Sema.cpp Wed Jul 11 11:39:57 2007
@@ -13,13 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/ASTBuilder.h"
-#include "clang/Parse/Action.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
+#include "clang/Parse/Action.h"
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/IdentifierTable.h"
 #include "clang/Lex/Preprocessor.h"
-#include "llvm/Support/Compiler.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
@@ -214,18 +216,225 @@
   return Val;
 }
 
+
+
+
+/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+/// not valid.
+static int HexDigitValue(char C) {
+  if (C >= '0' && C <= '9') return C-'0';
+  if (C >= 'a' && C <= 'f') return C-'a'+10;
+  if (C >= 'A' && C <= 'F') return C-'A'+10;
+  return -1;
+}
+
+/// ParseStringExpr - The specified tokens were lexed as pasted string
+/// fragments (e.g. "foo" "bar" L"baz").
+
 /// ParseStringExpr - This accepts a string after semantic analysis. This string
 /// may be the result of string concatenation ([C99 5.1.1.2, translation phase
 /// #6]), so it may come from multiple tokens.
 /// 
-Action::ExprResult ASTBuilder::
-ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
-                SourceLocation *TokLocs, unsigned NumToks) {
-  assert(NumToks && "Must have at least one string!");
-  return new StringExpr(StrData, StrLen, isWide);
+Action::ExprResult
+ASTBuilder::ParseStringExpr(const LexerToken *StringToks,
+                            unsigned NumStringToks) {
+  assert(NumStringToks && "Must have at least one string!");
+
+  // Scan all of the string portions, remember the max individual token length,
+  // computing a bound on the concatenated string length, and see whether any
+  // piece is a wide-string.  If any of the string portions is a wide-string
+  // literal, the result is a wide-string literal [C99 6.4.5p4].
+  unsigned MaxTokenLength = StringToks[0].getLength();
+  unsigned SizeBound = StringToks[0].getLength()-2;  // -2 for "".
+  bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
+  
+  // The common case is that there is only one string fragment.
+  for (unsigned i = 1; i != NumStringToks; ++i) {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+
+    // Remember maximum string piece length.
+    if (StringToks[i].getLength() > MaxTokenLength) 
+      MaxTokenLength = StringToks[i].getLength();
+    
+    // Remember if we see any wide strings.
+    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
+  }
+  
+  
+  // Include space for the null terminator.
+  ++SizeBound;
+  
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+  
+  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
+  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
+  unsigned wchar_tByteWidth = ~0U;
+  if (AnyWide)
+    wchar_tByteWidth =
+      PP.getTargetInfo().getWCharWidth(StringToks[0].getLocation());
+  
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+  
+  // Create a temporary buffer to hold the result string data.
+  SmallString<512> ResultBuf;
+  ResultBuf.resize(SizeBound);
+  
+  // Likewise, but for each string piece.
+  SmallString<512> TokenBuf;
+  TokenBuf.resize(MaxTokenLength);
+  
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  char *ResultPtr = &ResultBuf[0];   // Next byte to fill in.
+  
+  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    
+    // TODO: Input character set mapping support.
+    
+    // Skip L marker for wide strings.
+    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+    
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+    
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+        
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Add zeros at the end.
+            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+      
+      // Otherwise, this is an escape character.  Skip the '\' char.
+      ++ThisTokBuf;
+      
+      // We know that this character can't be off the end of the buffer, because
+      // that would have been \", which would not have been the end of string.
+      unsigned ResultChar = *ThisTokBuf++;
+      switch (ResultChar) {
+      // These map to themselves.
+      case '\\': case '\'': case '"': case '?': break;
+        
+      // These have fixed mappings.
+      case 'a':
+        // TODO: K&R: the meaning of '\\a' is different in traditional C
+        ResultChar = 7;
+        break;
+      case 'b':
+        ResultChar = 8;
+        break;
+      case 'e':
+        PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e");
+        ResultChar = 27;
+        break;
+      case 'f':
+        ResultChar = 12;
+        break;
+      case 'n':
+        ResultChar = 10;
+        break;
+      case 'r':
+        ResultChar = 13;
+        break;
+      case 't':
+        ResultChar = 9;
+        break;
+      case 'v':
+        ResultChar = 11;
+        break;
+        
+      //case 'u': case 'U':  // FIXME: UCNs.
+      case 'x': // Hex escape.
+        if (ThisTokBuf == ThisTokEnd ||
+            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+          PP.Diag(StringToks[i], diag::err_hex_escape_no_digits);
+          ResultChar = 0;
+          break;
+        }
+        ++ThisTokBuf; // Consumed one hex digit.
+        
+        assert(0 && "hex escape: unimp!");
+        break;
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+        // Octal escapes.
+        assert(0 && "octal escape: unimp!");
+        break;
+        
+      // Otherwise, these are not valid escapes.
+      case '(': case '{': case '[': case '%':
+        // GCC accepts these as extensions.  We warn about them as such though.
+        if (!PP.getLangOptions().NoExtensions) {
+          PP.Diag(StringToks[i], diag::ext_nonstandard_escape,
+                  std::string()+(char)ResultChar);
+          break;
+        }
+        // FALL THROUGH.
+      default:
+        if (isgraph(ThisTokBuf[0])) {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  std::string()+(char)ResultChar);
+        } else {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  "x"+utohexstr(ResultChar));
+        }
+      }
+
+      // Note: our internal rep of wide char tokens is always little-endian.
+      *ResultPtr++ = ResultChar & 0xFF;
+      
+      if (AnyWide) {
+        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+          *ResultPtr++ = ResultChar >> i*8;
+      }
+    }
+  }
+  
+  // Add zero terminator.
+  *ResultPtr = 0;
+  if (AnyWide) {
+    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+      *ResultPtr++ = 0;
+  }
+  
+  SmallVector<SourceLocation, 4> StringTokLocs;
+  for (unsigned i = 0; i != NumStringToks; ++i)
+    StringTokLocs.push_back(StringToks[i].getLocation());
+  
+  // FIXME: use factory.
+  
+  // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
+  return new StringExpr(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
 }
 
-
 // Unary Operators.  'Tok' is the token for the operator.
 Action::ExprResult ASTBuilder::ParseUnaryOp(SourceLocation OpLoc,
                                             tok::TokenKind Op,

Modified: cfe/cfe/trunk/AST/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/Sema.h?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/AST/Sema.h (original)
+++ cfe/cfe/trunk/AST/Sema.h Wed Jul 11 11:39:57 2007
@@ -26,7 +26,7 @@
 /// builds AST nodes for the code being parsed.  Clients can either use this
 /// unmodified or subclass it and overload methods to do more specialized
 /// things.
-class ASTBuilder : public SemanticAction {
+class ASTBuilder : public Action {
   Preprocessor &PP;
   
   /// LastInGroupList - This vector is populated when there are multiple
@@ -99,10 +99,11 @@
   virtual ExprResult ParseFloatingConstant(SourceLocation Loc);
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val);
-  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
-                                     bool isWide,
-                                     SourceLocation *TokLocs, unsigned NumToks);
-  
+
+  /// ParseStringExpr - The specified tokens were lexed as pasted string
+  /// fragments (e.g. "foo" "bar" L"baz").
+  virtual ExprResult ParseStringExpr(const LexerToken *Toks, unsigned NumToks);
+    
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   virtual ExprResult ParseUnaryOp(SourceLocation OpLoc, tok::TokenKind Op,
                                   ExprTy *Input);

Added: cfe/cfe/trunk/AST/SemaDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/SemaDecl.cpp?rev=39150&view=auto

==============================================================================
--- cfe/cfe/trunk/AST/SemaDecl.cpp (added)
+++ cfe/cfe/trunk/AST/SemaDecl.cpp Wed Jul 11 11:39:57 2007
@@ -0,0 +1,18 @@
+//===--- SemaDecl.cpp - Semantic Analysis for Declarations ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements semantic analysis for declarations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Parse/Parser.h"
+#include "clang/Parse/SemaDeclSpec.h"
+using namespace llvm;
+using namespace clang;
+

Propchange: cfe/cfe/trunk/AST/SemaDecl.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/AST/SemaDecl.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Modified: cfe/cfe/trunk/Parse/ParseDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseDecl.cpp?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/ParseDecl.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseDecl.cpp Wed Jul 11 11:39:57 2007
@@ -145,8 +145,7 @@
       }
     }
     
-    // Inform the current actions module that we just parsed a declarator or
-    // invoke semantic analysis for this declarator.
+    // Inform the current actions module that we just parsed this declarator.
     // FIXME: pass asm & attributes.
     LastDeclInGroup = Actions.ParseDeclarator(CurScope, D, Init.Val,
                                               LastDeclInGroup);

Modified: cfe/cfe/trunk/Parse/ParseExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseExpr.cpp?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/ParseExpr.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseExpr.cpp Wed Jul 11 11:39:57 2007
@@ -21,10 +21,8 @@
 
 #include "clang/Parse/Parser.h"
 #include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
@@ -886,15 +884,6 @@
   return Result;
 }
 
-/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
-/// not valid.
-static int HexDigitValue(char C) {
-  if (C >= '0' && C <= '9') return C-'0';
-  if (C >= 'a' && C <= 'f') return C-'a'+10;
-  if (C >= 'A' && C <= 'F') return C-'A'+10;
-  return -1;
-}
-
 /// ParseStringLiteralExpression - This handles the various token types that
 /// form string literals, and also handles string concatenation [C99 5.1.1.2,
 /// translation phase #6].
@@ -908,26 +897,7 @@
   // considered to be strings for concatenation purposes.
   SmallVector<LexerToken, 4> StringToks;
   
-  // While we're looking at all of the string portions, remember the max
-  // individual token length, computing a bound on the concatenated string
-  // length, and see whether any piece is a wide-string.  If any of the string
-  // portions is a wide-string literal, the result is also a wide-string literal
-  // [C99 6.4.5p4].
-  unsigned SizeBound = 0, MaxTokenLength = 0;
-  bool AnyWide = false;
   do {
-    // The string could be shorter than this if it needs cleaning, but this is a
-    // reasonable bound, which is all we need.
-    SizeBound += Tok.getLength()-2;  // -2 for "".
-    
-    // Find maximum string piece length.
-    if (Tok.getLength() > MaxTokenLength) 
-      MaxTokenLength = Tok.getLength();
-    
-    // Remember if we see any wide strings.
-    AnyWide |= Tok.getKind() == tok::wide_string_literal;
-    
-    // Remember the string token.
     StringToks.push_back(Tok);
     ConsumeStringToken();
   } while (isTokenStringLiteral());
@@ -935,177 +905,6 @@
   
   // If using minimal actions, don't do any semantic analysis of the parsed
   // string fragments.
-  if (MinimalActions)
-    return MinimalActions->ParseStringExpr(&StringToks[0], StringToks.size());
-  
-  // Include space for the null terminator.
-  ++SizeBound;
-  
-  // TODO: K&R warning: "traditional C rejects string constant concatenation"
-  
-  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
-  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
-  unsigned wchar_tByteWidth = ~0U;
-  if (AnyWide)
-    wchar_tByteWidth=getTargetInfo().getWCharWidth(StringToks[0].getLocation());
-  
-  // The output buffer size needs to be large enough to hold wide characters.
-  // This is a worst-case assumption which basically corresponds to L"" "long".
-  if (AnyWide)
-    SizeBound *= wchar_tByteWidth;
-  
-  // Create a temporary buffer to hold the result string data.
-  SmallString<512> ResultBuf;
-  ResultBuf.resize(SizeBound);
-  
-  // Likewise, but for each string piece.
-  SmallString<512> TokenBuf;
-  TokenBuf.resize(MaxTokenLength);
-  
-  // Loop over all the strings, getting their spelling, and expanding them to
-  // wide strings as appropriate.
-  char *ResultPtr = &ResultBuf[0];   // Next byte to fill in.
-  
-  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
-    const char *ThisTokBuf = &TokenBuf[0];
-    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
-    // that ThisTokBuf points to a buffer that is big enough for the whole token
-    // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
-    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
-    
-    // TODO: Input character set mapping support.
-    
-    // Skip L marker for wide strings.
-    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
-    
-    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
-    ++ThisTokBuf;
-    
-    while (ThisTokBuf != ThisTokEnd) {
-      // Is this a span of non-escape characters?
-      if (ThisTokBuf[0] != '\\') {
-        const char *InStart = ThisTokBuf;
-        do {
-          ++ThisTokBuf;
-        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
-        
-        // Copy the character span over.
-        unsigned Len = ThisTokBuf-InStart;
-        if (!AnyWide) {
-          memcpy(ResultPtr, InStart, Len);
-          ResultPtr += Len;
-        } else {
-          // Note: our internal rep of wide char tokens is always little-endian.
-          for (; Len; --Len, ++InStart) {
-            *ResultPtr++ = InStart[0];
-            // Add zeros at the end.
-            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-              *ResultPtr++ = 0;
-          }
-        }
-        continue;
-      }
-      
-      // Otherwise, this is an escape character.  Skip the '\' char.
-      ++ThisTokBuf;
-      
-      // We know that this character can't be off the end of the buffer, because
-      // that would have been \", which would not have been the end of string.
-      unsigned ResultChar = *ThisTokBuf++;
-      switch (ResultChar) {
-      // These map to themselves.
-      case '\\': case '\'': case '"': case '?': break;
-        
-      // These have fixed mappings.
-      case 'a':
-        // TODO: K&R: the meaning of '\\a' is different in traditional C
-        ResultChar = 7;
-        break;
-      case 'b':
-        ResultChar = 8;
-        break;
-      case 'e':
-        PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e");
-        ResultChar = 27;
-        break;
-      case 'f':
-        ResultChar = 12;
-        break;
-      case 'n':
-        ResultChar = 10;
-        break;
-      case 'r':
-        ResultChar = 13;
-        break;
-      case 't':
-        ResultChar = 9;
-        break;
-      case 'v':
-        ResultChar = 11;
-        break;
-        
-      //case 'u': case 'U':  // FIXME: UCNs.
-      case 'x': // Hex escape.
-        if (ThisTokBuf == ThisTokEnd ||
-            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
-          PP.Diag(StringToks[i], diag::err_hex_escape_no_digits);
-          ResultChar = 0;
-          break;
-        }
-        ++ThisTokBuf; // Consumed one hex digit.
-        
-        assert(0 && "hex escape: unimp!");
-        break;
-      case '0': case '1': case '2': case '3':
-      case '4': case '5': case '6': case '7':
-        // Octal escapes.
-        assert(0 && "octal escape: unimp!");
-        break;
-        
-      // Otherwise, these are not valid escapes.
-      case '(': case '{': case '[': case '%':
-        // GCC accepts these as extensions.  We warn about them as such though.
-        if (!PP.getLangOptions().NoExtensions) {
-          PP.Diag(StringToks[i], diag::ext_nonstandard_escape,
-                  std::string()+(char)ResultChar);
-          break;
-        }
-        // FALL THROUGH.
-      default:
-        if (isgraph(ThisTokBuf[0])) {
-          PP.Diag(StringToks[i], diag::ext_unknown_escape,
-                  std::string()+(char)ResultChar);
-        } else {
-          PP.Diag(StringToks[i], diag::ext_unknown_escape,
-                  "x"+utohexstr(ResultChar));
-        }
-      }
-
-      // Note: our internal rep of wide char tokens is always little-endian.
-      *ResultPtr++ = ResultChar & 0xFF;
-      
-      if (AnyWide) {
-        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-          *ResultPtr++ = ResultChar >> i*8;
-      }
-    }
-  }
-  
-  // Add zero terminator.
-  *ResultPtr = 0;
-  if (AnyWide) {
-    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-      *ResultPtr++ = 0;
-  }
-  
-  SmallVector<SourceLocation, 4> StringTokLocs;
-  for (unsigned i = 0; i != StringToks.size(); ++i)
-    StringTokLocs.push_back(StringToks[i].getLocation());
-  
-  // Hand this off to the Actions.
-  return SemaActions->ParseStringExpr(&ResultBuf[0], ResultPtr-&ResultBuf[0],
-                                      AnyWide, &StringTokLocs[0],
-                                      StringTokLocs.size());
+  return Actions.ParseStringExpr(&StringToks[0], StringToks.size());
 }
 

Modified: cfe/cfe/trunk/Parse/Parser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/Parser.cpp?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/Parser.cpp (original)
+++ cfe/cfe/trunk/Parse/Parser.cpp Wed Jul 11 11:39:57 2007
@@ -17,20 +17,8 @@
 using namespace llvm;
 using namespace clang;
 
-Parser::Parser(Preprocessor &pp, MinimalAction &MinActions)
-  : PP(pp), Actions(MinActions), Diags(PP.getDiagnostics()) {
-  MinimalActions = &MinActions;
-  SemaActions = 0;
-  Tok.setKind(tok::eof);
-  CurScope = 0;
-  
-  ParenCount = BracketCount = BraceCount = 0;
-}
-
-Parser::Parser(Preprocessor &pp, SemanticAction &SemanticActions)
-  : PP(pp), Actions(SemanticActions), Diags(PP.getDiagnostics()) {
-  MinimalActions = 0;
-  SemaActions = &SemanticActions;
+Parser::Parser(Preprocessor &pp, Action &actions)
+  : PP(pp), Actions(actions), Diags(PP.getDiagnostics()) {
   Tok.setKind(tok::eof);
   CurScope = 0;
   

Modified: cfe/cfe/trunk/Sema/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/Sema.cpp?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/Sema/Sema.cpp (original)
+++ cfe/cfe/trunk/Sema/Sema.cpp Wed Jul 11 11:39:57 2007
@@ -13,13 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/ASTBuilder.h"
-#include "clang/Parse/Action.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
+#include "clang/Parse/Action.h"
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/IdentifierTable.h"
 #include "clang/Lex/Preprocessor.h"
-#include "llvm/Support/Compiler.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
@@ -214,18 +216,225 @@
   return Val;
 }
 
+
+
+
+/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+/// not valid.
+static int HexDigitValue(char C) {
+  if (C >= '0' && C <= '9') return C-'0';
+  if (C >= 'a' && C <= 'f') return C-'a'+10;
+  if (C >= 'A' && C <= 'F') return C-'A'+10;
+  return -1;
+}
+
+/// ParseStringExpr - The specified tokens were lexed as pasted string
+/// fragments (e.g. "foo" "bar" L"baz").
+
 /// ParseStringExpr - This accepts a string after semantic analysis. This string
 /// may be the result of string concatenation ([C99 5.1.1.2, translation phase
 /// #6]), so it may come from multiple tokens.
 /// 
-Action::ExprResult ASTBuilder::
-ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
-                SourceLocation *TokLocs, unsigned NumToks) {
-  assert(NumToks && "Must have at least one string!");
-  return new StringExpr(StrData, StrLen, isWide);
+Action::ExprResult
+ASTBuilder::ParseStringExpr(const LexerToken *StringToks,
+                            unsigned NumStringToks) {
+  assert(NumStringToks && "Must have at least one string!");
+
+  // Scan all of the string portions, remember the max individual token length,
+  // computing a bound on the concatenated string length, and see whether any
+  // piece is a wide-string.  If any of the string portions is a wide-string
+  // literal, the result is a wide-string literal [C99 6.4.5p4].
+  unsigned MaxTokenLength = StringToks[0].getLength();
+  unsigned SizeBound = StringToks[0].getLength()-2;  // -2 for "".
+  bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
+  
+  // The common case is that there is only one string fragment.
+  for (unsigned i = 1; i != NumStringToks; ++i) {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+
+    // Remember maximum string piece length.
+    if (StringToks[i].getLength() > MaxTokenLength) 
+      MaxTokenLength = StringToks[i].getLength();
+    
+    // Remember if we see any wide strings.
+    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
+  }
+  
+  
+  // Include space for the null terminator.
+  ++SizeBound;
+  
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+  
+  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
+  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
+  unsigned wchar_tByteWidth = ~0U;
+  if (AnyWide)
+    wchar_tByteWidth =
+      PP.getTargetInfo().getWCharWidth(StringToks[0].getLocation());
+  
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+  
+  // Create a temporary buffer to hold the result string data.
+  SmallString<512> ResultBuf;
+  ResultBuf.resize(SizeBound);
+  
+  // Likewise, but for each string piece.
+  SmallString<512> TokenBuf;
+  TokenBuf.resize(MaxTokenLength);
+  
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  char *ResultPtr = &ResultBuf[0];   // Next byte to fill in.
+  
+  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    
+    // TODO: Input character set mapping support.
+    
+    // Skip L marker for wide strings.
+    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+    
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+    
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+        
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Add zeros at the end.
+            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+      
+      // Otherwise, this is an escape character.  Skip the '\' char.
+      ++ThisTokBuf;
+      
+      // We know that this character can't be off the end of the buffer, because
+      // that would have been \", which would not have been the end of string.
+      unsigned ResultChar = *ThisTokBuf++;
+      switch (ResultChar) {
+      // These map to themselves.
+      case '\\': case '\'': case '"': case '?': break;
+        
+      // These have fixed mappings.
+      case 'a':
+        // TODO: K&R: the meaning of '\\a' is different in traditional C
+        ResultChar = 7;
+        break;
+      case 'b':
+        ResultChar = 8;
+        break;
+      case 'e':
+        PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e");
+        ResultChar = 27;
+        break;
+      case 'f':
+        ResultChar = 12;
+        break;
+      case 'n':
+        ResultChar = 10;
+        break;
+      case 'r':
+        ResultChar = 13;
+        break;
+      case 't':
+        ResultChar = 9;
+        break;
+      case 'v':
+        ResultChar = 11;
+        break;
+        
+      //case 'u': case 'U':  // FIXME: UCNs.
+      case 'x': // Hex escape.
+        if (ThisTokBuf == ThisTokEnd ||
+            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+          PP.Diag(StringToks[i], diag::err_hex_escape_no_digits);
+          ResultChar = 0;
+          break;
+        }
+        ++ThisTokBuf; // Consumed one hex digit.
+        
+        assert(0 && "hex escape: unimp!");
+        break;
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+        // Octal escapes.
+        assert(0 && "octal escape: unimp!");
+        break;
+        
+      // Otherwise, these are not valid escapes.
+      case '(': case '{': case '[': case '%':
+        // GCC accepts these as extensions.  We warn about them as such though.
+        if (!PP.getLangOptions().NoExtensions) {
+          PP.Diag(StringToks[i], diag::ext_nonstandard_escape,
+                  std::string()+(char)ResultChar);
+          break;
+        }
+        // FALL THROUGH.
+      default:
+        if (isgraph(ThisTokBuf[0])) {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  std::string()+(char)ResultChar);
+        } else {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  "x"+utohexstr(ResultChar));
+        }
+      }
+
+      // Note: our internal rep of wide char tokens is always little-endian.
+      *ResultPtr++ = ResultChar & 0xFF;
+      
+      if (AnyWide) {
+        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+          *ResultPtr++ = ResultChar >> i*8;
+      }
+    }
+  }
+  
+  // Add zero terminator.
+  *ResultPtr = 0;
+  if (AnyWide) {
+    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+      *ResultPtr++ = 0;
+  }
+  
+  SmallVector<SourceLocation, 4> StringTokLocs;
+  for (unsigned i = 0; i != NumStringToks; ++i)
+    StringTokLocs.push_back(StringToks[i].getLocation());
+  
+  // FIXME: use factory.
+  
+  // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
+  return new StringExpr(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
 }
 
-
 // Unary Operators.  'Tok' is the token for the operator.
 Action::ExprResult ASTBuilder::ParseUnaryOp(SourceLocation OpLoc,
                                             tok::TokenKind Op,

Modified: cfe/cfe/trunk/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/Sema.h?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/Sema/Sema.h (original)
+++ cfe/cfe/trunk/Sema/Sema.h Wed Jul 11 11:39:57 2007
@@ -26,7 +26,7 @@
 /// builds AST nodes for the code being parsed.  Clients can either use this
 /// unmodified or subclass it and overload methods to do more specialized
 /// things.
-class ASTBuilder : public SemanticAction {
+class ASTBuilder : public Action {
   Preprocessor &PP;
   
   /// LastInGroupList - This vector is populated when there are multiple
@@ -99,10 +99,11 @@
   virtual ExprResult ParseFloatingConstant(SourceLocation Loc);
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val);
-  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
-                                     bool isWide,
-                                     SourceLocation *TokLocs, unsigned NumToks);
-  
+
+  /// ParseStringExpr - The specified tokens were lexed as pasted string
+  /// fragments (e.g. "foo" "bar" L"baz").
+  virtual ExprResult ParseStringExpr(const LexerToken *Toks, unsigned NumToks);
+    
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   virtual ExprResult ParseUnaryOp(SourceLocation OpLoc, tok::TokenKind Op,
                                   ExprTy *Input);

Added: cfe/cfe/trunk/Sema/SemaDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/SemaDecl.cpp?rev=39150&view=auto

==============================================================================
--- cfe/cfe/trunk/Sema/SemaDecl.cpp (added)
+++ cfe/cfe/trunk/Sema/SemaDecl.cpp Wed Jul 11 11:39:57 2007
@@ -0,0 +1,18 @@
+//===--- SemaDecl.cpp - Semantic Analysis for Declarations ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements semantic analysis for declarations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Parse/Parser.h"
+#include "clang/Parse/SemaDeclSpec.h"
+using namespace llvm;
+using namespace clang;
+

Propchange: cfe/cfe/trunk/Sema/SemaDecl.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Sema/SemaDecl.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Modified: cfe/cfe/trunk/clang.xcodeproj/project.pbxproj
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/clang.xcodeproj/project.pbxproj?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/clang.xcodeproj/project.pbxproj (original)
+++ cfe/cfe/trunk/clang.xcodeproj/project.pbxproj Wed Jul 11 11:39:57 2007
@@ -34,6 +34,7 @@
 		DE34621D0AFEB19B00DBC861 /* StmtPrinter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE34621C0AFEB19B00DBC861 /* StmtPrinter.cpp */; };
 		DE3462960B019D9800DBC861 /* Type.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE3462950B019D9800DBC861 /* Type.h */; };
 		DE3462D90B01B52900DBC861 /* SemaDeclSpec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE3462D80B01B52900DBC861 /* SemaDeclSpec.cpp */; };
+		DE3463400B02F0F800DBC861 /* SemaDecl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE34633F0B02F0F800DBC861 /* SemaDecl.cpp */; };
 		DE46BF280AE0A82D00CC047C /* TargetInfo.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE46BF270AE0A82D00CC047C /* TargetInfo.h */; };
 		DE5932D10AD60FF400BC794C /* clang.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932CD0AD60FF400BC794C /* clang.cpp */; };
 		DE5932D20AD60FF400BC794C /* clang.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE5932CE0AD60FF400BC794C /* clang.h */; };
@@ -156,6 +157,7 @@
 		DE34621C0AFEB19B00DBC861 /* StmtPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StmtPrinter.cpp; path = AST/StmtPrinter.cpp; sourceTree = "<group>"; };
 		DE3462950B019D9800DBC861 /* Type.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Type.h; path = clang/Parse/Type.h; sourceTree = "<group>"; };
 		DE3462D80B01B52900DBC861 /* SemaDeclSpec.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaDeclSpec.cpp; path = Parse/SemaDeclSpec.cpp; sourceTree = "<group>"; };
+		DE34633F0B02F0F800DBC861 /* SemaDecl.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaDecl.cpp; path = AST/SemaDecl.cpp; sourceTree = "<group>"; };
 		DE46BF270AE0A82D00CC047C /* TargetInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TargetInfo.h; sourceTree = "<group>"; };
 		DE5932CD0AD60FF400BC794C /* clang.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = clang.cpp; path = Driver/clang.cpp; sourceTree = "<group>"; };
 		DE5932CE0AD60FF400BC794C /* clang.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = clang.h; path = Driver/clang.h; sourceTree = "<group>"; };
@@ -312,6 +314,7 @@
 				DEC8DAAC0A94400300353FCA /* ASTStreamer.cpp */,
 				DED62ABA0AE2EDF1001E80A4 /* Decl.cpp */,
 				DE0FCB330A9C21F100248FD5 /* Expr.cpp */,
+				DE34633F0B02F0F800DBC861 /* SemaDecl.cpp */,
 				DE3452400AEF1A2D00DBC861 /* Stmt.cpp */,
 				DE34621C0AFEB19B00DBC861 /* StmtPrinter.cpp */,
 				DE345C560AFC69E800DBC861 /* StmtVisitor.cpp */,
@@ -471,6 +474,7 @@
 				DE3461270AFE68BE00DBC861 /* MinimalAction.cpp in Sources */,
 				DE34621D0AFEB19B00DBC861 /* StmtPrinter.cpp in Sources */,
 				DE3462D90B01B52900DBC861 /* SemaDeclSpec.cpp in Sources */,
+				DE3463400B02F0F800DBC861 /* SemaDecl.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};

Modified: cfe/cfe/trunk/include/clang/Parse/Action.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Action.h?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Action.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Action.h Wed Jul 11 11:39:57 2007
@@ -204,6 +204,13 @@
   }
   virtual ExprResult ParseIntegerConstant(SourceLocation Loc) { return 0; }
   virtual ExprResult ParseFloatingConstant(SourceLocation Loc) { return 0; }
+  
+  /// ParseStringExpr - The specified tokens were lexed as pasted string
+  /// fragments (e.g. "foo" "bar" L"baz").
+  virtual ExprResult ParseStringExpr(const LexerToken *Toks, unsigned NumToks) {
+    return 0;
+  }
+  
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val) {
     return Val;  // Default impl returns operand.
@@ -293,33 +300,6 @@
                                          IdentifierInfo **IdentList,
                                          unsigned NumElts);
   
-  //===--------------------------------------------------------------------===//
-  // Expression Parsing Callbacks.
-  //===--------------------------------------------------------------------===//
-  
-  /// ParseStringExpr - The specified tokens were lexed as pasted string
-  /// fragments (e.g. "foo" "bar" L"baz").
-  virtual ExprResult ParseStringExpr(const LexerToken *Toks, unsigned NumToks){
-    return 0;
-  }
-  
-};
-
-/// SemanticAction - Clients the implement this interface expect Decl nodes to 
-/// be created, name lookup to be performed, and full semantic analysis of the
-/// source program to be performed.
-class SemanticAction : public Action {
-public:
-  
-  /// ParseStringExpr - The (null terminated) string data is specified with
-  /// StrData+StrLen.  isWide is true if this is a wide string. The Toks/NumToks
-  /// array exposes the input tokens to provide location information.
-  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
-                                     bool isWide,
-                                     SourceLocation *TokLocs, unsigned NumToks){
-    return 0;
-  }
-  
 };
 
 }  // end namespace clang

Modified: cfe/cfe/trunk/include/clang/Parse/Parser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Parser.h?rev=39150&r1=39149&r2=39150&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Parser.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Parser.h Wed Jul 11 11:39:57 2007
@@ -41,17 +41,10 @@
   /// and SemaActions for those uses that don't matter.
   Action &Actions;
   
-  /// MinimalActions/SemaActions - Exactly one of these two pointers is non-null
-  /// depending on whether the client of the parser wants semantic analysis,
-  /// name binding, and Decl creation performed or not.
-  MinimalAction  *MinimalActions;
-  SemanticAction *SemaActions;
-  
   Scope *CurScope;
   Diagnostic &Diags;
 public:
-  Parser(Preprocessor &PP, MinimalAction &MinActions);
-  Parser(Preprocessor &PP, SemanticAction &SemaActions);
+  Parser(Preprocessor &PP, Action &Actions);
   ~Parser();
 
   const LangOptions &getLang() const { return PP.getLangOptions(); }
@@ -349,8 +342,6 @@
   void ParseDirectDeclarator(Declarator &D);
   void ParseParenDeclarator(Declarator &D);
   void ParseBracketDeclarator(Declarator &D);
-  
-  DeclTy *SemaInitDeclarator(Declarator &D, ExprTy *Init, DeclTy *LastInGroup);
 };
 
 }  // end namespace clang





More information about the cfe-commits mailing list