[cfe-commits] r38539 - in /cfe/cfe/trunk: ./ Basic/ Driver/ Lex/ include/ include/clang/ include/clang/Basic/ include/clang/Lex/

sabre at cs.uiuc.edu sabre at cs.uiuc.edu
Wed Jul 11 09:22:17 PDT 2007


Author: sabre
Date: Wed Jul 11 11:22:17 2007
New Revision: 38539

URL: http://llvm.org/viewvc/llvm-project?rev=38539&view=rev
Log:
Initial checkin of c-language parser

Added:
    cfe/cfe/trunk/Basic/
    cfe/cfe/trunk/Basic/Diagnostic.cpp   (with props)
    cfe/cfe/trunk/Basic/FileManager.cpp   (with props)
    cfe/cfe/trunk/Basic/Makefile   (with props)
    cfe/cfe/trunk/Basic/SourceBuffer.cpp   (with props)
    cfe/cfe/trunk/Basic/SourceManager.cpp   (with props)
    cfe/cfe/trunk/Basic/TokenKinds.cpp   (with props)
    cfe/cfe/trunk/Driver/
    cfe/cfe/trunk/Driver/clang.cpp   (with props)
    cfe/cfe/trunk/Lex/
    cfe/cfe/trunk/Lex/IdentifierTable.cpp   (with props)
    cfe/cfe/trunk/Lex/Lexer.cpp   (with props)
    cfe/cfe/trunk/Lex/MacroExpander.cpp   (with props)
    cfe/cfe/trunk/Lex/MacroInfo.cpp   (with props)
    cfe/cfe/trunk/Lex/Makefile   (with props)
    cfe/cfe/trunk/Lex/PPExpressions.cpp   (with props)
    cfe/cfe/trunk/Lex/Preprocessor.cpp   (with props)
    cfe/cfe/trunk/Makefile   (with props)
    cfe/cfe/trunk/README.txt   (with props)
    cfe/cfe/trunk/include/
    cfe/cfe/trunk/include/clang/
    cfe/cfe/trunk/include/clang/Basic/
    cfe/cfe/trunk/include/clang/Basic/Diagnostic.h   (with props)
    cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def   (with props)
    cfe/cfe/trunk/include/clang/Basic/FileManager.h   (with props)
    cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h   (with props)
    cfe/cfe/trunk/include/clang/Basic/SourceLocation.h   (with props)
    cfe/cfe/trunk/include/clang/Basic/SourceManager.h   (with props)
    cfe/cfe/trunk/include/clang/Basic/TokenKinds.def   (with props)
    cfe/cfe/trunk/include/clang/Basic/TokenKinds.h   (with props)
    cfe/cfe/trunk/include/clang/Lex/
    cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h   (with props)
    cfe/cfe/trunk/include/clang/Lex/Lexer.h   (with props)
    cfe/cfe/trunk/include/clang/Lex/MacroExpander.h   (with props)
    cfe/cfe/trunk/include/clang/Lex/MacroInfo.h   (with props)
    cfe/cfe/trunk/include/clang/Lex/Preprocessor.h   (with props)

Added: cfe/cfe/trunk/Basic/Diagnostic.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Basic/Diagnostic.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Basic/Diagnostic.cpp (added)
+++ cfe/cfe/trunk/Basic/Diagnostic.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,117 @@
+//===--- Diagnostic.cpp - C Language Family Diagnostic Handling -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Diagnostic-related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include <cassert>
+using namespace llvm;
+using namespace clang;
+
+/// Flag values for diagnostics.
+enum {
+  // Diagnostic classes.  The relative numeric order matters below:
+  NOTE       = 0x01,   // informational note attached to another diagnostic
+  WARNING    = 0x02,
+  EXTENSION  = 0x03,   // mapped to Ignored/Warning/Error by user options
+  ERROR      = 0x04,
+  FATAL      = 0x05,   // like ERROR, but Report() also stops compilation
+  class_mask = 0x07    // class occupies the low 3 bits of the FLAGS byte
+};
+
+/// DiagnosticFlags - A set of flags, or'd together, that describe the
+/// diagnostic.  Indexed by diag::kind; generated from DiagnosticKinds.def.
+static unsigned char DiagnosticFlags[] = {
+#define DIAG(ENUM,FLAGS,DESC) FLAGS,
+#include "clang/Basic/DiagnosticKinds.def"
+  0   // dummy trailing entry following the comma from the last DIAG expansion
+};
+
+/// getDiagClass - Return the class field of the diagnostic.
+///
+static unsigned getDiagClass(unsigned DiagID) {
+  assert(DiagID < diag::NUM_DIAGNOSTICS && "Diagnostic ID out of range!");
+  return DiagnosticFlags[DiagID] & class_mask;   // low 3 bits: NOTE..FATAL
+}
+
+/// DiagnosticText - An english message to print for the diagnostic.  These
+/// should be localized.  Indexed by diag::kind, parallel to DiagnosticFlags.
+static const char * const DiagnosticText[] = {
+#define DIAG(ENUM,FLAGS,DESC) DESC,
+#include "clang/Basic/DiagnosticKinds.def"
+  0   // dummy trailing entry following the comma from the last DIAG expansion
+};
+
+/// isNoteWarningOrExtension - Return true if the unmapped diagnostic level of
+/// the specified diagnostic ID is a Note, Warning, or Extension.
+bool Diagnostic::isNoteWarningOrExtension(unsigned DiagID) {
+  return getDiagClass(DiagID) < ERROR;   // relies on NOTE..FATAL enum order
+}
+
+
+/// getDescription - Given a diagnostic ID, return a description of the
+/// issue.  The returned string is a static english message (DiagnosticText).
+const char *Diagnostic::getDescription(unsigned DiagID) {
+  assert(DiagID < diag::NUM_DIAGNOSTICS && "Diagnostic ID out of range!");
+  return DiagnosticText[DiagID];
+}
+
+/// getDiagnosticLevel - Based on the way the client configured the Diagnostic
+/// object, classify the specified diagnostic ID into a Level, consumable by
+/// the DiagnosticClient.  May return Ignored, meaning "drop this diagnostic".
+Diagnostic::Level Diagnostic::getDiagnosticLevel(unsigned DiagID) const {
+  unsigned DiagClass = getDiagClass(DiagID);
+  
+  // TODO: specific diagnostics may be enabled or disabled.  Filter those based
+  // on their DiagID.
+  
+  // Map diagnostic classes based on command line argument settings.
+  if (DiagClass == EXTENSION) {
+    if (ErrorOnExtensions)
+      DiagClass = ERROR;
+    else if (WarnOnExtensions)
+      DiagClass = WARNING;
+    else
+      return Ignored;   // extensions are silently accepted by default
+  }
+  
+  // If warnings are to be treated as errors, indicate this as such.
+  if (DiagClass == WARNING && WarningsAsErrors)
+    DiagClass = ERROR;
+  
+  switch (DiagClass) {
+  default: assert(0 && "Unknown diagnostic class!");
+  case NOTE:        return Diagnostic::Note;
+  case WARNING:     return Diagnostic::Warning;
+  case ERROR:       return Diagnostic::Error;
+  case FATAL:       return Diagnostic::Fatal;
+  }
+}
+
+/// Report - Issue the message to the client. If the client wants us to stop
+/// compilation, return true, otherwise return false.  DiagID is a member of
+/// the diag::kind enum.  
+bool Diagnostic::Report(SourceLocation Pos, unsigned DiagID,
+                        const std::string &Extra) {
+  // Figure out the diagnostic level of this message.
+  Diagnostic::Level DiagLevel = getDiagnosticLevel(DiagID);
+  
+  // If the client doesn't care about this message, don't map to the code.
+  if (DiagLevel == Diagnostic::Ignored)
+    return false;
+  
+  // Finally, report it; a Fatal diagnostic always requests a stop.
+  return Client.HandleDiagnostic(DiagLevel, Pos, (diag::kind)DiagID, Extra) ||
+         DiagLevel == Fatal;
+}
+
+DiagnosticClient::~DiagnosticClient() {}  // out-of-line -- NOTE(review): presumably anchors the vtable; confirm against Diagnostic.h

Propchange: cfe/cfe/trunk/Basic/Diagnostic.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Basic/Diagnostic.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Basic/FileManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Basic/FileManager.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Basic/FileManager.cpp (added)
+++ cfe/cfe/trunk/Basic/FileManager.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,136 @@
+//===--- FileManager.cpp - File System Probing and Caching ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the FileManager interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This should index all interesting directories with dirent calls.
+//  getdirentries ?
+//  opendir/readdir_r/closedir ?
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/FileManager.h"
+#include <iostream>
+using namespace llvm;
+using namespace clang;
+
+// FIXME: Enhance libsystem to support inode and other fields.
+#include <sys/stat.h>
+
+/// getDirectory - Lookup, cache, and verify the specified directory.  This
+/// returns null if the directory doesn't exist.  Negative results are cached
+/// too: a cached null entry means a prior lookup failed.
+const DirectoryEntry *FileManager::getDirectory(const std::string &Filename) {
+  ++NumDirLookups;
+  // See if there is already an entry in the map.
+  std::map<std::string, DirectoryEntry*>::iterator I = 
+    DirEntries.lower_bound(Filename);
+  if (I != DirEntries.end() && I->first == Filename)
+    return I->second;   // may be null: negative lookups are cached
+  
+  ++NumDirCacheMisses;
+  
+  // By default, zero initialize it.
+  DirectoryEntry *&Ent =
+    DirEntries.insert(I, std::make_pair(Filename, (DirectoryEntry*)0))->second;
+  
+  // Nope, there isn't.  Check to see if the directory exists.
+  struct stat StatBuf;
+  if (stat(Filename.c_str(), &StatBuf) ||   // Error stat'ing.
+      !S_ISDIR(StatBuf.st_mode))            // Not a directory?
+    return 0;
+  
+  // It exists.  See if we have already opened a directory with the same inode.
+  // This occurs when one dir is symlinked to another, for example.
+  DirectoryEntry *&UDE = 
+    UniqueDirs[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)];
+  
+  if (UDE)  // Already have an entry with this inode, return it.
+    return Ent = UDE;
+  
+  // Otherwise, we don't have this directory yet, add it.
+  DirectoryEntry *DE = new DirectoryEntry();
+  DE->Name           = Filename;
+  return Ent = UDE = DE;   // record under both the name and the inode key
+}
+
+/// getFile - Lookup, cache, and verify the specified file.  This returns null
+/// if the file doesn't exist.  Results are cached by name and unified by
+/// (dev, inode), so hard links and symlinks share one FileEntry.
+const FileEntry *FileManager::getFile(const std::string &Filename) {
+  ++NumFileLookups;
+  
+  // See if there is already an entry in the map.
+  std::map<std::string, FileEntry*>::iterator I = 
+    FileEntries.lower_bound(Filename);
+  if (I != FileEntries.end() && I->first == Filename)
+    return I->second;   // may be null: negative lookups are cached
+
+  ++NumFileCacheMisses;
+
+  // By default, zero initialize it.
+  FileEntry *&Ent =
+    FileEntries.insert(I, std::make_pair(Filename, (FileEntry*)0))->second;
+
+  // Figure out what directory it is in.
+  std::string DirName;
+  
+  // If the string contains a / in it, strip off everything after it.
+  // FIXME: this logic should be in sys::Path.
+  std::string::size_type SlashPos = Filename.find_last_of('/');
+  if (SlashPos == std::string::npos)
+    DirName = ".";  // Use the current directory if file has no path component.
+  else if (SlashPos == Filename.size()-1)
+    return 0;       // If filename ends with a /, it's a directory.
+  else
+    DirName = std::string(Filename.begin(), Filename.begin()+SlashPos);
+
+  const DirectoryEntry *DirInfo = getDirectory(DirName);
+  if (DirInfo == 0)  // Directory doesn't exist, file can't exist.
+    return 0;
+  
+  // FIXME: Use the directory info to prune this, before doing the stat syscall.
+  // FIXME: This will reduce the # syscalls.
+  
+  // Nope, there isn't.  Check to see if the file exists.
+  struct stat StatBuf;
+  if (stat(Filename.c_str(), &StatBuf) ||   // Error stat'ing.
+      S_ISDIR(StatBuf.st_mode))             // A directory?
+    return 0;
+  
+  // It exists.  See if we have already opened a file with the same inode.
+  // This occurs when one file is hard-linked or symlinked to another.
+  FileEntry *&UFE = 
+    UniqueFiles[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)];
+  
+  if (UFE)  // Already have an entry with this inode, return it.
+    return Ent = UFE;
+  
+  // Otherwise, we don't have this file yet, add it.
+  FileEntry *FE = new FileEntry();
+  FE->Size      = StatBuf.st_size;
+  FE->Name      = Filename;
+  FE->Dir       = DirInfo;
+  FE->UID       = NextFileUID++;   // dense ID, assigned in first-access order
+  return Ent = UFE = FE;
+}
+
+void FileManager::PrintStats() const {   // Dump cache-effectiveness counters to stderr.
+  std::cerr << "\n*** File Manager Stats:\n";
+  std::cerr << UniqueFiles.size() << " files found, "
+            << UniqueDirs.size() << " dirs found.\n";
+  std::cerr << NumDirLookups << " dir lookups, "
+            << NumDirCacheMisses << " dir cache misses.\n";
+  std::cerr << NumFileLookups << " file lookups, "
+            << NumFileCacheMisses << " file cache misses.\n";
+  
+  //std::cerr << PagesMapped << BytesOfPagesMapped << FSLookups;
+}

Propchange: cfe/cfe/trunk/Basic/FileManager.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Basic/FileManager.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Basic/Makefile
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Basic/Makefile?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Basic/Makefile (added)
+++ cfe/cfe/trunk/Basic/Makefile Wed Jul 11 11:22:17 2007
@@ -0,0 +1,21 @@
+##===- clang/Basic/Makefile --------------------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by Chris Lattner and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+#
+#  This implements the Basic library for the C-Language front-end.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME := clangBasic
+BUILD_ARCHIVE = 1
+
+CPPFLAGS += -I$(LEVEL)/tools/clang/include
+
+include $(LEVEL)/Makefile.common
+

Propchange: cfe/cfe/trunk/Basic/Makefile

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Basic/Makefile

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Basic/SourceBuffer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Basic/SourceBuffer.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Basic/SourceBuffer.cpp (added)
+++ cfe/cfe/trunk/Basic/SourceBuffer.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,162 @@
+//===--- SourceBuffer.cpp - C Language Family Source Buffer Impl. ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the SourceBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/SourceBuffer.h"
+#include "clang/Basic/FileManager.h"
+#include "llvm/System/MappedFile.h"
+#include "llvm/System/Process.h"
+#include <cstdio>
+#include <cstring>
+using namespace llvm;
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// SourceBuffer implementation itself.
+//===----------------------------------------------------------------------===//
+
+SourceBuffer::~SourceBuffer() {
+  if (MustDeleteBuffer)   // NOTE(review): initCopyOf news a buffer but leaves this false -- likely leak; confirm
+    delete [] BufferStart;
+}
+
+/// initCopyOf - Initialize this source buffer with a copy of the specified
+/// memory range.  We make the copy so that we can null terminate it
+/// successfully.  The copy is heap-allocated and owned by this buffer.
+void SourceBuffer::initCopyOf(const char *BufStart, const char *BufEnd) {
+  size_t Size = BufEnd-BufStart;
+  BufferStart = new char[Size+1];
+  BufferEnd = BufferStart+Size;
+  memcpy(const_cast<char*>(BufferStart), BufStart, Size);
+  *const_cast<char*>(BufferEnd) = 0;   // Null terminate buffer.
+  MustDeleteBuffer = true;   // we allocated the copy; ~SourceBuffer must free it
+}
+
+/// init - Initialize this SourceBuffer as a reference to externally allocated
+/// memory, memory that we know is already null terminated.  Caller retains
+/// ownership of the memory; this buffer will never free it.
+void SourceBuffer::init(const char *BufStart, const char *BufEnd) {
+  assert(BufEnd[0] == 0 && "Buffer is not null terminated!");
+  BufferStart = BufStart;
+  BufferEnd = BufEnd;
+  MustDeleteBuffer = false;   // external memory: never delete it
+}
+
+//===----------------------------------------------------------------------===//
+// SourceBufferMem implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class SourceBufferMem : public SourceBuffer {   // wraps caller-owned memory
+  std::string FileID;   // name reported by getBufferIdentifier()
+public:
+  SourceBufferMem(const char *Start, const char *End, const char *FID)
+  : FileID(FID) {
+    init(Start, End);   // reference only: memory stays owned by the caller
+  }
+  
+  virtual const char *getBufferIdentifier() const {
+    return FileID.c_str();
+  }
+};
+}
+
+/// getMemBuffer - Open the specified memory range as a SourceBuffer.  Note
+/// that EndPtr[0] must be a null byte and be accessible!  (Referenced, not copied.)
+SourceBuffer *SourceBuffer::getMemBuffer(const char *StartPtr, 
+                                         const char *EndPtr,
+                                         const char *BufferName) {
+  return new SourceBufferMem(StartPtr, EndPtr, BufferName);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SourceBufferFile implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class SourceBufferFile : public SourceBuffer {   // buffer backed by a memory-mapped file
+  sys::MappedFile File;
+public:
+  SourceBufferFile(const sys::Path &Filename);
+  
+  virtual const char *getBufferIdentifier() const {
+    return File.path().c_str();
+  }
+    
+  ~SourceBufferFile();   // unmaps the file
+};
+}
+
+SourceBufferFile::SourceBufferFile(const sys::Path &Filename) : File(Filename) {
+  // FIXME: This does an extra stat syscall to figure out the size, but we
+  // already know the size!
+  File.map();
+  
+  size_t Size = File.size();
+  
+  static unsigned PageSize = sys::Process::GetPageSize();
+  assert(((PageSize & (PageSize-1)) == 0) && PageSize &&
+         "Page size is not a power of 2!");
+  
+  // If this file is not an exact multiple of the system page size (common
+  // case), then the OS has zero terminated the buffer for us.
+  if ((Size & (PageSize-1))) {
+    init(File.charBase(), File.charBase()+Size);   // use the mapping directly
+  } else {
+    // Otherwise, we allocate a new memory buffer and copy the data over, so we can null terminate it.
+    initCopyOf(File.charBase(), File.charBase()+Size);
+    
+    // No need to keep the file mapped any longer.
+    File.unmap();
+  }
+}
+
+SourceBufferFile::~SourceBufferFile() {
+  File.unmap();   // NOTE(review): also reached after the ctor's copy path already unmapped -- confirm MappedFile tolerates a second unmap
+}
+
+
+SourceBuffer *SourceBuffer::getFile(const FileEntry *FileEnt) {
+  try {
+    return new SourceBufferFile(sys::Path(FileEnt->getName()));
+  } catch (...) {
+    return 0;   // any failure mapping the file is reported as a null buffer
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SourceBufferSTDIN implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class STDINBufferFile : public SourceBuffer {   // buffer holding stdin's contents
+public:
+  virtual const char *getBufferIdentifier() const {
+    return "<stdin>";
+  }
+};
+}
+
+SourceBuffer *SourceBuffer::getSTDIN() {
+  char Buffer[4096*4];   // 16K read chunks
+  
+  std::vector<char> FileData;
+  
+  // Read in all of the data from stdin, we cannot mmap stdin.
+  while (size_t ReadBytes = fread(Buffer, 1, 4096*4, stdin))
+    FileData.insert(FileData.end(), Buffer, Buffer+ReadBytes);
+  
+  size_t Size = FileData.size();
+  SourceBuffer *B = new STDINBufferFile();
+  B->initCopyOf(&FileData[0], &FileData[Size]);   // NOTE(review): &FileData[0] is UB when stdin is empty, and [Size] indexes one past the end -- confirm
+  return B;
+}

Propchange: cfe/cfe/trunk/Basic/SourceBuffer.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Basic/SourceBuffer.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Basic/SourceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Basic/SourceManager.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Basic/SourceManager.cpp (added)
+++ cfe/cfe/trunk/Basic/SourceManager.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,213 @@
+//===--- SourceManager.cpp - Track and cache source files -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the SourceManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceBuffer.h"
+#include "llvm/System/Path.h"
+#include <algorithm>
+#include <iostream>
+using namespace llvm;
+using namespace clang;
+
+SourceManager::~SourceManager() {
+  for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
+       E = FileInfos.end(); I != E; ++I) {
+    delete I->second.Buffer;             // SourceBuffer owned by this FileInfo
+    delete[] I->second.SourceLineCache;  // may be null if never computed
+  }
+  
+  for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(), 
+       E = MemBufferInfos.end(); I != E; ++I) {
+    delete I->second.Buffer;             // mem-buffer records own their buffers too
+    delete[] I->second.SourceLineCache;
+  }
+}
+
+/// getInfoRec - Create or return a cached FileInfo record for the given file.
+/// Returns null if the file cannot be opened.
+const SourceManager::InfoRec *
+SourceManager::getInfoRec(const FileEntry *FileEnt) {
+  assert(FileEnt && "Didn't specify a file entry to use?");
+  // Do we already have information about this file?
+  std::map<const FileEntry *, FileInfo>::iterator I = 
+    FileInfos.lower_bound(FileEnt);
+  if (I != FileInfos.end() && I->first == FileEnt)
+    return &*I;
+  
+  // Nope, get information.
+  const SourceBuffer *File;
+  try {
+    File = clang::SourceBuffer::getFile(FileEnt);
+    if (File == 0)
+      return 0;
+  } catch (...) {
+    return 0;   // treat any exception from getFile as "file unavailable"
+  }
+
+  const InfoRec &Entry =
+    *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
+  FileInfo &Info = const_cast<FileInfo &>(Entry.second);
+
+  Info.Buffer = File;
+  Info.SourceLineCache = 0;   // line table is built lazily by getLineNumber
+  Info.NumLines = 0;
+  return &Entry;
+}
+
+
+/// createMemBufferInfoRec - Create a new info record for the specified memory
+/// buffer.  This does no caching.  The record takes ownership of Buffer.
+const SourceManager::InfoRec *
+SourceManager::createMemBufferInfoRec(const SourceBuffer *Buffer) {
+  // Add a new info record to the MemBufferInfos list and return it.
+  FileInfo FI;
+  FI.Buffer = Buffer;
+  FI.SourceLineCache = 0;   // built lazily by getLineNumber
+  FI.NumLines = 0;
+  MemBufferInfos.push_back(InfoRec(0, FI));   // null FileEntry: not a disk file
+  return &MemBufferInfos.back();   // std::list pointers are stable
+}
+
+
+/// createFileID - Create a new fileID for the specified InfoRec and include
+/// position.  This works regardless of whether the InfoRec corresponds to a
+/// file or some other input source.  Returns the ID of the first chunk.
+unsigned SourceManager::createFileID(const InfoRec *File,
+                                     SourceLocation IncludePos) {
+  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
+  // to fit an arbitrary position in the file in the FilePos field.  To handle
+  // this, we create one FileID for each chunk of the file that fits in a
+  // FilePos field.
+  unsigned FileSize = File->second.Buffer->getBufferSize();
+  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
+    FileIDs.push_back(FileIDInfo(IncludePos, 0, File));
+    return FileIDs.size();   // FileIDs are 1-based
+  }
+  
+  // Create one FileID for each chunk of the file.
+  unsigned Result = FileIDs.size()+1;   // ID the first chunk will receive
+
+  unsigned ChunkNo = 0;
+  while (1) {
+    FileIDs.push_back(FileIDInfo(IncludePos, ChunkNo++, File));
+
+    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
+    FileSize -= (1 << SourceLocation::FilePosBits);   // consume one chunk's worth
+  }
+
+  return Result;
+}
+
+/// getColumnNumber - Return the column # for the specified include position.
+/// this is significantly cheaper to compute than the line number.  This returns
+/// zero if the column number isn't known.  Columns are otherwise 1-based.
+unsigned SourceManager::getColumnNumber(SourceLocation IncludePos) const {
+  unsigned FileID = IncludePos.getFileID();
+  if (FileID == 0) return 0;   // FileID 0 == invalid/unknown location
+  FileInfo *FileInfo = getFileInfo(FileID);   // NOTE(review): variable shadows the FileInfo type name
+  unsigned FilePos = getFilePos(IncludePos);
+  const SourceBuffer *Buffer = FileInfo->Buffer;
+  const char *Buf = Buffer->getBufferStart();
+
+  unsigned LineStart = FilePos;   // scan backward to the start of the line
+  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
+    --LineStart;
+  return FilePos-LineStart+1;
+}
+
+/// getLineNumber - Given a SourceLocation, return the physical line number
+/// for the position indicated.  This requires building and caching a table of
+/// line offsets for the SourceBuffer, so this is not cheap: use only when
+/// about to emit a diagnostic.
+unsigned SourceManager::getLineNumber(SourceLocation IncludePos) {
+  FileInfo *FileInfo = getFileInfo(IncludePos.getFileID());
+  
+  // If this is the first use of line information for this buffer, compute the
+  /// SourceLineCache for it on demand. 
+  if (FileInfo->SourceLineCache == 0) {
+    const SourceBuffer *Buffer = FileInfo->Buffer;
+    
+    // Find the file offsets of all of the *physical* source lines.  This does
+    // not look at trigraphs, escaped newlines, or anything else tricky.
+    std::vector<unsigned> LineOffsets;
+    
+    // Line #1 starts at char 0.
+    LineOffsets.push_back(0);
+    
+    const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
+    const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
+    unsigned Offs = 0;
+    while (1) {
+      // Skip over the contents of the line.
+      // TODO: Vectorize this?  This is very performance sensitive for programs
+      // with lots of diagnostics.
+      const unsigned char *NextBuf = (const unsigned char *)Buf;
+      while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
+        ++NextBuf;
+      Offs += NextBuf-Buf;
+      Buf = NextBuf;
+      
+      if (Buf[0] == '\n' || Buf[0] == '\r') {
+        // If this is \n\r or \r\n, skip both characters.
+        if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
+          ++Offs, ++Buf;
+        ++Offs, ++Buf;
+        LineOffsets.push_back(Offs);   // next line starts after the terminator
+      } else {
+        // Otherwise, this is a null.  If end of file, exit.
+        if (Buf == End) break;
+        // Otherwise, skip the null.  (An embedded null does not end a line.)
+        ++Offs, ++Buf;
+      }
+    }
+    LineOffsets.push_back(Offs);   // record the end of the final line
+    
+    // Copy the offsets into the FileInfo structure.
+    FileInfo->NumLines = LineOffsets.size();
+    FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
+    std::copy(LineOffsets.begin(), LineOffsets.end(),
+              FileInfo->SourceLineCache);
+  }
+
+  // Okay, we know we have a line number table.  Do a binary search to find the
+  // line number that this character position lands on.
+  unsigned NumLines = FileInfo->NumLines;
+  unsigned *SourceLineCache = FileInfo->SourceLineCache;
+    
+  // TODO: If this is performance sensitive, we could try doing simple radix
+  // type approaches to make good (tight?) initial guesses based on the
+  // assumption that all lines are the same average size.
+  unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
+                                   getFilePos(IncludePos)+1);
+  return Pos-SourceLineCache;   // 1-based line number
+}
+
+/// PrintStats - Print statistics to stderr.
+///
+void SourceManager::PrintStats() const {
+  std::cerr << "\n*** Source Manager Stats:\n";
+  std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
+            << " mem buffers mapped, " << FileIDs.size() 
+            << " file ID's allocated.\n";
+  
+  unsigned NumLineNumsComputed = 0;
+  unsigned NumFileBytesMapped = 0;
+  for (std::map<const FileEntry *, FileInfo>::const_iterator I = 
+       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
+    NumLineNumsComputed += I->second.SourceLineCache != 0;   // cache is built lazily
+    NumFileBytesMapped  += I->second.Buffer->getBufferSize();
+  }
+  std::cerr << NumFileBytesMapped << " bytes of files mapped, "
+            << NumLineNumsComputed << " files with line #'s computed.\n";
+}

Propchange: cfe/cfe/trunk/Basic/SourceManager.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Basic/SourceManager.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Basic/TokenKinds.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Basic/TokenKinds.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Basic/TokenKinds.cpp (added)
+++ cfe/cfe/trunk/Basic/TokenKinds.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,28 @@
+//===--- TokenKinds.cpp - Token Kinds Support -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the TokenKind enum and support functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/TokenKinds.h"
+#include <cassert>
+using namespace llvm;
+using namespace llvm::clang;
+
+static const char * const TokNames[] = {   // one string per token kind, indexed by TokenKind
+#define TOK(X) #X,
+#include "clang/Basic/TokenKinds.def"
+  0   // dummy trailing entry following the comma from the last TOK expansion
+};
+
+const char *tok::getTokenName(enum TokenKind Kind) {
+  assert(Kind < tok::NUM_TOKENS);
+  return TokNames[Kind];
+}

Propchange: cfe/cfe/trunk/Basic/TokenKinds.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Basic/TokenKinds.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Driver/clang.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Driver/clang.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Driver/clang.cpp (added)
+++ cfe/cfe/trunk/Driver/clang.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,772 @@
+//===--- clang.cpp - C-Language Front-end ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This utility may be invoked in the following manner:
+//   clang --help         - Output information about command line switches
+//   clang [options]      - Read from stdin.
+//   clang [options] file - Read from "file".
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Options to support:
+//
+//   -ffatal-errors
+//   -ftabstop=width
+//   -fdollars-in-identifiers
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceBuffer.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/System/MappedFile.h"
+#include "llvm/System/Signals.h"
+#include <cstring>
+#include <iostream>
+#include <set>
+#include <string>
+#include <vector>
+using namespace llvm;
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Global options.
+//===----------------------------------------------------------------------===//
+
+// -v: enable verbose driver output (e.g. the include-path dump below).
+static cl::opt<bool>
+Verbose("v", cl::desc("Enable verbose output"));
+// -stats: print per-component statistics at the end of main().
+static cl::opt<bool>
+Stats("stats", cl::desc("Print performance metrics and statistics"));
+
+// ProgActions - Top-level action for the driver to perform, chosen by the
+// mutually-exclusive flags registered in ProgAction below.
+enum ProgActions {
+  RunPreprocessorOnly,          // Just lex, no output.
+  PrintPreprocessedInput,       // -E mode.
+  DumpTokens                    // Token dump mode.
+};
+
+// Defaults to token-dump mode when no action flag is given.
+static cl::opt<ProgActions> 
+ProgAction(cl::desc("Choose output type:"), cl::ZeroOrMore,cl::init(DumpTokens),
+           cl::values(
+             clEnumValN(RunPreprocessorOnly, "Eonly",
+                        "Just run preprocessor, no output (for timings)"),
+             clEnumValN(PrintPreprocessedInput, "E",
+                        "Run preprocessor, emit preprocessed file"),
+             clEnumValN(DumpTokens, "dumptokens",
+                        "Run preprocessor, dump internal rep of tokens"),
+             clEnumValEnd));
+
+
+//===----------------------------------------------------------------------===//
+// Our DiagnosticClient implementation
+//===----------------------------------------------------------------------===//
+
+// FIXME: Werror should take a list of things, -Werror=foo,bar
+static cl::opt<bool>
+WarningsAsErrors("Werror", cl::desc("Treat all warnings as errors"));
+
+// -pedantic: diagnose GCC extensions as warnings.
+static cl::opt<bool>
+WarnOnExtensions("pedantic",
+                 cl::desc("Issue a warning on uses of GCC extensions"));
+
+// -pedantic-errors: diagnose GCC extensions as hard errors.
+static cl::opt<bool>
+ErrorOnExtensions("pedantic-errors",
+                  cl::desc("Issue an error on uses of GCC extensions"));
+
+/// InitializeDiagnostics - Initialize the diagnostic object, based on the
+/// current command line option settings.
+static void InitializeDiagnostics(Diagnostic &Diags) {
+  Diags.setWarningsAsErrors(WarningsAsErrors);
+  Diags.setWarnOnExtensions(WarnOnExtensions);
+  Diags.setErrorOnExtensions(ErrorOnExtensions);
+}
+
+// Output-format tweaks consumed by DiagnosticPrinterSTDERR below.
+static cl::opt<bool>
+NoShowColumn("fno-show-column",
+             cl::desc("Do not include column number on diagnostics"));
+static cl::opt<bool>
+NoCaretDiagnostics("fno-caret-diagnostics",
+                   cl::desc("Do not include source line and caret with"
+                            " diagnostics"));
+
+/// DiagnosticPrinterSTDERR - This is a concrete diagnostic client, which prints
+/// the diagnostics to standard error.
+class DiagnosticPrinterSTDERR : public DiagnosticClient {
+  // Used to decode SourceLocations into file/line/column information.
+  SourceManager &SourceMgr;
+  // Include location of the previously reported diagnostic; the include
+  // stack is reprinted only when this changes (see HandleDiagnostic).
+  SourceLocation LastWarningLoc;
+public:
+  DiagnosticPrinterSTDERR(SourceManager &sourceMgr)
+    : SourceMgr(sourceMgr) {}
+  
+  // Recursively emit "In file included from ..." lines for Pos.
+  void PrintIncludeStack(SourceLocation Pos);
+
+  // DiagnosticClient interface: format and print one diagnostic to stderr.
+  virtual bool HandleDiagnostic(Diagnostic::Level DiagLevel,
+                                SourceLocation Pos,
+                                diag::kind ID, const std::string &Msg);
+};
+
+void DiagnosticPrinterSTDERR::
+PrintIncludeStack(SourceLocation Pos) {
+  unsigned FileID = Pos.getFileID();
+  if (FileID == 0) return;
+  
+  // Print out the other include frames first.
+  PrintIncludeStack(SourceMgr.getIncludeLoc(FileID));
+  
+  unsigned LineNo = SourceMgr.getLineNumber(Pos);
+  
+  const SourceBuffer *Buffer = SourceMgr.getBuffer(FileID);
+  std::cerr << "In file included from " << Buffer->getBufferIdentifier()
+            << ":" << LineNo << ":\n";
+}
+
+
+bool DiagnosticPrinterSTDERR::HandleDiagnostic(Diagnostic::Level Level, 
+                                               SourceLocation Pos,
+                                               diag::kind ID, 
+                                               const std::string &Extra) {
+  unsigned LineNo = 0, FilePos = 0, FileID = 0, ColNo = 0;
+  unsigned LineStart = 0, LineEnd = 0;
+  const SourceBuffer *Buffer = 0;
+  
+  if (Pos.isValid()) {
+    LineNo = SourceMgr.getLineNumber(Pos);
+    FilePos = SourceMgr.getFilePos(Pos);
+    FileID  = Pos.getFileID();
+    
+    // First, if this diagnostic is not in the main file, print out the
+    // "included from" lines.
+    if (LastWarningLoc != SourceMgr.getIncludeLoc(Pos.getFileID())) {
+      LastWarningLoc = SourceMgr.getIncludeLoc(Pos.getFileID());
+      PrintIncludeStack(LastWarningLoc);
+    }
+  
+    // Compute the column number.  Rewind from the current position to the start
+    // of the line.
+    ColNo = SourceMgr.getColumnNumber(Pos);
+    LineStart = FilePos-ColNo-1;  // Column # is 1-based
+  
+    // Compute the line end.  Scan forward from the error position to the end of
+    // the line.
+    Buffer = SourceMgr.getBuffer(FileID);
+    const char *Buf = Buffer->getBufferStart();
+    const char *BufEnd = Buffer->getBufferEnd();
+    LineEnd = FilePos;
+    while (Buf+LineEnd != BufEnd && 
+           Buf[LineEnd] != '\n' && Buf[LineEnd] != '\r')
+      ++LineEnd;
+  
+    std::cerr << Buffer->getBufferIdentifier() 
+              << ":" << LineNo << ":";
+    if (ColNo && !NoShowColumn) 
+      std::cerr << ColNo << ":";
+    std::cerr << " ";
+  }
+  
+  switch (Level) {
+  default: assert(0 && "Unknown diagnostic type!");
+  case Diagnostic::Note: std::cerr << "note: "; break;
+  case Diagnostic::Warning: std::cerr << "warning: "; break;
+  case Diagnostic::Error: std::cerr << "error: "; break;
+  case Diagnostic::Fatal: std::cerr << "fatal error: "; break;
+  case Diagnostic::Sorry: std::cerr << "sorry, unimplemented: "; break;
+  }
+  
+  std::string Msg = Diagnostic::getDescription(ID);
+  
+  // Replace all instances of %s in Msg with 'Extra'.
+  if (Msg.size() > 1) {
+    for (unsigned i = 0; i < Msg.size()-1; ++i) {
+      if (Msg[i] == '%' && Msg[i+1] == 's') {
+        Msg = std::string(Msg.begin(), Msg.begin()+i) +
+              Extra +
+              std::string(Msg.begin()+i+2, Msg.end());
+      }
+    }
+  }
+  std::cerr << Msg << "\n";
+  
+  if (!NoCaretDiagnostics && Pos.isValid()) {
+    // Print out a line of the source file.
+    const char *Buf = Buffer->getBufferStart();
+    std::cerr << std::string(Buf+LineStart, Buf+LineEnd) << "\n";
+    
+    // If the source line contained any tab characters between the start of the
+    // line and the diagnostic, replace the space we inserted with a tab, so
+    // that the carat will be indented exactly like the source line.
+    std::string Indent(ColNo-1, ' ');
+    for (unsigned i = LineStart; i != FilePos; ++i)
+      if (Buf[i] == '\t')
+        Indent[i-LineStart] = '\t';
+    
+    // Print out the caret itself.
+    std::cerr << Indent << "^\n";
+  }
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Initialization
+//===----------------------------------------------------------------------===//
+
+// FIXME: Preprocessor builtins to support.
+//   -A...    - Play with #assertions
+//   -undef   - Undefine all predefined macros
+
+// -D<macro>[=<value>]: predefine <macro> (to <value>, or to 1 if omitted).
+static cl::list<std::string>
+D_macros("D", cl::value_desc("macro"), cl::Prefix,
+       cl::desc("Predefine the specified macro"));
+// -U<macro>: emit an #undef for <macro> into the predefines buffer.
+static cl::list<std::string>
+U_macros("U", cl::value_desc("macro"), cl::Prefix,
+         cl::desc("Undefine the specified macro"));
+
/// DefineBuiltinMacro - Append one preprocessor directive line to Buf for
/// Macro.  "XXX" emits "#define XXX 1"; "XXX=Y z W" emits "#define XXX Y z W";
/// "XXX=" emits a #define with no value.  Command selects the directive
/// keyword and defaults to "#define ".
static void DefineBuiltinMacro(std::vector<char> &Buf, const char *Macro,
                               const char *Command = "#define ") {
  // Emit the directive keyword first ("#define " or "#undef ").
  Buf.insert(Buf.end(), Command, Command + strlen(Command));

  const char *Eq = strchr(Macro, '=');
  if (Eq == 0) {
    // Bare name: the macro expands to 1.
    Buf.insert(Buf.end(), Macro, Macro + strlen(Macro));
    Buf.push_back(' ');
    Buf.push_back('1');
  } else {
    // "NAME=VALUE": emit "NAME VALUE", where VALUE may be empty.
    Buf.insert(Buf.end(), Macro, Eq);
    Buf.push_back(' ');
    Buf.insert(Buf.end(), Eq + 1, Eq + strlen(Eq));
  }
  Buf.push_back('\n');
}
+
+/// InitializePredefinedMacros - Fill Buf with the "#define"/"#undef" lines
+/// that form the predefines buffer: __STDC__, a hard-coded snapshot of the
+/// macros GCC 4.0.1 predefines on powerpc-darwin, and finally the -D/-U
+/// options from the command line.  Buf is later lexed as the <predefines>
+/// memory buffer in main().
+static void InitializePredefinedMacros(Preprocessor &PP, 
+                                       std::vector<char> &Buf) {
+  // FIXME: Implement magic like cpp_init_builtins for things like __STDC__
+  // and __DATE__ etc.
+#if 0
+  /* __STDC__ has the value 1 under normal circumstances.
+  However, if (a) we are in a system header, (b) the option
+  stdc_0_in_system_headers is true (set by target config), and
+  (c) we are not in strictly conforming mode, then it has the
+  value 0.  (b) and (c) are already checked in cpp_init_builtins.  */
+{
+  case BT_STDC:
+    if (cpp_in_system_header (pfile))
+      number = 0;
+    else
+      number = 1;
+    break;
+}
+#endif    
+  DefineBuiltinMacro(Buf, "__STDC__=1");
+  
+  // FIXME: This is obviously silly.  It should be more like gcc/c-cppbuiltin.c.
+  // Macros predefined by GCC 4.0.1.
+  DefineBuiltinMacro(Buf, "_ARCH_PPC=1");
+  DefineBuiltinMacro(Buf, "_BIG_ENDIAN=1");
+  DefineBuiltinMacro(Buf, "__APPLE_CC__=5250");
+  DefineBuiltinMacro(Buf, "__APPLE__=1");
+  DefineBuiltinMacro(Buf, "__BIG_ENDIAN__=1");
+  DefineBuiltinMacro(Buf, "__CHAR_BIT__=8");
+  DefineBuiltinMacro(Buf, "__CONSTANT_CFSTRINGS__=1");
+  DefineBuiltinMacro(Buf, "__DBL_DENORM_MIN__=4.9406564584124654e-324");
+  DefineBuiltinMacro(Buf, "__DBL_DIG__=15");
+  DefineBuiltinMacro(Buf, "__DBL_EPSILON__=2.2204460492503131e-16");
+  DefineBuiltinMacro(Buf, "__DBL_HAS_INFINITY__=1");
+  DefineBuiltinMacro(Buf, "__DBL_HAS_QUIET_NAN__=1");
+  DefineBuiltinMacro(Buf, "__DBL_MANT_DIG__=53");
+  DefineBuiltinMacro(Buf, "__DBL_MAX_10_EXP__=308");
+  DefineBuiltinMacro(Buf, "__DBL_MAX_EXP__=1024");
+  DefineBuiltinMacro(Buf, "__DBL_MAX__=1.7976931348623157e+308");
+  DefineBuiltinMacro(Buf, "__DBL_MIN_10_EXP__=(-307)");
+  DefineBuiltinMacro(Buf, "__DBL_MIN_EXP__=(-1021)");
+  DefineBuiltinMacro(Buf, "__DBL_MIN__=2.2250738585072014e-308");
+  DefineBuiltinMacro(Buf, "__DECIMAL_DIG__=33");
+  DefineBuiltinMacro(Buf, "__DYNAMIC__=1");
+  DefineBuiltinMacro(Buf, "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1030");
+  DefineBuiltinMacro(Buf, "__FINITE_MATH_ONLY__=0");
+  DefineBuiltinMacro(Buf, "__FLT_DENORM_MIN__=1.40129846e-45F");
+  DefineBuiltinMacro(Buf, "__FLT_DIG__=6");
+  DefineBuiltinMacro(Buf, "__FLT_EPSILON__=1.19209290e-7F");
+  DefineBuiltinMacro(Buf, "__FLT_EVAL_METHOD__=0");
+  DefineBuiltinMacro(Buf, "__FLT_HAS_INFINITY__=1");
+  DefineBuiltinMacro(Buf, "__FLT_HAS_QUIET_NAN__=1");
+  DefineBuiltinMacro(Buf, "__FLT_MANT_DIG__=24");
+  DefineBuiltinMacro(Buf, "__FLT_MAX_10_EXP__=38");
+  DefineBuiltinMacro(Buf, "__FLT_MAX_EXP__=128");
+  DefineBuiltinMacro(Buf, "__FLT_MAX__=3.40282347e+38F");
+  DefineBuiltinMacro(Buf, "__FLT_MIN_10_EXP__=(-37)");
+  DefineBuiltinMacro(Buf, "__FLT_MIN_EXP__=(-125)");
+  DefineBuiltinMacro(Buf, "__FLT_MIN__=1.17549435e-38F");
+  DefineBuiltinMacro(Buf, "__FLT_RADIX__=2");
+  DefineBuiltinMacro(Buf, "__GNUC_MINOR__=0");
+  DefineBuiltinMacro(Buf, "__GNUC_PATCHLEVEL__=1");
+  DefineBuiltinMacro(Buf, "__GNUC__=4");
+  DefineBuiltinMacro(Buf, "__GXX_ABI_VERSION=1002");
+  DefineBuiltinMacro(Buf, "__INTMAX_MAX__=9223372036854775807LL");
+  DefineBuiltinMacro(Buf, "__INTMAX_TYPE__=long long int");
+  DefineBuiltinMacro(Buf, "__INT_MAX__=2147483647");
+  DefineBuiltinMacro(Buf, "__LDBL_DENORM_MIN__=4.940656458412465441765687"
+                        "92868221e-324L");
+  DefineBuiltinMacro(Buf, "__LDBL_DIG__=31");
+  DefineBuiltinMacro(Buf, "__LDBL_EPSILON__=4.9406564584124654417656879286822"
+                        "1e-324L");
+  DefineBuiltinMacro(Buf, "__LDBL_HAS_INFINITY__=1");
+  DefineBuiltinMacro(Buf, "__LDBL_HAS_QUIET_NAN__=1");
+  DefineBuiltinMacro(Buf, "__LDBL_MANT_DIG__=106");
+  DefineBuiltinMacro(Buf, "__LDBL_MAX_10_EXP__=308");
+  DefineBuiltinMacro(Buf, "__LDBL_MAX_EXP__=1024");
+  DefineBuiltinMacro(Buf, "__LDBL_MAX__=1.7976931348623158079372897140"
+                        "5301e+308L");
+  DefineBuiltinMacro(Buf, "__LDBL_MIN_10_EXP__=(-291)");
+  DefineBuiltinMacro(Buf, "__LDBL_MIN_EXP__=(-968)");
+  DefineBuiltinMacro(Buf, "__LDBL_MIN__=2.004168360008972777996108051350"
+                        "16e-292L");
+  DefineBuiltinMacro(Buf, "__LONG_DOUBLE_128__=1");
+  DefineBuiltinMacro(Buf, "__LONG_LONG_MAX__=9223372036854775807LL");
+  DefineBuiltinMacro(Buf, "__LONG_MAX__=2147483647L");
+  DefineBuiltinMacro(Buf, "__MACH__=1");
+  DefineBuiltinMacro(Buf, "__NATURAL_ALIGNMENT__=1");
+  DefineBuiltinMacro(Buf, "__NO_INLINE__=1");
+  DefineBuiltinMacro(Buf, "__PIC__=1");
+  DefineBuiltinMacro(Buf, "__POWERPC__=1");
+  DefineBuiltinMacro(Buf, "__PTRDIFF_TYPE__=int");
+  DefineBuiltinMacro(Buf, "__REGISTER_PREFIX__");
+  DefineBuiltinMacro(Buf, "__SCHAR_MAX__=127");
+  DefineBuiltinMacro(Buf, "__SHRT_MAX__=32767");
+  DefineBuiltinMacro(Buf, "__SIZE_TYPE__=long unsigned int");
+  DefineBuiltinMacro(Buf, "__STDC_HOSTED__=1");
+  DefineBuiltinMacro(Buf, "__UINTMAX_TYPE__=long long unsigned int");
+  DefineBuiltinMacro(Buf, "__USER_LABEL_PREFIX__=_");
+  DefineBuiltinMacro(Buf, "__VERSION__=\"4.0.1 (Apple Computer, Inc. "
+                        "build 5250)\"");
+  DefineBuiltinMacro(Buf, "__WCHAR_MAX__=2147483647");
+  DefineBuiltinMacro(Buf, "__WCHAR_TYPE__=int");
+  DefineBuiltinMacro(Buf, "__WINT_TYPE__=int");
+  DefineBuiltinMacro(Buf, "__ppc__=1");
+  DefineBuiltinMacro(Buf, "__strong");
+  DefineBuiltinMacro(Buf, "__weak");
+  // C++-only predefines, mirroring GCC in C++ mode.
+  if (PP.getLangOptions().CPlusPlus) {
+    DefineBuiltinMacro(Buf, "__DEPRECATED=1");
+    DefineBuiltinMacro(Buf, "__EXCEPTIONS=1");
+    DefineBuiltinMacro(Buf, "__GNUG__=4");
+    DefineBuiltinMacro(Buf, "__GXX_WEAK__=1");
+    DefineBuiltinMacro(Buf, "__cplusplus=1");
+    DefineBuiltinMacro(Buf, "__private_extern__=extern");
+  }
+  
+  // FIXME: Should emit a #line directive here.
+
+  // Add macros from the command line.
+  // FIXME: Should traverse the #define/#undef lists in parallel.
+  for (unsigned i = 0, e = D_macros.size(); i != e; ++i)
+    DefineBuiltinMacro(Buf, D_macros[i].c_str());
+  for (unsigned i = 0, e = U_macros.size(); i != e; ++i)
+    DefineBuiltinMacro(Buf, U_macros[i].c_str(), "#undef ");
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor include path information.
+//===----------------------------------------------------------------------===//
+
+// This tool exports a large number of command line options to control how the
+// preprocessor searches for header files.  At root, however, the Preprocessor
+// object takes a very simple interface: a list of directories to search for
+// headers, in search order.
+// 
+// FIXME: -nostdinc,-nostdinc++
+// FIXME: -isysroot,-imultilib
+//
+// FIXME: -include,-imacros
+
+// -nostdinc: suppress the hard-coded default system include directories.
+static cl::opt<bool>
+nostdinc("nostdinc", cl::desc("Disable standard #include directories"));
+
+// Various command line options.  These four add directories to each chain.
+static cl::list<std::string>
+I_dirs("I", cl::value_desc("directory"), cl::Prefix,
+       cl::desc("Add directory to include search path"));
+static cl::list<std::string>
+idirafter_dirs("idirafter", cl::value_desc("directory"), cl::Prefix,
+               cl::desc("Add directory to AFTER include search path"));
+static cl::list<std::string>
+iquote_dirs("iquote", cl::value_desc("directory"), cl::Prefix,
+               cl::desc("Add directory to QUOTE include search path"));
+static cl::list<std::string>
+isystem_dirs("isystem", cl::value_desc("directory"), cl::Prefix,
+            cl::desc("Add directory to SYSTEM include search path"));
+
+// These handle -iprefix/-iwithprefix/-iwithprefixbefore.  They are processed
+// together, in command-line order, by InitializeIncludePaths below.
+static cl::list<std::string>
+iprefix_vals("iprefix", cl::value_desc("prefix"), cl::Prefix,
+             cl::desc("Set the -iwithprefix/-iwithprefixbefore prefix"));
+static cl::list<std::string>
+iwithprefix_vals("iwithprefix", cl::value_desc("dir"), cl::Prefix,
+          cl::desc("Set directory to SYSTEM include search path with prefix"));
+static cl::list<std::string>
+iwithprefixbefore_vals("iwithprefixbefore", cl::value_desc("dir"), cl::Prefix,
+                 cl::desc("Set directory to include search path with prefix"));
+
+// Finally, implement the code that groks the options above.
+
+// IncludeDirGroup - The four search chains a directory can be added to.
+// Quoted is searched first (only for #include "file"); Angled, System and
+// After are concatenated, deduplicated, and searched in that order.
+enum IncludeDirGroup {
+  Quoted = 0,   // -iquote
+  Angled,       // -I
+  System,       // -isystem (and the built-in system dirs)
+  After         // -idirafter
+};
+
+// IncludeGroup - One pending directory list per IncludeDirGroup; merged into
+// the final search list by InitializeIncludePaths.
+static std::vector<DirectoryLookup> IncludeGroup[4];
+
+/// AddPath - Add the specified path to the specified group list.
+///
+static void AddPath(const std::string &Path, IncludeDirGroup Group,
+                    bool isCXXAware, bool isUserSupplied,
+                    FileManager &FM) {
+  const DirectoryEntry *DE = FM.getDirectory(Path);
+  if (DE == 0) {
+    if (Verbose)
+      std::cerr << "ignoring nonexistent directory \"" << Path << "\"\n";
+    return;
+  }
+  
+  DirectoryLookup::DirType Type;
+  if (Group == Quoted || Group == Angled)
+    Type = DirectoryLookup::NormalHeaderDir;
+  else if (isCXXAware)
+    Type = DirectoryLookup::SystemHeaderDir;
+  else
+    Type = DirectoryLookup::ExternCSystemHeaderDir;
+  
+  IncludeGroup[Group].push_back(DirectoryLookup(DE, Type, isUserSupplied));
+}
+
+/// RemoveDuplicates - If there are duplicate directory entries in the specified
+/// search list, remove the later (dead) ones.
+static void RemoveDuplicates(std::vector<DirectoryLookup> &SearchList) {
+  std::set<const DirectoryEntry *> SeenDirs;
+  for (unsigned i = 0; i != SearchList.size(); ++i) {
+    // If this isn't the first time we've seen this dir, remove it.
+    if (!SeenDirs.insert(SearchList[i].getDir()).second) {
+      if (Verbose)
+        std::cerr << "ignoring duplicate directory \""
+                  << SearchList[i].getDir()->getName() << "\"\n";
+      SearchList.erase(SearchList.begin()+i);
+      --i;
+    }
+  }
+}
+
+// Process the -I options and set them in the preprocessor.
+static void InitializeIncludePaths(Preprocessor &PP) {
+  FileManager &FM = PP.getFileManager();
+
+  // Handle -I... options.
+  for (unsigned i = 0, e = I_dirs.size(); i != e; ++i) {
+    if (I_dirs[i] == "-") {
+      // -I- is a deprecated GCC feature.
+      PP.getDiagnostics().Report(SourceLocation(),
+                                 diag::err_pp_I_dash_not_supported);
+    } else {
+      AddPath(I_dirs[i], Angled, false, true, FM);
+    }
+  }
+  
+  // Handle -idirafter... options.
+  for (unsigned i = 0, e = idirafter_dirs.size(); i != e; ++i)
+    AddPath(idirafter_dirs[i], After, false, true, FM);
+  
+  // Handle -iquote... options.
+  for (unsigned i = 0, e = iquote_dirs.size(); i != e; ++i)
+    AddPath(iquote_dirs[i], Quoted, false, true, FM);
+  
+  // Handle -isystem... options.
+  for (unsigned i = 0, e = isystem_dirs.size(); i != e; ++i)
+    AddPath(isystem_dirs[i], System, false, true, FM);
+
+  // Walk the -iprefix/-iwithprefix/-iwithprefixbefore argument lists in
+  // parallel, processing the values in order of occurrence to get the right
+  // prefixes.
+  {
+    std::string Prefix = "";  // FIXME: this isn't the correct default prefix.
+    unsigned iprefix_idx = 0;
+    unsigned iwithprefix_idx = 0;
+    unsigned iwithprefixbefore_idx = 0;
+    bool iprefix_done           = iprefix_vals.empty();
+    bool iwithprefix_done       = iwithprefix_vals.empty();
+    bool iwithprefixbefore_done = iwithprefixbefore_vals.empty();
+    // Each iteration consumes whichever list has the earliest remaining
+    // command-line position.  getPosition() is only evaluated for lists that
+    // still have unconsumed entries (guarded by the *_done flags).
+    while (!iprefix_done || !iwithprefix_done || !iwithprefixbefore_done) {
+      if (!iprefix_done &&
+          (iwithprefix_done || 
+           iprefix_vals.getPosition(iprefix_idx) < 
+           iwithprefix_vals.getPosition(iwithprefix_idx)) &&
+          (iwithprefixbefore_done || 
+           iprefix_vals.getPosition(iprefix_idx) < 
+           iwithprefixbefore_vals.getPosition(iwithprefixbefore_idx))) {
+        Prefix = iprefix_vals[iprefix_idx];
+        ++iprefix_idx;
+        iprefix_done = iprefix_idx == iprefix_vals.size();
+      } else if (!iwithprefix_done &&
+                 (iwithprefixbefore_done || 
+                  iwithprefix_vals.getPosition(iwithprefix_idx) < 
+                  iwithprefixbefore_vals.getPosition(iwithprefixbefore_idx))) {
+        AddPath(Prefix+iwithprefix_vals[iwithprefix_idx], 
+                System, false, false, FM);
+        ++iwithprefix_idx;
+        iwithprefix_done = iwithprefix_idx == iwithprefix_vals.size();
+      } else {
+        AddPath(Prefix+iwithprefixbefore_vals[iwithprefixbefore_idx], 
+                Angled, false, false, FM);
+        ++iwithprefixbefore_idx;
+        iwithprefixbefore_done = 
+          iwithprefixbefore_idx == iwithprefixbefore_vals.size();
+      }
+    }
+  }
+  
+  // FIXME: Add contents of the CPATH, C_INCLUDE_PATH, CPLUS_INCLUDE_PATH,
+  // OBJC_INCLUDE_PATH, OBJCPLUS_INCLUDE_PATH environment variables.
+  
+  // FIXME: temporary hack: hard-coded paths.
+  if (!nostdinc) {
+    AddPath("/usr/local/include", System, false, false, FM);
+    AddPath("/usr/lib/gcc/powerpc-apple-darwin8/4.0.1/include", 
+            System, false, false, FM);
+    AddPath("/usr/lib/gcc/powerpc-apple-darwin8/"
+            "4.0.1/../../../../powerpc-apple-darwin8/include", 
+            System, false, false, FM);
+    AddPath("/usr/include", System, false, false, FM);
+    AddPath("/System/Library/Frameworks", System, false, false, FM);
+    AddPath("/Library/Frameworks", System, false, false, FM);
+  }
+
+  // Now that we have collected all of the include paths, merge them all
+  // together and tell the preprocessor about them.
+  
+  // Concatenate ANGLE+SYSTEM+AFTER chains together into SearchList.
+  std::vector<DirectoryLookup> SearchList;
+  SearchList = IncludeGroup[Angled];
+  SearchList.insert(SearchList.end(), IncludeGroup[System].begin(),
+                    IncludeGroup[System].end());
+  SearchList.insert(SearchList.end(), IncludeGroup[After].begin(),
+                    IncludeGroup[After].end());
+  RemoveDuplicates(SearchList);
+  RemoveDuplicates(IncludeGroup[Quoted]);
+  
+  // Prepend QUOTED list on the search list.
+  SearchList.insert(SearchList.begin(), IncludeGroup[Quoted].begin(), 
+                    IncludeGroup[Quoted].end());
+  
+
+  bool DontSearchCurDir = false;  // TODO: set to true if -I- is set?
+  PP.SetSearchPaths(SearchList, IncludeGroup[Quoted].size(),
+                    DontSearchCurDir);
+
+  // If verbose, print the list of directories that will be searched.
+  if (Verbose) {
+    std::cerr << "#include \"...\" search starts here:\n";
+    unsigned QuotedIdx = IncludeGroup[Quoted].size();
+    for (unsigned i = 0, e = SearchList.size(); i != e; ++i) {
+      if (i == QuotedIdx)
+        std::cerr << "#include <...> search starts here:\n";
+      std::cerr << " " << SearchList[i].getDir()->getName() << "\n";
+    }
+  }
+}
+
+
+// Read any files specified by -imacros or -include.
+static void ReadPrologFiles(Preprocessor &PP, std::vector<char> &Buf) {
+  // FIXME: IMPLEMENT -- currently a no-op; Buf is left untouched.
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessed output mode.
+//===----------------------------------------------------------------------===//
+
+/// DoPrintPreprocessedInput - This implements -E mode.  Lexes the whole
+/// translation unit through the preprocessor and prints each token's
+/// spelling to stdout, reconstructing line breaks and leading indentation
+/// so the output resembles the original layout.
+void DoPrintPreprocessedInput(Preprocessor &PP) {
+  LexerToken Tok;
+  char Buffer[256];   // Scratch space for short token spellings.
+  bool isFirstToken = true;
+  do {
+    // A true result from Lex aborts the loop -- presumably an error
+    // condition; confirm against Preprocessor::Lex.
+    if (PP.Lex(Tok)) return;
+
+    // If this token is at the start of a line.  Emit the \n and indentation.
+    // FIXME: this shouldn't use the isAtStartOfLine flag.  This should use a
+    // "newline callback" from the lexer.
+    // FIXME: For some tests, this fails just because there is no col# info from
+    // macro expansions!
+    if (Tok.isAtStartOfLine()) {
+      if (!isFirstToken)
+        std::cout << "\n";
+      // Print out space characters so that the first token on a line is
+      // indented for easy reading.
+      unsigned ColNo = 
+        PP.getSourceManager().getColumnNumber(Tok.getSourceLocation());
+      
+      // This hack prevents stuff like:
+      // #define HASH #
+      // HASH define foo bar
+      // From having the # character end up at column 1, which makes it so it
+      // is not handled as a #define next time through the preprocessor if in
+      // -fpreprocessed mode.
+      if (ColNo <= 1 && Tok.getKind() == tok::hash)
+        std::cout << ' ';
+      
+      for (; ColNo > 1; --ColNo)
+        std::cout << ' ';
+      
+    } else if (Tok.hasLeadingSpace()) {
+      // Mid-line token that had whitespace before it: emit a single space.
+      std::cout << ' ';
+    }
+    isFirstToken = false;    
+    
+    // Use the stack buffer for short spellings; fall back to a heap string
+    // for anything that might not fit (Buffer holds 255 chars + NUL).
+    if (Tok.getEnd()-Tok.getStart() < 256) {
+      unsigned Len = Lexer::getSpelling(Tok, Buffer, PP.getLangOptions());
+      Buffer[Len] = 0;
+      std::cout << Buffer;
+    } else {
+      std::string S = Lexer::getSpelling(Tok, PP.getLangOptions());
+      std::cout << S;
+    }
+  } while (Tok.getKind() != tok::eof);
+  std::cout << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver
+//===----------------------------------------------------------------------===//
+
+// Positional input file; "-" (the default) reads from stdin.
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+// NOTE(review): declared here but not referenced in this file; presumably
+// defined in the Lex library -- confirm before removing.
+void PrintIdentStats();
+
+/// main - Driver entry point: build the SourceManager/FileManager/Diagnostic/
+/// Preprocessor stack from the command-line options, lex the predefines
+/// buffer, then run the selected ProgAction over the input file.
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv, " llvm cfe\n");
+  sys::PrintStackTraceOnErrorSignal();
+  
+  /// Create a SourceManager object.  This tracks and owns all the file buffers
+  /// allocated to the program.
+  SourceManager SourceMgr;
+  
+  // Print diagnostics to stderr.
+  DiagnosticPrinterSTDERR OurDiagnosticClient(SourceMgr);
+  
+  // Configure our handling of diagnostics.
+  Diagnostic OurDiagnostics(OurDiagnosticClient);
+  InitializeDiagnostics(OurDiagnostics);
+  
+  // Turn all options on.
+  // FIXME: add -ansi and -std= options.
+  LangOptions Options;
+  Options.Trigraphs = 1;
+  Options.BCPLComment = 1;  // Only for C99/C++.
+  Options.C99 = 1;
+  Options.DollarIdents = Options.Digraphs = 1;
+  Options.ObjC1 = Options.ObjC2 = 1;
+
+  // Create a file manager object to provide access to and cache the filesystem.
+  FileManager FileMgr;
+  
+  // Set up the preprocessor with these options.
+  Preprocessor PP(OurDiagnostics, Options, FileMgr, SourceMgr);
+  
+  // Install things like __POWERPC__, __GNUC__, etc into the macro table.
+  std::vector<char> PrologMacros;
+  InitializePredefinedMacros(PP, PrologMacros);
+  
+  // Process the -I options and set them in the preprocessor.
+  InitializeIncludePaths(PP);
+
+  // Read any files specified by -imacros or -include.
+  ReadPrologFiles(PP, PrologMacros);
+  
+  // Set up keywords.
+  PP.AddKeywords();
+  
+  // Now that we have emitted the predefined macros, #includes, etc into
+  // PrologMacros, preprocess it to populate the initial preprocessor state.
+  {
+    // Memory buffer must end with a null byte!
+    PrologMacros.push_back(0);
+
+    SourceBuffer *SB = SourceBuffer::getMemBuffer(&PrologMacros.front(),
+                                                  &PrologMacros.back(),
+                                                  "<predefines>");
+    assert(SB && "Cannot fail to create predefined source buffer");
+    unsigned FileID = SourceMgr.createFileIDForMemBuffer(SB);
+    assert(FileID && "Could not create FileID for predefines?");
+    
+    // Start parsing the predefines.
+    PP.EnterSourceFile(FileID, 0);
+
+    // Lex the file, which will read all the macros.
+    LexerToken Tok;
+    if (PP.Lex(Tok)) return 1;
+    assert(Tok.getKind() == tok::eof && "Didn't read entire file!");
+    
+    // Once we've read this, we're done.
+  }
+  
+  // Open the primary input: a named file, or stdin for "-".
+  unsigned MainFileID = 0;
+  if (InputFilename != "-") {
+    const FileEntry *File = FileMgr.getFile(InputFilename);
+    if (File) MainFileID = SourceMgr.createFileID(File, SourceLocation());
+    if (MainFileID == 0) {
+      std::cerr << "Error reading '" << InputFilename << "'!\n";
+      return 1;
+    }
+  } else {
+    SourceBuffer *SB = SourceBuffer::getSTDIN();
+    if (SB) MainFileID = SourceMgr.createFileIDForMemBuffer(SB);
+    if (MainFileID == 0) {
+      std::cerr << "Error reading standard input!  Empty?\n";
+      return 1;
+    }
+  }
+  
+  // Start parsing the specified input file.
+  PP.EnterSourceFile(MainFileID, 0);
+  
+  // Dispatch on the requested action.
+  switch (ProgAction) {
+  case RunPreprocessorOnly: {        // Just lex as fast as we can, no output.
+    LexerToken Tok;
+    do {
+      if (PP.Lex(Tok))
+        break;
+    } while (Tok.getKind() != tok::eof);
+    break;
+  }
+    
+  case PrintPreprocessedInput:       // -E mode.
+    DoPrintPreprocessedInput(PP);
+    break;
+                  
+  case DumpTokens: {                 // Token dump mode.
+    LexerToken Tok;
+    do {
+      if (PP.Lex(Tok))
+        break;
+      Tok.dump(true);
+      std::cerr << "\n";
+    } while (Tok.getKind() != tok::eof);
+    break;
+  }
+  }
+  
+  if (Stats) {
+    // Printed from low-to-high level.
+    PP.getFileManager().PrintStats();
+    PP.getSourceManager().PrintStats();
+    PP.getIdentifierTable().PrintStats();
+    PP.PrintStats();
+    std::cerr << "\n";
+  }
+  // Falls off the end of main(), which is an implicit "return 0".
+}

Propchange: cfe/cfe/trunk/Driver/clang.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Driver/clang.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/IdentifierTable.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/IdentifierTable.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/IdentifierTable.cpp (added)
+++ cfe/cfe/trunk/Lex/IdentifierTable.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,268 @@
+//===--- IdentifierTable.cpp - Identifier & Keyword Table Implementation --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IdentifierTokenInfo and IdentifierTable interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/IdentifierTable.h"
+#include "clang/Lex/MacroInfo.h"
+#include <iostream>
+using namespace llvm;
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// IdentifierTokenInfo Implementation
+//===----------------------------------------------------------------------===//
+
+/// Destroy - Free the heap-allocated objects owned by this identifier:
+/// currently just the attached MacroInfo.  Deleting a null Macro is a no-op.
+void IdentifierTokenInfo::Destroy() {
+  delete Macro;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Memory Allocation Support
+//===----------------------------------------------------------------------===//
+
+/// The identifier table has a very simple memory allocation pattern: it just
+/// keeps allocating identifiers, then never frees them unless it frees them
+/// all.  As such, we use a simple bump-pointer memory allocator to make
+/// allocation speedy.  Shark showed that malloc was 27% of the time spent in
+/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
+#define USE_ALLOCATOR 1
+#if USE_ALLOCATOR
+
+namespace {
+/// MemRegion - One chunk of a simple bump-pointer arena.  The MemRegion
+/// header lives at the front of the chunk it describes; identifier records
+/// are carved out of the bytes that follow the header.  Chunks form a
+/// singly-linked list (newest first) and are only ever released en masse via
+/// Deallocate().
+class MemRegion {
+  // Total byte size of this chunk, including the MemRegion header itself.
+  unsigned RegionSize;
+  // Older (exhausted) region in the chain, or null for the first chunk.
+  MemRegion *Next;
+  // Bump pointer: address of the next unallocated byte in this chunk.
+  char *NextPtr;
+public:
+  /// Init - Set up a freshly malloc'd chunk of 'size' bytes whose header is
+  /// this object, chaining it in front of 'next'.  Rounds the bump pointer
+  /// up so the first allocation is suitably aligned for IdentifierTokenInfo.
+  void Init(unsigned size, MemRegion *next) {
+    RegionSize = size;
+    Next = next;
+    NextPtr = (char*)(this+1);
+    
+    // FIXME: uses GCC extension.
+    unsigned Alignment = __alignof__(IdentifierTokenInfo);
+    NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
+                      ~(intptr_t)(Alignment-1));
+  }
+  
+  const MemRegion *getNext() const { return Next; }
+  // Bytes consumed in this chunk so far (header + alignment + allocations).
+  unsigned getNumBytesAllocated() const {
+    return NextPtr-(const char*)this;
+  }
+  
+  /// Allocate - Allocate and return at least the specified number of bytes.
+  /// If the current chunk is full, a new chunk twice this chunk's size is
+  /// malloc'd, becomes the new head of the chain (*RegPtr), and the request
+  /// is retried there.
+  /// NOTE(review): the malloc result is unchecked, and a request larger than
+  /// RegionSize*2 would recurse until the doubling catches up -- both are
+  /// assumed impossible for identifier-sized allocations; confirm.
+  void *Allocate(unsigned AllocSize, MemRegion **RegPtr) {
+    // FIXME: uses GCC extension.
+    unsigned Alignment = __alignof__(IdentifierTokenInfo);
+    // Round size up to an even multiple of the alignment.
+    AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
+    
+    // If there is space in this region for the identifier, return it.
+    if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
+      void *Result = NextPtr;
+      NextPtr += AllocSize;
+      return Result;
+    }
+    
+    // Otherwise, we have to allocate a new chunk.  Create one twice as big as
+    // this one.
+    MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
+    NewRegion->Init(RegionSize*2, this);
+
+    // Update the current "first region" pointer to point to the new region.
+    *RegPtr = NewRegion;
+    
+    // Try allocating from it now.
+    return NewRegion->Allocate(AllocSize, RegPtr);
+  }
+  
+  /// Deallocate - Release all memory for this region and every older region
+  /// in the chain back to the system.  'this' is freed immediately, so the
+  /// next pointer must be read out first.
+  void Deallocate() {
+    MemRegion *next = Next;
+    free(this);
+    if (next)
+      next->Deallocate();
+  }
+};
+}
+
+#endif
+
+//===----------------------------------------------------------------------===//
+// IdentifierTable Implementation
+//===----------------------------------------------------------------------===//
+
+
+/// IdentifierBucket - There is one of these allocated for each identifier in
+/// the table.  These form the linked chain of entries hanging off one hash
+/// table slot.  The identifier's null-terminated string data is stored
+/// immediately after the TokInfo member (see IdentifierTable::get).
+struct IdentifierBucket {
+  /// Next - This is the next bucket in the linked list.
+  IdentifierBucket *Next;
+  
+  IdentifierTokenInfo TokInfo;
+  // NOTE: TokInfo must be the last element in this structure, as the string
+  // information for the identifier is allocated right after it.
+};
+
+// Number of slots in the (currently fixed-size) hash table.
+// NOTE(review): 8096 looks like a typo for 8192 -- harmless (it is just the
+// modulus), but worth confirming, since it is neither a power of two nor
+// prime.
+// FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3.
+static unsigned HASH_TABLE_SIZE = 8096;
+
+/// IdentifierTable constructor - Create an empty hash table and, when the
+/// bump allocator is enabled, seed it with a single 32KB arena region.
+IdentifierTable::IdentifierTable() {
+  // The trailing () value-initializes the array, so every bucket chain
+  // already starts out null; no separate memset of the table is needed.
+  IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE]();
+  TheTable = TableArray;
+  NumIdentifiers = 0;
+#if USE_ALLOCATOR
+  TheMemory = malloc(8*4096);
+  ((MemRegion*)TheMemory)->Init(8*4096, 0);
+#endif
+}
+
+/// ~IdentifierTable - Walk every bucket chain, releasing per-identifier
+/// resources (e.g. macro info), then free the identifier storage itself --
+/// the whole arena chain at once when USE_ALLOCATOR is set, otherwise each
+/// individually malloc'd bucket -- and finally the table array.
+IdentifierTable::~IdentifierTable() {
+  IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
+  for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
+    IdentifierBucket *Id = TableArray[i]; 
+    while (Id) {
+      // Free memory referenced by the identifier (e.g. macro info).
+      Id->TokInfo.Destroy();
+      
+      IdentifierBucket *Next = Id->Next;
+#if !USE_ALLOCATOR
+      free(Id);
+#endif
+      Id = Next;
+    }
+  }
+#if USE_ALLOCATOR
+  // Bucket memory came from the bump allocator: release all regions at once.
+  ((MemRegion*)TheMemory)->Deallocate();
+#endif
+  delete [] TableArray;
+}
+
+/// HashString - Compute a hash code for the character range [Start, End).
+/// This is the classic Perl-style multiplicative hash (x33 per character)
+/// with a final step folding the high bits back into the low ones.
+static unsigned HashString(const char *Start, const char *End) {
+  unsigned Hash = 0;
+  for (const char *P = Start; P != End; ++P)
+    Hash = Hash*33 + *P;
+  return Hash + (Hash >> 5);
+}
+
+/// get - Return the unique IdentifierTokenInfo for the identifier named by
+/// the byte range [NameStart, NameEnd).  The name is looked up in the hash
+/// table; on a miss a new record is allocated (with the null-terminated
+/// string stored inline right after it) and linked into the bucket chain.
+IdentifierTokenInfo &IdentifierTable::get(const char *NameStart,
+                                          const char *NameEnd) {
+  IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
+
+  unsigned Hash = HashString(NameStart, NameEnd) % HASH_TABLE_SIZE;
+  unsigned Length = NameEnd-NameStart;
+  
+  // Search the bucket chain for an existing entry with the same text.
+  IdentifierBucket *IdentHead = TableArray[Hash];
+  for (IdentifierBucket *Identifier = IdentHead; Identifier; 
+       Identifier = Identifier->Next) {
+    if (Identifier->TokInfo.getNameLength() == Length &&
+        memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0)
+      return Identifier->TokInfo;
+  }
+
+  // Allocate a new identifier, with space for the null-terminated string at the
+  // end.
+  unsigned AllocSize = sizeof(IdentifierBucket)+Length+1;
+#if USE_ALLOCATOR
+  IdentifierBucket *Identifier = (IdentifierBucket*)
+    ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory);
+#else
+  IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize);
+#endif
+  Identifier->TokInfo.NameLen = Length;
+  Identifier->TokInfo.Macro = 0;
+  Identifier->TokInfo.TokenID = tok::identifier;
+  Identifier->TokInfo.IsExtension = false;
+  Identifier->TokInfo.FETokenInfo = 0;
+
+  // Keep the table's identifier count in sync.  (It was initialized to zero
+  // in the constructor but previously never updated.)
+  ++NumIdentifiers;
+
+  // Copy the string information.
+  char *StrBuffer = (char*)(Identifier+1);
+  memcpy(StrBuffer, NameStart, Length);
+  StrBuffer[Length] = 0;  // Null terminate string.
+  
+  // Link it into the hash table.
+  Identifier->Next = IdentHead;
+  TableArray[Hash] = Identifier;
+  return Identifier->TokInfo;
+}
+
+/// get - Convenience overload: intern an identifier held in a std::string.
+IdentifierTokenInfo &IdentifierTable::get(const std::string &Name) {
+  // Use data() rather than &Name[0]: indexing element 0 of an *empty* string
+  // is undefined behavior, while data() is always valid (the empty range
+  // passed below never dereferences it).  No null termination is needed.
+  const char *NameBytes = Name.data();
+  unsigned Size = Name.size();
+  return get(NameBytes, NameBytes+Size);
+}
+
+
+
+/// PrintStats - Print statistics about how well the identifier table is doing
+/// at hashing identifiers.
+void IdentifierTable::PrintStats() const {
+  // Note: this local deliberately recounts and shadows the NumIdentifiers
+  // member; the member is not read here.
+  unsigned NumIdentifiers = 0;
+  unsigned NumEmptyBuckets = 0;
+  unsigned MaxBucketLength = 0;
+  // Accumulates the *total* name length; averaged at print time below.
+  unsigned AverageIdentifierSize = 0;
+  unsigned MaxIdentifierLength = 0;
+  
+  IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
+  for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
+    
+    unsigned NumIdentifiersInBucket = 0;
+    for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
+      AverageIdentifierSize += Id->TokInfo.getNameLength();
+      if (MaxIdentifierLength < Id->TokInfo.getNameLength())
+        MaxIdentifierLength = Id->TokInfo.getNameLength();
+      ++NumIdentifiersInBucket;
+    }
+    if (NumIdentifiersInBucket > MaxBucketLength) 
+      MaxBucketLength = NumIdentifiersInBucket;
+    if (NumIdentifiersInBucket == 0)
+      ++NumEmptyBuckets;
+
+    NumIdentifiers += NumIdentifiersInBucket;
+  }
+  
+  // NOTE(review): on an empty table the divisions below produce inf/nan;
+  // harmless for a debug dump, but worth knowing.
+  std::cerr << "\n*** Identifier Table Stats:\n";
+  std::cerr << "# Identifiers:   " << NumIdentifiers << "\n";
+  std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
+  std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n";
+  std::cerr << "Hash density (#identifiers per bucket): "
+            << NumIdentifiers/(double)HASH_TABLE_SIZE << "\n";
+  std::cerr << "Nonempty hash density (average chain length): "
+            << NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n";
+  std::cerr << "Ave identifier length: "
+            << (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
+  std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
+  
+  // Compute statistics about the memory allocated for identifiers.
+#if USE_ALLOCATOR
+  unsigned BytesUsed = 0;
+  unsigned NumRegions = 0;
+  const MemRegion *R = (MemRegion*)TheMemory;
+  for (; R; R = R->getNext(), ++NumRegions) {
+    BytesUsed += R->getNumBytesAllocated();
+  }
+  std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
+  std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
+#endif
+}
+
+

Propchange: cfe/cfe/trunk/Lex/IdentifierTable.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/IdentifierTable.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Lexer.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/Lexer.cpp (added)
+++ cfe/cfe/trunk/Lex/Lexer.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,1473 @@
+//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Lexer and LexerToken interfaces.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: GCC Diagnostics emitted by the lexer:
+// PEDWARN: (form feed|vertical tab) in preprocessing directive
+//
+// Universal characters, unicode, char mapping:
+// WARNING: `%.*s' is not in NFKC
+// WARNING: `%.*s' is not in NFC
+//
+// Other:
+// ERROR  : attempt to use poisoned \"%s\"
+//
+// TODO: Options to support:
+//    -fexec-charset,-fwide-exec-charset
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceBuffer.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/Config/alloca.h"
+#include <cassert>
+#include <cctype>
+#include <iostream>
+using namespace llvm;
+using namespace clang;
+
+static void InitCharacterInfo();
+
+/// Lexer constructor - Create a lexer over the given SourceBuffer, known to
+/// the SourceManager as file 'fileid', on behalf of preprocessor 'pp'.  The
+/// buffer must have a null character at BufferEnd[0]; the lexer relies on it
+/// as an end-of-input sentinel.
+Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp)
+  : BufferPtr(File->getBufferStart()), BufferStart(BufferPtr),
+    BufferEnd(File->getBufferEnd()), InputFile(File), CurFileID(fileid), PP(pp),
+    Features(PP.getLangOptions()) {
+  InitCharacterInfo();
+      
+  assert(BufferEnd[0] == 0 &&
+         "We assume that the input buffer has a null character at the end"
+         " to simplify lexing!");
+      
+  // Start of the file is a start of line.
+  IsAtStartOfLine = true;
+
+  // We are not after parsing a #.
+  ParsingPreprocessorDirective = false;
+
+  // We are not after parsing #include.
+  ParsingFilename = false;
+}
+
+//===----------------------------------------------------------------------===//
+// LexerToken implementation.
+//===----------------------------------------------------------------------===//
+
+/// getSourceLocation - Return a source location identifier for the specified
+/// offset in the current file, or a default-constructed (null) location when
+/// this token has no associated lexer.
+SourceLocation LexerToken::getSourceLocation() const {
+  if (!TheLexer)
+    return SourceLocation();
+  return TheLexer->getSourceLocation(Start);
+}
+
+
+/// dump - Print the token to stderr, used for debugging.  Emits the token
+/// kind, its (cleaned, when possible) spelling in quotes, and optionally the
+/// flag bits when DumpFlags is set.
+void LexerToken::dump(bool DumpFlags) const {
+  std::cerr << clang::tok::getTokenName(Kind) << " '";
+  
+  if (!needsCleaning()) {
+    // Simple case: the buffer bytes are the spelling.
+    std::cerr << std::string(Start, End);
+  } else if (getLexer()) {
+    // Token contains trigraphs or escaped newlines: print the cleaned form.
+    std::cerr << getLexer()->getSpelling(*this);
+  } else {
+    // FIXME: expansion from macros clears location info. Testcase:
+    // #define TWELVE 1\    <whitespace only>
+    // 2
+    // TWELVE
+    std::cerr << "*unspelled*" << std::string(Start, End);
+  }
+  std::cerr << "'";
+  
+  if (DumpFlags) {
+    std::cerr << "\t";
+    if (isAtStartOfLine())
+      std::cerr << " [StartOfLine]";
+    if (hasLeadingSpace())
+      std::cerr << " [LeadingSpace]";
+    if (needsCleaning())
+      std::cerr << " [Spelling='" << std::string(Start, End) << "']";
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Character information.
+//===----------------------------------------------------------------------===//
+
+// Character classification table: one flag byte per unsigned char value,
+// filled in lazily by InitCharacterInfo().
+static unsigned char CharInfo[256];
+
+enum {
+  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
+  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
+  CHAR_LETTER   = 0x04,  // a-z,A-Z
+  CHAR_NUMBER   = 0x08,  // 0-9
+  CHAR_UNDER    = 0x10,  // _
+  CHAR_PERIOD   = 0x20   // .
+};
+
+/// InitCharacterInfo - Populate the CharInfo classification table.
+/// Idempotent: only the first call does any work.
+static void InitCharacterInfo() {
+  static bool isInited = false;
+  if (isInited) return;
+  isInited = true;
+  
+  // Initialize the CharInfo table.
+  // TODO: statically initialize this.
+  CharInfo[(int)' '] = CharInfo[(int)'\t'] = 
+  CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
+  CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
+  
+  CharInfo[(int)'_'] = CHAR_UNDER;
+  // Fix: '.' must be flagged CHAR_PERIOD; previously this entry was never
+  // set, so isNumberBody('.') was false and a pp-number like "3.14" would
+  // stop lexing at the period.
+  CharInfo[(int)'.'] = CHAR_PERIOD;
+  for (unsigned i = 'a'; i <= 'z'; ++i)
+    CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
+  for (unsigned i = '0'; i <= '9'; ++i)
+    CharInfo[i] = CHAR_NUMBER;
+}
+
+/// isIdentifierBody - Return true if this is the body character of an
+/// identifier, which is [a-zA-Z0-9_].
+static inline bool isIdentifierBody(unsigned char c) {
+  return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER);
+}
+
+/// isHorizontalWhitespace - Return true if this character is horizontal
+/// whitespace: ' ', '\t', '\f', '\v'.  Note that this returns false for '\0'.
+static inline bool isHorizontalWhitespace(unsigned char c) {
+  return CharInfo[c] & CHAR_HORZ_WS;
+}
+
+/// isWhitespace - Return true if this character is horizontal or vertical
+/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.  Note that this returns false
+/// for '\0'.
+static inline bool isWhitespace(unsigned char c) {
+  return CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS);
+}
+
+/// isNumberBody - Return true if this is the body character of a
+/// preprocessing number, which is [a-zA-Z0-9_.].
+static inline bool isNumberBody(unsigned char c) {
+  return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD);
+}
+
+//===----------------------------------------------------------------------===//
+// Diagnostics forwarding code.
+//===----------------------------------------------------------------------===//
+
+/// getSourceLocation - Return a source location identifier for the specified
+/// offset in the current file.
+SourceLocation Lexer::getSourceLocation(const char *Loc) const {
+  const char *BufStart = InputFile->getBufferStart();
+  assert(Loc >= BufStart && Loc <= InputFile->getBufferEnd()
+         && "Location out of range for this buffer!");
+  return SourceLocation(CurFileID, Loc-BufStart);
+}
+
+
+/// Diag - Forwarding function for diagnostics.  This translates a source
+/// position in the current buffer into a SourceLocation object for rendering.
+/// The bool result comes from the preprocessor; callers treat a true return
+/// as "stop lexing" (see e.g. LexStringLiteral).
+bool Lexer::Diag(const char *Loc, unsigned DiagID,
+                 const std::string &Msg) const {
+  return PP.Diag(getSourceLocation(Loc), DiagID, Msg);
+}
+
+//===----------------------------------------------------------------------===//
+// Trigraph and Escaped Newline Handling Code.
+//===----------------------------------------------------------------------===//
+
+/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
+/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
+static char GetTrigraphCharForLetter(char Letter) {
+  // Parallel tables: the i'th letter after "??" decodes to the i'th
+  // replacement character.
+  static const char Letters[]      = "=)(!'>/<-";
+  static const char Replacements[] = "#][|^}\\{~";
+  for (unsigned i = 0; Letters[i]; ++i)
+    if (Letters[i] == Letter)
+      return Replacements[i];
+  return 0;
+}
+
+/// DecodeTrigraphChar - If the specified character is a legal trigraph when
+/// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
+/// return the result character, otherwise return '\0'.  A null lexer
+/// suppresses all diagnostics.
+static char DecodeTrigraphChar(const char *CP, Lexer *L) {
+  char Res = GetTrigraphCharForLetter(*CP);
+  if (!Res || !L)
+    return Res;
+  if (!L->getFeatures().Trigraphs) {
+    L->Diag(CP-2, diag::trigraph_ignored);
+    return 0;
+  }
+  L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
+  return Res;
+}
+
+/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
+/// get its size, and return it.  This is tricky in several cases:
+///   1. If currently at the start of a trigraph, we warn about the trigraph,
+///      then either return the trigraph (skipping 3 chars) or the '?',
+///      depending on whether trigraphs are enabled or not.
+///   2. If this is an escaped newline (potentially with whitespace between
+///      the backslash and newline), implicitly skip the newline and return
+///      the char after it.
+///   3. If this is a UCN, return it.  FIXME: for C++?
+///
+/// This handles the slow/uncommon case of the getCharAndSize method.  Here we
+/// know that we can accumulate into Size, and that we have already incremented
+/// Ptr by Size bytes.
+///
+/// When this method is updated, getCharAndSizeSlowNoWarn (below) should be
+/// updated to match.
+///
+char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+                               LexerToken *Tok) {
+  // Note: a null Tok suppresses diagnostics and NeedsCleaning bookkeeping.
+  // If we have a slash, look for an escaped newline.
+  if (Ptr[0] == '\\') {
+    ++Size;
+    ++Ptr;
+Slash:
+    // Common case, backslash-char where the char is not whitespace.
+    if (!isWhitespace(Ptr[0])) return '\\';
+    
+    // See if we have optional whitespace characters followed by a newline.
+    {
+      unsigned SizeTmp = 0;
+      do {
+        ++SizeTmp;
+        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
+          // Remember that this token needs to be cleaned.
+          if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
+
+          // Warn if there was whitespace between the backslash and newline.
+          if (SizeTmp != 1 && Tok)
+            Diag(Ptr, diag::backslash_newline_space);
+          
+          // If this is a \r\n or \n\r, skip the newlines.
+          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
+              Ptr[SizeTmp-1] != Ptr[SizeTmp])
+            ++SizeTmp;
+          
+          // Found backslash<whitespace><newline>.  Parse the char after it.
+          Size += SizeTmp;
+          Ptr  += SizeTmp;
+          // Use slow version to accumulate a correct size field.
+          return getCharAndSizeSlow(Ptr, Size, Tok);
+        }
+      } while (isWhitespace(Ptr[SizeTmp]));
+    }
+      
+    // Otherwise, this is not an escaped newline, just return the slash.
+    return '\\';
+  }
+  
+  // If this is a trigraph, process it.
+  if (Ptr[0] == '?' && Ptr[1] == '?') {
+    // If this is actually a legal trigraph (not something like "??x"), emit
+    // a trigraph warning.  If so, and if trigraphs are enabled, return it.
+    if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
+      // Remember that this token needs to be cleaned.
+      if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
+
+      Ptr += 3;
+      Size += 3;
+      // A '??/' trigraph decodes to a backslash, which may itself begin an
+      // escaped newline; re-run the escaped-newline logic above.
+      if (C == '\\') goto Slash;
+      return C;
+    }
+  }
+  
+  // If this is neither, return a single character.
+  ++Size;
+  return *Ptr;
+}
+
+/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
+/// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,
+/// and that we have already incremented Ptr by Size bytes.
+///
+/// This is the diagnostic-free twin of getCharAndSizeSlow: no warnings, no
+/// token flags, used when re-lexing a token's spelling.
+///
+/// When this method is updated, getCharAndSizeSlow (above) should be updated to
+/// match.
+static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+                                     const LangOptions &Features) {
+  // If we have a slash, look for an escaped newline.
+  if (Ptr[0] == '\\') {
+    ++Size;
+    ++Ptr;
+Slash:
+    // Common case, backslash-char where the char is not whitespace.
+    if (!isWhitespace(Ptr[0])) return '\\';
+    
+    // See if we have optional whitespace characters followed by a newline.
+    {
+      unsigned SizeTmp = 0;
+      do {
+        ++SizeTmp;
+        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
+          
+          // If this is a \r\n or \n\r, skip the newlines.
+          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
+              Ptr[SizeTmp-1] != Ptr[SizeTmp])
+            ++SizeTmp;
+          
+          // Found backslash<whitespace><newline>.  Parse the char after it.
+          Size += SizeTmp;
+          Ptr  += SizeTmp;
+          
+          // Use slow version to accumulate a correct size field.
+          return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+        }
+      } while (isWhitespace(Ptr[SizeTmp]));
+    }
+    
+    // Otherwise, this is not an escaped newline, just return the slash.
+    return '\\';
+  }
+  
+  // If this is a trigraph, process it.
+  if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+    // If this is actually a legal trigraph (not something like "??x"), return
+    // it.
+    if (char C = GetTrigraphCharForLetter(Ptr[2])) {
+      Ptr += 3;
+      Size += 3;
+      // A decoded backslash may begin an escaped newline; re-run that logic.
+      if (C == '\\') goto Slash;
+      return C;
+    }
+  }
+  
+  // If this is neither, return a single character.
+  ++Size;
+  return *Ptr;
+}
+
+/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
+/// emit a warning.
+static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
+                                        const LangOptions &Features) {
+  // '?' and '\\' are the only characters that can begin a trigraph, escaped
+  // newline, or UCN; anything else is handled by the slow path.
+  if (Ptr[0] == '?' || Ptr[0] == '\\') {
+    Size = 0;
+    return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+  }
+  
+  // Simple character: consume exactly one byte.
+  Size = 1;
+  return *Ptr;
+}
+
+
+/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding.  In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
+std::string Lexer::getSpelling(const LexerToken &Tok,
+                               const LangOptions &Features) {
+  assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!");
+  
+  // Fast path: no trigraphs or escaped newlines, so the raw buffer bytes
+  // already are the spelling.
+  if (!Tok.needsCleaning())
+    return std::string(Tok.getStart(), Tok.getEnd());
+  
+  // Slow path: re-scan the token one logical character at a time, folding
+  // away escaped newlines and decoding trigraphs.
+  std::string Result;
+  Result.reserve(Tok.getEnd()-Tok.getStart());
+  
+  const char *Ptr = Tok.getStart();
+  while (Ptr != Tok.getEnd()) {
+    unsigned CharSize;
+    Result += getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    Ptr += CharSize;
+  }
+  // Cleaning always shrinks the token, so equal sizes mean a bogus flag.
+  assert(Result.size() != unsigned(Tok.getEnd()-Tok.getStart()) &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  return Result;
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as an std::string.  The caller is required
+/// to allocate enough space for the token, which is guaranteed to be at most
+/// Tok.End-Tok.Start bytes long.  The actual length of the token is returned.
+unsigned Lexer::getSpelling(const LexerToken &Tok, char *Buffer,
+                            const LangOptions &Features) {
+  assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!");
+
+  // If this token contains nothing interesting, copy the bytes directly.
+  if (!Tok.needsCleaning()) {
+    unsigned Size = Tok.getEnd()-Tok.getStart();
+    memcpy(Buffer, Tok.getStart(), Size);
+    return Size;
+  }
+  
+  // Otherwise, hard case: relex the characters into the caller's buffer,
+  // folding escaped newlines and decoding trigraphs.  (An unused std::string
+  // temporary was removed from this path.)
+  char *OutBuf = Buffer;
+  for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) {
+    unsigned CharSize;
+    *OutBuf++ = getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    Ptr += CharSize;
+  }
+  // Cleaning always shrinks the token, so equal sizes mean a bogus flag.
+  assert(OutBuf-Buffer != Tok.getEnd()-Tok.getStart() &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  
+  return OutBuf-Buffer;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper methods for lexing.
+//===----------------------------------------------------------------------===//
+
+/// LexIdentifier - Lex the remainder of an identifier; on entry the first
+/// character has already been consumed and CurPtr points past it.  Sets the
+/// token kind and end, resolves its IdentifierInfo (cleaning escaped
+/// newlines/trigraphs into a stack buffer when needed), and hands the token
+/// to the preprocessor for keyword/macro handling.  Returns true on error.
+bool Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) {
+  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+  // 'Size' is only used by the slow ($/backslash/trigraph) path below.
+  unsigned Size;
+  unsigned char C = *CurPtr++;
+  while (isIdentifierBody(C)) {
+    C = *CurPtr++;
+  }
+  --CurPtr;   // Back up over the skipped character.
+
+  // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
+  // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
+  // FIXME: universal chars.
+  if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
+FinishIdentifier:
+    Result.SetEnd(BufferPtr = CurPtr);
+    Result.SetKind(tok::identifier);
+    
+    // Look up this token, see if it is a macro, or if it is a language keyword.
+    const char *SpelledTokStart, *SpelledTokEnd;
+    if (!Result.needsCleaning()) {
+      // No cleaning needed, just use the characters from the lexed buffer.
+      SpelledTokStart = Result.getStart();
+      SpelledTokEnd   = Result.getEnd();
+    } else {
+      // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
+      // The cleaned spelling is guaranteed to be at most End-Start bytes
+      // (see getSpelling), so the alloca is large enough.
+      char *TmpBuf = (char*)alloca(Result.getEnd()-Result.getStart());
+      // This 'Size' shadows the outer one, which is unused on this branch.
+      unsigned Size = getSpelling(Result, TmpBuf);
+      SpelledTokStart = TmpBuf;
+      SpelledTokEnd = TmpBuf+Size;
+    }
+    
+    Result.SetIdentifierInfo(PP.getIdentifierInfo(SpelledTokStart,
+                                                  SpelledTokEnd));
+    return PP.HandleIdentifier(Result);
+  }
+  
+  // Otherwise, $,\,? in identifier found.  Enter slower path.
+  
+  C = getCharAndSize(CurPtr, Size);
+  while (1) {
+    if (C == '$') {
+      // If we hit a $ and they are not supported in identifiers, we are done.
+      if (!Features.DollarIdents) goto FinishIdentifier;
+      
+      // Otherwise, emit a diagnostic and continue.
+      if (Diag(CurPtr, diag::ext_dollar_in_identifier))
+        return true;
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isIdentifierBody(C)) { // FIXME: universal chars.
+      // Found end of identifier.
+      goto FinishIdentifier;
+    }
+
+    // Otherwise, this character is good, consume it.
+    CurPtr = ConsumeChar(CurPtr, Size, Result);
+
+    C = getCharAndSize(CurPtr, Size);
+    while (isIdentifierBody(C)) { // FIXME: universal chars.
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+    }
+  }
+}
+
+
+/// LexNumericConstant - Lex the remainder of an integer or floating point
+/// constant (a preprocessing number); CurPtr points just past the first
+/// character of the constant.  Sign characters following an exponent
+/// ('e'/'E', or 'p'/'P' for hex floats when enabled) are consumed by
+/// recursing past the sign.  Returns false on success.
+bool Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
+  unsigned Size;
+  char C = getCharAndSize(CurPtr, Size);
+  // PrevCh remembers the last accepted character, so an exponent sign can be
+  // recognized after falling out of the loop.
+  char PrevCh = 0;
+  while (isNumberBody(C)) { // FIXME: universal chars?
+    CurPtr = ConsumeChar(CurPtr, Size, Result);
+    PrevCh = C;
+    C = getCharAndSize(CurPtr, Size);
+  }
+  
+  // If we fell out, check for a sign, due to 1e+12.  If we have one, continue.
+  if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
+    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+  // If we have a hex FP constant, continue.
+  if (Features.HexFloats &&
+      (C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))
+    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+  
+  Result.SetKind(tok::numeric_constant);
+
+  // Update the end of token position as well as the BufferPtr instance var.
+  Result.SetEnd(BufferPtr = CurPtr);
+  return false;
+}
+
+/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
+/// either " or L".  Embedded null characters and unterminated strings are
+/// diagnosed; after an unterminated string, lexing restarts just before the
+/// newline/EOF.  Returns true if a diagnostic aborted lexing.
+bool Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
+  const char *NulCharacter = 0; // Does this string contain the \0 character?
+  
+  char C = getAndAdvanceChar(CurPtr, Result);
+  while (C != '"') {
+    // Skip escaped characters.
+    if (C == '\\') {
+      // Skip the escaped character.
+      C = getAndAdvanceChar(CurPtr, Result);
+    } else if (C == '\n' || C == '\r' ||             // Newline.
+               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+      if (Diag(Result.getStart(), diag::err_unterminated_string))
+        return true;
+      // Rewind to just before the newline/EOF and lex a fresh token there.
+      BufferPtr = CurPtr-1;
+      return LexTokenInternal(Result);
+    } else if (C == 0) {
+      NulCharacter = CurPtr-1;
+    }
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+  
+  if (NulCharacter && Diag(NulCharacter, diag::null_in_string))
+    return true;
+
+  Result.SetKind(tok::string_literal);
+
+  // Update the end of token position as well as the BufferPtr instance var.
+  Result.SetEnd(BufferPtr = CurPtr);
+  return false;
+}
+
+/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
+/// after having lexed the '<' character.  This is used for #include filenames.
+/// NOTE(review): backslashes are skipped as escapes here, but the C
+/// header-name grammar has no escape sequences -- confirm this is intended.
+bool Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
+  const char *NulCharacter = 0; // Does this string contain the \0 character?
+  
+  char C = getAndAdvanceChar(CurPtr, Result);
+  while (C != '>') {
+    // Skip escaped characters.
+    if (C == '\\') {
+      // Skip the escaped character.
+      C = getAndAdvanceChar(CurPtr, Result);
+    } else if (C == '\n' || C == '\r' ||             // Newline.
+               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+      if (Diag(Result.getStart(), diag::err_unterminated_string))
+        return true;
+      // Rewind to just before the newline/EOF and lex a fresh token there.
+      BufferPtr = CurPtr-1;
+      return LexTokenInternal(Result);
+    } else if (C == 0) {
+      NulCharacter = CurPtr-1;
+    }
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+  
+  if (NulCharacter && Diag(NulCharacter, diag::null_in_string))
+    return true;
+  
+  Result.SetKind(tok::angle_string_literal);
+  
+  // Update the end of token position as well as the BufferPtr instance var.
+  Result.SetEnd(BufferPtr = CurPtr);
+  return false;
+}
+
+
+/// LexCharConstant - Lex the remainder of a character constant, after having
+/// lexed either ' or L'.  Diagnoses empty (''), unterminated, and
+/// null-containing constants.  Returns true if a diagnostic aborted lexing.
+bool Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
+  const char *NulCharacter = 0; // Does this character contain the \0 character?
+
+  // Handle the common case of 'x' and '\y' efficiently.
+  char C = getAndAdvanceChar(CurPtr, Result);
+  if (C == '\'') {
+    if (Diag(Result.getStart(), diag::err_empty_character))
+      return true;
+    // Empty constant: resume lexing right after the closing quote.
+    BufferPtr = CurPtr;
+    return LexTokenInternal(Result);
+  } else if (C == '\\') {
+    // Skip the escaped character.
+    // FIXME: UCN's.
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+  
+  if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
+    ++CurPtr;
+  } else {
+    // Fall back on generic code for embedded nulls, newlines, wide chars.
+    do {
+      // Skip escaped characters.
+      if (C == '\\') {
+        // Skip the escaped character.
+        C = getAndAdvanceChar(CurPtr, Result);
+      } else if (C == '\n' || C == '\r' ||               // Newline.
+                 (C == 0 && CurPtr-1 == BufferEnd)) {    // End of file.
+        if (Diag(Result.getStart(), diag::err_unterminated_char))
+          return true;
+        // Rewind to just before the newline/EOF and lex a fresh token there.
+        BufferPtr = CurPtr-1;
+        return LexTokenInternal(Result);
+      } else if (C == 0) {
+        NulCharacter = CurPtr-1;
+      }
+      C = getAndAdvanceChar(CurPtr, Result);
+    } while (C != '\'');
+  }
+  
+  if (NulCharacter && Diag(NulCharacter, diag::null_in_char))
+    return true;
+
+  Result.SetKind(tok::char_constant);
+  
+  // Update the end of token position as well as the BufferPtr instance var.
+  Result.SetEnd(BufferPtr = CurPtr);
+  return false;
+}
+
+/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
+/// Update BufferPtr to point to the next non-whitespace character and return.
+/// Returns true only if a called comment-skipper emits a fatal diagnostic.
+bool Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
+  // Whitespace - Skip it, then return the token after the whitespace.
+  unsigned char Char = *CurPtr;  // Skip consecutive spaces efficiently.
+  while (1) {
+    // Skip horizontal whitespace very aggressively.
+    while (isHorizontalWhitespace(Char))
+      Char = *++CurPtr;
+    
+    // Otherwise if we see something other than whitespace, we're done.
+    if (Char != '\n' && Char != '\r')
+      break;
+    
+    if (ParsingPreprocessorDirective) {
+      // End of preprocessor directive line, let LexTokenInternal handle this.
+      BufferPtr = CurPtr;
+      return false;
+    }
+    
+    // ok, but handle newline.
+    // The returned token is at the start of the line.
+    Result.SetFlag(LexerToken::StartOfLine);
+    // No leading whitespace seen so far.
+    Result.ClearFlag(LexerToken::LeadingSpace);
+    Char = *++CurPtr;
+  }
+
+  // If this isn't immediately after a newline, there is leading space.
+  char PrevChar = CurPtr[-1];
+  if (PrevChar != '\n' && PrevChar != '\r')
+    Result.SetFlag(LexerToken::LeadingSpace);
+
+  // If the next token is obviously a // or /* */ comment, skip it efficiently
+  // too (without going through the big switch stmt).
+  if (Char == '/' && CurPtr[1] == '/') {
+    Result.SetStart(CurPtr);
+    return SkipBCPLComment(Result, CurPtr+1);
+  }
+  if (Char == '/' && CurPtr[1] == '*') {
+    Result.SetStart(CurPtr);
+    return SkipBlockComment(Result, CurPtr+2);
+  }
+  BufferPtr = CurPtr;
+  return false;
+}
+
+/// SkipBCPLComment - We have just read the // characters from input.  Skip
+/// until we find the newline character that terminates the comment.  Then
+/// update BufferPtr and return.  Returns true if a fatal diagnostic was
+/// emitted.
+bool Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
+  // If BCPL comments aren't explicitly enabled for this language, emit an
+  // extension warning.
+  if (!Features.BCPLComment) {
+    if (Diag(Result.getStart(), diag::ext_bcpl_comment))
+      return true;
+    
+    // Mark them enabled so we only emit one warning for this translation
+    // unit.
+    Features.BCPLComment = true;
+  }
+  
+  // Scan over the body of the comment.  The common case, when scanning, is that
+  // the comment contains normal ascii characters with nothing interesting in
+  // them.  As such, optimize for this case with the inner loop.
+  char C;
+  do {
+    C = *CurPtr;
+    // FIXME: just scan for a \n or \r character.  If we find a \n character,
+    // scan backwards, checking to see if it's an escaped newline, like we do
+    // for block comments.
+    
+    // Skip over characters in the fast loop.
+    while (C != 0 &&                // Potentially EOF.
+           C != '\\' &&             // Potentially escaped newline.
+           C != '?' &&              // Potentially trigraph.
+           C != '\n' && C != '\r')  // Newline or DOS-style newline.
+      C = *++CurPtr;
+
+    // If this is a newline, we're done.
+    if (C == '\n' || C == '\r')
+      break;  // Found the newline? Break out!
+    
+    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
+    // properly decode the character.
+    const char *OldPtr = CurPtr;
+    C = getAndAdvanceChar(CurPtr, Result);
+    
+    // If we read multiple characters, and one of those characters was a \r or
+    // \n, then we had an escaped newline within the comment.  Emit diagnostic.
+    if (CurPtr != OldPtr+1) {
+      for (; OldPtr != CurPtr; ++OldPtr)
+        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
+          if (Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment))
+            return true;
+        }
+    }
+    
+    // getAndAdvanceChar advanced one past the nul at end-of-buffer: stop.
+    if (CurPtr == BufferEnd+1) goto FoundEOF;
+  } while (C != '\n' && C != '\r');
+
+  // Found and did not consume a newline.
+
+  // If we are inside a preprocessor directive and we see the end of line,
+  // return immediately, so that the lexer can return this as an EOM token.
+  if (ParsingPreprocessorDirective) {
+    BufferPtr = CurPtr;
+    return false;
+  }
+  
+  // Otherwise, eat the \n character.  We don't care if this is a \n\r or
+  // \r\n sequence.
+  ++CurPtr;
+    
+  // The next returned token is at the start of the line.
+  Result.SetFlag(LexerToken::StartOfLine);
+  // No leading whitespace seen so far.
+  Result.ClearFlag(LexerToken::LeadingSpace);
+    
+  // It is common for the tokens immediately after a // comment to be
+  // whitespace (indentation for the next line).  Instead of going through the
+  // big switch, handle it efficiently now.
+  if (isWhitespace(*CurPtr)) {
+    Result.SetFlag(LexerToken::LeadingSpace);
+    return SkipWhitespace(Result, CurPtr+1);
+  }
+
+  BufferPtr = CurPtr;
+  return false;
+
+FoundEOF:   // If we ran off the end of the buffer, return EOF.
+  BufferPtr = CurPtr-1;
+  return false;
+}
+
+/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
+/// character (either \n or \r) is part of an escaped newline sequence.  Issue
+/// a diagnostic if so.  We know that the newline is inside of a block comment.
+/// On exit, PrevChar is the character before the escape (the comment ends here
+/// when PrevChar == '*').  Returns true if a fatal diagnostic was emitted.
+bool Lexer::isBlockCommentEndOfEscapedNewLine(const char *CurPtr,
+                                              char &PrevChar) {
+  assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
+  PrevChar = 0;
+  
+  // Back up off the newline.
+  --CurPtr;
+  
+  // If this is a two-character newline sequence, skip the other character.
+  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+    // \n\n or \r\r -> not escaped newline.
+    if (CurPtr[0] == CurPtr[1])
+      return false;
+    // \n\r or \r\n -> skip the newline.
+    --CurPtr;
+  }
+  
+  // If we have horizontal whitespace, skip over it.  We allow whitespace
+  // between the slash and newline.
+  bool HasSpace = false;
+  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+    --CurPtr;
+    HasSpace = true;
+  }
+  
+  // If we have a slash, we know this is an escaped newline.
+  if (*CurPtr == '\\') {
+    // Only interesting if the escaped newline follows a '*' (comment end).
+    PrevChar = CurPtr[-1];
+    if (PrevChar != '*') return false;
+  } else {
+    // It isn't a slash, is it the ?? / trigraph?
+    if (*CurPtr != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?')
+      return false;
+    // This is the trigraph.  Emit a stern warning!
+    if ((PrevChar = CurPtr[-3]) != '*') return false;
+    CurPtr -= 2;
+
+    // If no trigraphs are enabled, warn that we ignored this trigraph and
+    // ignore this * character.
+    if (!Features.Trigraphs) {
+      // PrevChar = 0 so the caller does not treat this as a comment end.
+      PrevChar = 0;
+      return Diag(CurPtr, diag::trigraph_ignored_block_comment);
+    } else {
+      if (Diag(CurPtr, diag::trigraph_ends_block_comment))
+        return true;
+    }
+  }
+  
+  // Warn about having an escaped newline between the */ characters.
+  if (Diag(CurPtr, diag::escaped_newline_block_comment_end))
+    return true;
+  
+  // If there was space between the backslash and newline, warn about it.
+  if (HasSpace &&
+      Diag(CurPtr, diag::backslash_newline_space))
+    return true;
+  
+  return false;
+}
+
+/// SkipBlockComment - We have just read the /* characters from input.  Read
+/// until we find the */ characters that terminate the comment.  Note that we
+/// don't bother decoding trigraphs or escaped newlines in block comments,
+/// because they cannot cause the comment to end.  The only thing that can
+/// happen is the comment could end with an escaped newline between the */ end
+/// of comment.  Returns true if a fatal diagnostic was emitted, false if
+/// lexing should continue after the comment.
+bool Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
+  // Scan one character past where we should, looking for a '/' character.  Once
+  // we find it, check to see if it was preceded by a *.  This common
+  // optimization helps people who like to put a lot of * characters in their
+  // comments.
+  unsigned char C = *CurPtr++;
+  if (C == 0 && CurPtr == BufferEnd+1) {
+    // The /* opened at the very end of the buffer: diagnose and give up.
+    if (Diag(Result.getStart(), diag::err_unterminated_block_comment))
+      return true;
+    BufferPtr = CurPtr-1;
+    return false;
+  }
+  
+  while (1) {
+    // Skip over all non-interesting characters.
+    // TODO: Vectorize this.  Note: memchr on Darwin is slower than this loop.
+    while (C != '/' && C != '\0')
+      C = *CurPtr++;
+    
+    if (C == '/') {
+      if (CurPtr[-2] == '*')  // We found the final */.  We're done!
+        break;
+      
+      if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
+        // The '/' is preceded by a newline: the comment may still end here if
+        // that newline is an escaped newline following a '*'.
+        char Prev;
+        if (isBlockCommentEndOfEscapedNewLine(CurPtr-2, Prev))
+          return true;
+        if (Prev == '*') {
+          // We found the final */, though it had an escaped newline between the
+          // * and /.  We're done!
+          break;
+        }
+      }
+      if (CurPtr[0] == '*' && CurPtr[1] != '/') {
+        // If this is a /* inside of the comment, emit a warning.  Don't do this
+        // if this is a /*/, which will end the comment.  This misses cases with
+        // embedded escaped newlines, but oh well.
+        if (Diag(CurPtr-1, diag::nested_block_comment))
+          return true;
+      }
+    } else if (C == 0 && CurPtr == BufferEnd+1) {
+      if (Diag(Result.getStart(), diag::err_unterminated_block_comment))
+        return true;
+      // Note: the user probably forgot a */.  We could continue immediately
+      // after the /*, but this would involve lexing a lot of what really is the
+      // comment, which surely would confuse the parser.
+      BufferPtr = CurPtr-1;
+      return false;
+    }
+    C = *CurPtr++;
+  }
+
+  // It is common for the tokens immediately after a /**/ comment to be
+  // whitespace.  Instead of going through the big switch, handle it
+  // efficiently now.
+  if (isHorizontalWhitespace(*CurPtr)) {
+    Result.SetFlag(LexerToken::LeadingSpace);
+    return SkipWhitespace(Result, CurPtr+1);
+  }
+
+  // Otherwise, just return so that the next character will be lexed as a token.
+  BufferPtr = CurPtr;
+  Result.SetFlag(LexerToken::LeadingSpace);
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Primary Lexing Entry Points
+//===----------------------------------------------------------------------===//
+
+/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
+/// (potentially) macro expand the filename.  On success Result holds the
+/// filename token (or tok::eom if extra junk after the filename was discarded
+/// with a diagnostic).  Returns true if a fatal diagnostic was emitted.
+bool Lexer::LexIncludeFilename(LexerToken &Result) {
+  assert(ParsingPreprocessorDirective &&
+         ParsingFilename == false &&
+         "Must be in a preprocessing directive!");
+
+  // We are now parsing a filename!
+  ParsingFilename = true;
+  
+  // There should be exactly two tokens here if everything is good: first the
+  // filename, then the EOM.
+  if (Lex(Result)) return true;
+
+  // We should have gotten the filename now.
+  ParsingFilename = false;
+
+  // No filename?
+  if (Result.getKind() == tok::eom)
+    return Diag(Result.getStart(), diag::err_pp_expects_filename);
+  
+  // Verify that there is nothing after the filename, other than EOM.
+  LexerToken EndTok;
+  if (Lex(EndTok)) return true;
+
+  if (EndTok.getKind() != tok::eom) {
+    if (Diag(Result.getStart(), diag::err_pp_expects_filename))
+      return true;
+    
+    // Lex until the end of the preprocessor directive line.
+    while (EndTok.getKind() != tok::eom) {
+      if (Lex(EndTok)) return true;
+    }
+    
+    // Tell the caller the directive was malformed by returning eom.
+    Result.SetKind(tok::eom);
+  }
+  
+  // We're done now.
+  return false;
+}
+
+/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
+/// uninterpreted string.  This switches the lexer out of directive mode.
+/// Characters are decoded with getAndAdvanceChar, so escaped newlines and
+/// trigraphs are resolved before being appended to the returned string.
+std::string Lexer::ReadToEndOfLine() {
+  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
+         "Must be in a preprocessing directive!");
+  std::string Result;
+  LexerToken Tmp;
+
+  // CurPtr - Cache BufferPtr in an automatic variable.
+  const char *CurPtr = BufferPtr;
+  Tmp.SetStart(CurPtr);
+
+  while (1) {
+    char Char = getAndAdvanceChar(CurPtr, Tmp);
+    switch (Char) {
+    default:
+      Result += Char;
+      break;
+    case 0:  // Null.
+      // Found end of file?
+      if (CurPtr-1 != BufferEnd) {
+        // Nope, normal character, continue.
+        Result += Char;
+        break;
+      }
+      // FALL THROUGH.
+    case '\r':
+    case '\n':
+      // Okay, we found the end of the line. First, back up past the \0, \r, \n.
+      assert(CurPtr[-1] == Char && "Trigraphs for newline?");
+      BufferPtr = CurPtr-1;
+      
+      // Next, lex the character, which should handle the EOM transition.
+      bool Err = Lex(Tmp);
+      assert(Tmp.getKind() == tok::eom && "Unexpected token!");
+      assert(!Err && "Shouldn't have error exiting macro!");
+      (void)Err;  // Only used in asserts; silence NDEBUG unused warning.
+      
+      // Finally, we're done, return the string we found.
+      return Result;
+    }
+  }
+}
+
+/// LexEndOfFile - CurPtr points to the end of this file.  Handle this
+/// condition, reporting diagnostics and handling other edge cases as required.
+/// Returns true if a fatal diagnostic was emitted.
+bool Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
+  // If we hit the end of the file while parsing a preprocessor directive,
+  // end the preprocessor directive first.  The next token returned will
+  // then be the end of file.
+  if (ParsingPreprocessorDirective) {
+    // Done parsing the "line".
+    ParsingPreprocessorDirective = false;
+    Result.SetKind(tok::eom);
+    // Update the end of token position as well as the BufferPtr instance var.
+    Result.SetEnd(BufferPtr = CurPtr);
+    return false;
+  }        
+
+  // If we are in a #if directive, emit an error.
+  while (!ConditionalStack.empty()) {
+    if (Diag(ConditionalStack.back().IfLoc,
+             diag::err_pp_unterminated_conditional))
+      return true;
+    ConditionalStack.pop_back();
+  }  
+  
+  // If the file was empty or didn't end in a newline, issue a pedwarn.
+  // NOTE(review): for a completely empty buffer CurPtr[-1] reads before the
+  // buffer start -- verify that callers guarantee at least one character.
+  if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r' && 
+      Diag(BufferEnd, diag::ext_no_newline_eof))
+    return true;
+  
+  BufferPtr = CurPtr;
+  return PP.HandleEndOfFile(Result);
+}
+
+
+/// LexTokenInternal - This implements a simple C family lexer.  It is an
+/// extremely performance critical piece of code.  This assumes that the buffer
+/// has a null character at the end of the file.  Return true if an error
+/// occurred and compilation should terminate, false if normal.  This returns a
+/// preprocessing token, not a normal token, as such, it is an internal
+/// interface.  It assumes that the Flags of result have been cleared before
+/// calling this.
+bool Lexer::LexTokenInternal(LexerToken &Result) {
+LexNextToken:
+  // New token, can't need cleaning yet.
+  Result.ClearFlag(LexerToken::NeedsCleaning);
+  
+  // CurPtr - Cache BufferPtr in an automatic variable.
+  const char *CurPtr = BufferPtr;
+  Result.SetStart(CurPtr);
+
+  unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below.
+  
+  // Read a character, advancing over it.
+  char Char = getAndAdvanceChar(CurPtr, Result);
+  switch (Char) {
+  case 0:  // Null.
+    // Found end of file?
+    if (CurPtr-1 == BufferEnd)
+      return LexEndOfFile(Result, CurPtr-1);  // Retreat back into the file.
+    
+    if (Diag(CurPtr-1, diag::null_in_file))
+      return true;
+    Result.SetFlag(LexerToken::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr)) return true;
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  case '\n':
+  case '\r':
+    // If we are inside a preprocessor directive and we see the end of line,
+    // we know we are done with the directive, so return an EOM token.
+    if (ParsingPreprocessorDirective) {
+      // Done parsing the "line".
+      ParsingPreprocessorDirective = false;
+      
+      // Since we consumed a newline, we are back at the start of a line.
+      IsAtStartOfLine = true;
+      
+      Result.SetKind(tok::eom);
+      break;
+    }
+    // The returned token is at the start of the line.
+    Result.SetFlag(LexerToken::StartOfLine);
+    // No leading whitespace seen so far.
+    Result.ClearFlag(LexerToken::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr)) return true;
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  case ' ':
+  case '\t':
+  case '\f':
+  case '\v':
+    Result.SetFlag(LexerToken::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr)) return true;
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+
+  case 'L':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+
+    // Wide string literal.
+    if (Char == '"')
+      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+
+    // Wide character constant.
+    if (Char == '\'')
+      return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+    // FALL THROUGH, treating L like the start of an identifier.
+    
+  // C99 6.4.2: Identifiers.
+  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
+  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+  case 'V': case 'W': case 'X': case 'Y': case 'Z':
+  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+  case 'v': case 'w': case 'x': case 'y': case 'z':
+  case '_':
+    return LexIdentifier(Result, CurPtr);
+    
+  // C99 6.4.4.1: Integer Constants.
+  // C99 6.4.4.2: Floating Constants.
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+    return LexNumericConstant(Result, CurPtr);
+    
+  // C99 6.4.4: Character Constants.
+  case '\'':
+    return LexCharConstant(Result, CurPtr);
+
+  // C99 6.4.5: String Literals.
+  case '"':
+    return LexStringLiteral(Result, CurPtr);
+
+  // C99 6.4.6: Punctuators.
+  case '?':
+    Result.SetKind(tok::question);
+    break;
+  case '[':
+    Result.SetKind(tok::l_square);
+    break;
+  case ']':
+    Result.SetKind(tok::r_square);
+    break;
+  case '(':
+    Result.SetKind(tok::l_paren);
+    break;
+  case ')':
+    Result.SetKind(tok::r_paren);
+    break;
+  case '{':
+    Result.SetKind(tok::l_brace);
+    break;
+  case '}':
+    Result.SetKind(tok::r_brace);
+    break;
+  case '.':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char >= '0' && Char <= '9') {
+      return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+    } else if (Features.CPlusPlus && Char == '*') {
+      Result.SetKind(tok::periodstar);
+      CurPtr += SizeTmp;
+    } else if (Char == '.' &&
+               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
+      Result.SetKind(tok::ellipsis);
+      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                           SizeTmp2, Result);
+    } else {
+      Result.SetKind(tok::period);
+    }
+    break;
+  case '&':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '&') {
+      Result.SetKind(tok::ampamp);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '=') {
+      Result.SetKind(tok::ampequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::amp);
+    }
+    break;
+  case '*': 
+    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+      Result.SetKind(tok::starequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::star);
+    }
+    break;
+  case '+':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '+') {
+      Result.SetKind(tok::plusplus);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '=') {
+      Result.SetKind(tok::plusequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::plus);
+    }
+    break;
+  case '-':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '-') {
+      Result.SetKind(tok::minusminus);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '>' && Features.CPlusPlus && 
+               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {
+      Result.SetKind(tok::arrowstar);  // C++ ->*
+      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                           SizeTmp2, Result);
+    } else if (Char == '>') {
+      Result.SetKind(tok::arrow);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '=') {
+      Result.SetKind(tok::minusequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::minus);
+    }
+    break;
+  case '~':
+    Result.SetKind(tok::tilde);
+    break;
+  case '!':
+    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+      Result.SetKind(tok::exclaimequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::exclaim);
+    }
+    break;
+  case '/':
+    // 6.4.9: Comments
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '/') {         // BCPL comment.
+      Result.SetFlag(LexerToken::LeadingSpace);
+      if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+        return true;
+      goto LexNextToken;   // GCC isn't tail call eliminating.
+    } else if (Char == '*') {  // /**/ comment.
+      Result.SetFlag(LexerToken::LeadingSpace);
+      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+        return true;
+      goto LexNextToken;   // GCC isn't tail call eliminating.
+    } else if (Char == '=') {
+      Result.SetKind(tok::slashequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::slash);
+    }
+    break;
+  case '%':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Result.SetKind(tok::percentequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.Digraphs && Char == '>') {
+      Result.SetKind(tok::r_brace);    // '%>' -> '}'
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.Digraphs && Char == ':') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      if (getCharAndSize(CurPtr, SizeTmp) == '%' &&
+          getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
+        Result.SetKind(tok::hashhash);   // '%:%:' -> '##'
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
+      } else {
+        Result.SetKind(tok::hash);       // '%:' -> '#'
+        
+        // We parsed a # character.  If this occurs at the start of the line,
+        // it's actually the start of a preprocessing directive.  Callback to
+        // the preprocessor to handle it.
+        // FIXME: -fpreprocessed mode??
+        if (Result.isAtStartOfLine() && !PP.isSkipping()) {
+          BufferPtr = CurPtr;
+          if (PP.HandleDirective(Result)) return true;
+          
+          // As an optimization, if the preprocessor didn't switch lexers, tail
+          // recurse.
+          if (PP.isCurrentLexer(this)) {
+            // Start a new token. If this is a #include or something, the PP may
+            // want us starting at the beginning of the line again.  If so, set
+            // the StartOfLine flag.
+            if (IsAtStartOfLine) {
+              Result.SetFlag(LexerToken::StartOfLine);
+              IsAtStartOfLine = false;
+            }
+            goto LexNextToken;   // GCC isn't tail call eliminating.
+          }
+          
+          return PP.Lex(Result);
+        }
+      }
+    } else {
+      Result.SetKind(tok::percent);
+    }
+    break;
+  case '<':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (ParsingFilename) {
+      // NOTE(review): this passes CurPtr+SizeTmp without ConsumeChar, so Char
+      // itself is never inspected by LexAngledStringLiteral -- verify that
+      // '<>' and '<' followed by a newline are handled as intended.
+      return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
+    } else if (Char == '<' &&
+               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
+      Result.SetKind(tok::lesslessequal);
+      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                           SizeTmp2, Result);
+    } else if (Char == '<') {
+      Result.SetKind(tok::lessless);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '=') {
+      Result.SetKind(tok::lessequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.Digraphs && Char == ':') {
+      Result.SetKind(tok::l_square); // '<:' -> '['
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.Digraphs && Char == '%') {
+      // C99 6.4.6p3: the digraph for '{' is '<%', so test for '%' here (this
+      // previously tested for '>', which is not a digraph after '<').
+      Result.SetKind(tok::l_brace); // '<%' -> '{'
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.CPPMinMax && Char == '?') {     // <?
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      if (Diag(Result.getStart(), diag::min_max_deprecated))
+        return true;
+
+      if (getCharAndSize(CurPtr, SizeTmp) == '=') {     // <?= 
+        Result.SetKind(tok::lessquestionequal);
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      } else {
+        Result.SetKind(tok::lessquestion);
+      }
+    } else {
+      Result.SetKind(tok::less);
+    }
+    break;
+  case '>':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Result.SetKind(tok::greaterequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '>' && 
+               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
+      Result.SetKind(tok::greatergreaterequal);
+      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                           SizeTmp2, Result);
+    } else if (Char == '>') {
+      Result.SetKind(tok::greatergreater);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.CPPMinMax && Char == '?') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      if (Diag(Result.getStart(), diag::min_max_deprecated))
+        return true;
+
+      if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+        Result.SetKind(tok::greaterquestionequal);    // >?=
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      } else {
+        Result.SetKind(tok::greaterquestion);         // >?
+      }
+    } else {
+      Result.SetKind(tok::greater);
+    }
+    break;
+  case '^':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Result.SetKind(tok::caretequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::caret);
+    }
+    break;
+  case '|':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Result.SetKind(tok::pipeequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '|') {
+      Result.SetKind(tok::pipepipe);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::pipe);
+    }
+    break;
+  case ':':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Features.Digraphs && Char == '>') {
+      Result.SetKind(tok::r_square); // ':>' -> ']'
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.CPlusPlus && Char == ':') {
+      Result.SetKind(tok::coloncolon);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {    
+      Result.SetKind(tok::colon);
+    }
+    break;
+  case ';':
+    Result.SetKind(tok::semi);
+    break;
+  case '=':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Result.SetKind(tok::equalequal);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {      
+      Result.SetKind(tok::equal);
+    }
+    break;
+  case ',':
+    Result.SetKind(tok::comma);
+    break;
+  case '#':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '#') {
+      Result.SetKind(tok::hashhash);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Result.SetKind(tok::hash);
+      // We parsed a # character.  If this occurs at the start of the line,
+      // it's actually the start of a preprocessing directive.  Callback to
+      // the preprocessor to handle it.
+      // FIXME: not in preprocessed mode??
+      if (Result.isAtStartOfLine() && !PP.isSkipping()) {
+        BufferPtr = CurPtr;
+        if (PP.HandleDirective(Result)) return true;
+        
+        // As an optimization, if the preprocessor didn't switch lexers, tail
+        // recurse.
+        if (PP.isCurrentLexer(this)) {
+          // Start a new token.  If this is a #include or something, the PP may
+          // want us starting at the beginning of the line again.  If so, set
+          // the StartOfLine flag.
+          if (IsAtStartOfLine) {
+            Result.SetFlag(LexerToken::StartOfLine);
+            IsAtStartOfLine = false;
+          }
+          goto LexNextToken;   // GCC isn't tail call eliminating.
+        }
+        return PP.Lex(Result);
+      }
+    }
+    break;
+
+  case '\\':
+    // FIXME: handle UCN's.
+    // FALL THROUGH.
+  default:
+    // Objective C support.
+    if (CurPtr[-1] == '@' && Features.ObjC1) {
+      Result.SetKind(tok::at);
+      break;
+    } else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
+      if (Diag(CurPtr-1, diag::ext_dollar_in_identifier))
+        return true;
+      return LexIdentifier(Result, CurPtr);
+    }
+    
+    if (!PP.isSkipping() && Diag(CurPtr-1, diag::err_stray_character))
+      return true;
+    BufferPtr = CurPtr;
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  }
+  
+  // Update the end of token position as well as the BufferPtr instance var.
+  Result.SetEnd(BufferPtr = CurPtr);
+  return false;
+}

Propchange: cfe/cfe/trunk/Lex/Lexer.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/Lexer.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/MacroExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/MacroExpander.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/MacroExpander.cpp (added)
+++ cfe/cfe/trunk/Lex/MacroExpander.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,42 @@
+//===--- MacroExpander.cpp - Lex from a macro expansion -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroExpander interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroExpander.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+using namespace llvm;
+using namespace clang;
+
+/// Lex - Lex and return a token from this macro stream.  Returns true if
+/// lexing should be aborted (propagated from the preprocessor callbacks),
+/// false if Tok was filled in with the next token of the expansion.
+bool MacroExpander::Lex(LexerToken &Tok) {
+  // Lexing off the end of the macro, pop this macro off the expansion stack.
+  if (CurToken == Macro.getNumTokens())
+    return PP.HandleEndOfMacro(Tok);
+  
+  // Get the next token to return.
+  Tok = Macro.getReplacementToken(CurToken++);
+
+  // If this is the first token, set the lexical properties of the token to
+  // match the lexical properties of the macro identifier.  This makes the
+  // expansion inherit the start-of-line/leading-space context of the macro
+  // name it replaces.
+  if (CurToken == 1) {
+    Tok.SetFlagValue(LexerToken::StartOfLine , AtStartOfLine);
+    Tok.SetFlagValue(LexerToken::LeadingSpace, HasLeadingSpace);
+  }
+  
+  // Handle recursive expansion!  Identifiers are routed back through the
+  // preprocessor, which may macro expand them or turn them into keywords.
+  if (Tok.getIdentifierInfo())
+    return PP.HandleIdentifier(Tok);
+
+  // Otherwise, return a normal token.
+  return false;
+}

Propchange: cfe/cfe/trunk/Lex/MacroExpander.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/MacroExpander.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/MacroInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/MacroInfo.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/MacroInfo.cpp (added)
+++ cfe/cfe/trunk/Lex/MacroInfo.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,28 @@
+//===--- MacroInfo.cpp - Information about #defined identifiers -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroInfo.h"
+#include <iostream>
+using namespace llvm;
+using namespace clang;
+
+/// dump - Print the macro to stderr, used for debugging.
+///
+void MacroInfo::dump() const {
+  std::cerr << "MACRO: ";
+  // Emit each replacement token followed by a two-space separator.
+  unsigned NumToks = ReplacementTokens.size();
+  for (unsigned Idx = 0; Idx != NumToks; ++Idx) {
+    ReplacementTokens[Idx].dump();
+    std::cerr << "  ";
+  }
+  std::cerr << "\n";
+}

Propchange: cfe/cfe/trunk/Lex/MacroInfo.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/MacroInfo.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/Makefile
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Makefile?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/Makefile (added)
+++ cfe/cfe/trunk/Lex/Makefile Wed Jul 11 11:22:17 2007
@@ -0,0 +1,21 @@
+##===- clang/Lex/Makefile ----------------------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by Chris Lattner and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+#
+#  This implements the Lexer library for the C-Language front-end.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME := clangLex
+BUILD_ARCHIVE = 1
+
+CPPFLAGS += -I$(LEVEL)/tools/clang/include
+
+include $(LEVEL)/Makefile.common
+

Propchange: cfe/cfe/trunk/Lex/Makefile

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/Makefile

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/PPExpressions.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/PPExpressions.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/PPExpressions.cpp (added)
+++ cfe/cfe/trunk/Lex/PPExpressions.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,367 @@
+//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Preprocessor::EvaluateDirectiveExpression method.
+//
+//===----------------------------------------------------------------------===//
+//
+// FIXME: implement testing for asserts.
+// FIXME: Parse integer constants correctly.  Reject 123.0, etc.
+// FIXME: Track signed/unsigned correctly.
+// FIXME: Track and report integer overflow correctly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Basic/Diagnostic.h"
+using namespace llvm;
+using namespace clang;
+
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive.  Sets Result to the boolean value
+/// of the expression.  Returns false normally, true if lexing must be aborted.
+/// (The MinPrec precedence bound belongs to EvaluateDirectiveSubExpr, which
+/// this function seeds with a minimum precedence of 1.)
+bool Preprocessor::EvaluateDirectiveExpression(bool &Result) {
+  // Peek ahead one token.
+  LexerToken Tok;
+  if (Lex(Tok)) return true;
+
+  // In error cases, bail out with false value.
+  Result = false;
+  
+  bool StopParse = false;
+  
+  int ResVal = 0;
+  if (EvaluateValue(ResVal, Tok, StopParse)) {
+    // The value failed to parse (EvaluateValue already diagnosed it).  Skip
+    // the rest of the directive line.
+    if (!StopParse && Tok.getKind() != tok::eom)
+      StopParse |= DiscardUntilEndOfDirective();
+    return StopParse;
+  }
+  
+  if (EvaluateDirectiveSubExpr(ResVal, 1, Tok, StopParse)) {
+    // Skip the rest of the macro line.
+    if (!StopParse && Tok.getKind() != tok::eom)
+      StopParse |= DiscardUntilEndOfDirective();
+    return StopParse;
+  }
+  
+  // If we aren't at the tok::eom token, something bad happened, like an extra
+  // ')' token.
+  if (Tok.getKind() != tok::eom) {
+    return Diag(Tok, diag::err_pp_expected_eol) ||
+           DiscardUntilEndOfDirective();
+  }
+  
+  Result = ResVal != 0;
+  return false;
+}
+
+/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
+/// return the computed value in Result.  Return true if there was an error
+/// parsing, setting StopParse if parsing should be aborted.
+bool Preprocessor::EvaluateValue(int &Result, LexerToken &PeekTok, 
+                                 bool &StopParse) {
+  Result = 0;
+  
+  // If this token's spelling is a pp-identifier, check to see if it is
+  // 'defined' or if it is a macro.  Note that we check here because many
+  // keywords are pp-identifiers, so we can't check the kind.
+  if (const IdentifierTokenInfo *II = PeekTok.getIdentifierInfo()) {
+    // If this identifier isn't 'defined' and it wasn't macro expanded, it turns
+    // into a simple 0.
+    if (strcmp(II->getName(), "defined")) {
+      Result = 0;
+      return (StopParse = Lex(PeekTok));
+    }
+
+    // Handle "defined X" and "defined(X)".
+    assert(!DisableMacroExpansion &&
+           "How could macro exp already be disabled?");
+    // Turn off macro expansion: the operand of 'defined' must be looked up
+    // without being expanded first.  It must be restored on EVERY exit from
+    // this scope, including all of the error paths below (previously the
+    // early-error returns leaked DisableMacroExpansion == true, disabling
+    // expansion for the rest of the translation unit).
+    DisableMacroExpansion = true;
+
+    // Get the next token.
+    if ((StopParse = Lex(PeekTok))) {
+      DisableMacroExpansion = false;
+      return true;
+    }
+
+    // Two options, it can either be a pp-identifier or a (.
+    bool InParens = false;
+    if (PeekTok.getKind() == tok::l_paren) {
+      // Found a paren, remember we saw it and skip it.
+      InParens = true;
+      if ((StopParse = Lex(PeekTok))) {
+        DisableMacroExpansion = false;
+        return true;
+      }
+    }
+    
+    // If we don't have a pp-identifier now, this is an error.
+    if ((II = PeekTok.getIdentifierInfo()) == 0) {
+      DisableMacroExpansion = false;
+      StopParse = Diag(PeekTok, diag::err_pp_defined_requires_identifier);
+      return true;
+    }
+    
+    // Otherwise, we got an identifier, is it defined to something?
+    Result = II->getMacroInfo() != 0;
+
+    // Consume identifier.
+    if ((StopParse = Lex(PeekTok))) {
+      DisableMacroExpansion = false;
+      return true;
+    }
+
+    // If we are in parens, ensure we have a trailing ).
+    if (InParens) {
+      if (PeekTok.getKind() != tok::r_paren) {
+        DisableMacroExpansion = false;
+        StopParse = Diag(PeekTok, diag::err_pp_missing_rparen);
+        return true;
+      }
+      // Consume the ).
+      if ((StopParse = Lex(PeekTok))) {
+        DisableMacroExpansion = false;
+        return true;
+      }
+    }
+    
+    DisableMacroExpansion = false;
+    return false;
+  }
+  
+  switch (PeekTok.getKind()) {
+  default:  // Non-value token.
+    StopParse = Diag(PeekTok, diag::err_pp_expr_bad_token);
+    return true;
+  case tok::eom:
+  case tok::r_paren:
+    // If there is no expression, report and exit.
+    StopParse = Diag(PeekTok, diag::err_pp_expected_value_in_expr);
+    return true;
+  case tok::numeric_constant: {
+    // FIXME: faster.  FIXME: track signs.
+    std::string Spell = Lexer::getSpelling(PeekTok, getLangOptions());
+    // FIXME: COMPUTE integer constants CORRECTLY.
+    Result = atoi(Spell.c_str());
+    return (StopParse = Lex(PeekTok));
+  }
+  case tok::l_paren:
+    if ((StopParse = Lex(PeekTok))) return true;  // Eat the (.
+    // Parse the value.
+    if (EvaluateValue(Result, PeekTok, StopParse)) return true;
+      
+    // If there are any binary operators involved, parse them.
+    if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, StopParse))
+      return StopParse;
+
+    if (PeekTok.getKind() != tok::r_paren) {
+      StopParse = Diag(PeekTok, diag::err_pp_expected_rparen);
+      return true;
+    }
+    if ((StopParse = Lex(PeekTok))) return true;  // Eat the ).
+    return false;
+ 
+  case tok::plus:
+    // Unary plus doesn't modify the value.
+    if ((StopParse = Lex(PeekTok))) return true;
+    return EvaluateValue(Result, PeekTok, StopParse);
+  case tok::minus:
+    if ((StopParse = Lex(PeekTok))) return true;
+    if (EvaluateValue(Result, PeekTok, StopParse)) return true;
+    Result = -Result;
+    return false;
+    
+  case tok::tilde:
+    if ((StopParse = Lex(PeekTok))) return true;
+    if (EvaluateValue(Result, PeekTok, StopParse)) return true;
+    Result = ~Result;
+    return false;
+    
+  case tok::exclaim:
+    if ((StopParse = Lex(PeekTok))) return true;
+    if (EvaluateValue(Result, PeekTok, StopParse)) return true;
+    Result = !Result;
+    return false;
+    
+  // FIXME: Handle #assert
+  }
+}
+
+
+
+/// getPrecedence - Return the precedence of the specified binary operator
+/// token.  This returns:
+///   ~0 - Invalid token.
+///   15 - *,/,%
+///   14 - -,+
+///   13 - <<,>>
+///   12 - >=, <=, >, <
+///   11 - ==, !=
+///   10 - <?, >?           min, max (GCC extensions)
+///    9 - &
+///    8 - ^
+///    7 - |
+///    6 - &&
+///    5 - ||
+///    4 - ?
+///    3 - :
+///    2 - ,
+///    0 - eom, )
+static unsigned getPrecedence(tok::TokenKind Kind) {
+  switch (Kind) {
+  default: return ~0U;
+  case tok::percent:
+  case tok::slash:
+  case tok::star:                 return 15;
+  case tok::plus:
+  case tok::minus:                return 14;
+  case tok::lessless:
+  case tok::greatergreater:       return 13;
+  case tok::lessequal:
+  case tok::less:
+  case tok::greaterequal:
+  case tok::greater:              return 12;
+  case tok::exclaimequal:
+  case tok::equalequal:           return 11;
+  case tok::lessquestion:
+  case tok::greaterquestion:      return 10;
+  case tok::amp:                  return 9;
+  case tok::caret:                return 8;
+  case tok::pipe:                 return 7;
+  case tok::ampamp:               return 6;
+  case tok::pipepipe:             return 5;
+  case tok::question:             return 4;
+  case tok::colon:                return 3;
+  case tok::comma:                return 2;
+  case tok::r_paren:              return 0;   // Lowest priority, end of expr.
+  case tok::eom:                  return 0;   // Lowest priority, end of macro.
+  }
+}
+
+
+/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
+/// PeekTok, and whose precedence is PeekPrec.  MinPrec is the minimum
+/// precedence this range of the expression is allowed to include.  The value
+/// is accumulated into LHS.  Returns true on error, setting StopParse when
+/// lexing must be aborted entirely.
+bool Preprocessor::EvaluateDirectiveSubExpr(int &LHS, unsigned MinPrec,
+                                            LexerToken &PeekTok,
+                                            bool &StopParse) {
+  unsigned PeekPrec = getPrecedence(PeekTok.getKind());
+  // If this token isn't valid, report the error.
+  if (PeekPrec == ~0U) {
+    StopParse = Diag(PeekTok, diag::err_pp_expr_bad_token);
+    return true;
+  }
+  
+  while (1) {
+    // If this token has a lower precedence than we are allowed to parse, return
+    // it so that higher levels of the recursion can parse it.
+    if (PeekPrec < MinPrec)
+      return false;
+    
+    tok::TokenKind Operator = PeekTok.getKind();
+
+    // Consume the operator, saving the operator token for error reporting.
+    LexerToken OpToken = PeekTok;
+    if ((StopParse = Lex(PeekTok))) return true;
+
+    int RHS;
+    // Parse the RHS of the operator.
+    if (EvaluateValue(RHS, PeekTok, StopParse)) return true;
+
+    // Remember the precedence of this operator and get the precedence of the
+    // operator immediately to the right of the RHS.
+    unsigned ThisPrec = PeekPrec;
+    PeekPrec = getPrecedence(PeekTok.getKind());
+
+    // If this token isn't valid, report the error.
+    if (PeekPrec == ~0U) {
+      StopParse = Diag(PeekTok, diag::err_pp_expr_bad_token);
+      return true;
+    }
+    
+    // ?: is right-associative; all other binary operators handled here are
+    // left-associative.
+    bool isRightAssoc = Operator == tok::question;
+    
+    // Get the precedence of the operator to the right of the RHS.  If it binds
+    // more tightly with RHS than we do, evaluate it completely first.
+    if (ThisPrec < PeekPrec ||
+        (ThisPrec == PeekPrec && isRightAssoc)) {
+      if (EvaluateDirectiveSubExpr(RHS, ThisPrec+1, PeekTok, StopParse))
+        return true;
+      PeekPrec = getPrecedence(PeekTok.getKind());
+    }
+    assert(PeekPrec <= ThisPrec && "Recursion didn't work!");
+    
+    switch (Operator) {
+    default: assert(0 && "Unknown operator token!");
+    case tok::percent:
+      if (RHS == 0) {
+        StopParse = Diag(OpToken, diag::err_pp_remainder_by_zero);
+        return true;
+      }
+      LHS %= RHS;
+      break;
+    case tok::slash:
+      if (RHS == 0) {
+        StopParse = Diag(OpToken, diag::err_pp_division_by_zero);
+        return true;
+      }
+      LHS /= RHS;
+      break;
+    case tok::star :           LHS *= RHS; break;
+    // Note: these were "LHS << RHS;" / "LHS >> RHS;", which computed the
+    // shift and discarded the result, making shifts no-ops in #if
+    // expressions.  Compound assignment is the intended behavior.
+    case tok::lessless:        LHS <<= RHS; break; // FIXME: shift amt overflow?
+    case tok::greatergreater:  LHS >>= RHS; break; // FIXME: signed vs unsigned
+    case tok::plus :           LHS += RHS; break;
+    case tok::minus:           LHS -= RHS; break;
+    case tok::lessequal:       LHS = LHS <= RHS; break;
+    case tok::less:            LHS = LHS <  RHS; break;
+    case tok::greaterequal:    LHS = LHS >= RHS; break;
+    case tok::greater:         LHS = LHS >  RHS; break;
+    case tok::exclaimequal:    LHS = LHS != RHS; break;
+    case tok::equalequal:      LHS = LHS == RHS; break;
+    case tok::lessquestion:    // Deprecation warning emitted by the lexer.
+      LHS = std::min(LHS, RHS);
+      break; 
+    case tok::greaterquestion: // Deprecation warning emitted by the lexer.
+      LHS = std::max(LHS, RHS);
+      break;
+    case tok::amp:             LHS &= RHS; break;
+    case tok::caret:           LHS ^= RHS; break;
+    case tok::pipe:            LHS |= RHS; break;
+    case tok::ampamp:          LHS = LHS && RHS; break;
+    case tok::pipepipe:        LHS = LHS || RHS; break;
+    case tok::comma:
+      // The comma operator in a #if expression is an extension; diagnose it.
+      if ((StopParse = Diag(OpToken, diag::ext_pp_comma_expr)))
+        return true;
+      LHS = RHS; // LHS = LHS,RHS -> RHS.
+      break; 
+    case tok::question: {
+      // Parse the : part of the expression.
+      if (PeekTok.getKind() != tok::colon) {
+        StopParse = Diag(OpToken, diag::err_pp_question_without_colon);
+        return true;
+      }
+      // Consume the :.
+      if ((StopParse = Lex(PeekTok))) return true;
+
+      // Evaluate the value after the :.
+      int AfterColonVal = 0;
+      if (EvaluateValue(AfterColonVal, PeekTok, StopParse)) return true;
+
+      // Parse anything after the : RHS that has a higher precedence than ?.
+      if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec+1,
+                                   PeekTok, StopParse))
+        return true;
+      
+      // Now that we have the condition, the LHS and the RHS of the :, evaluate.
+      LHS = LHS ? RHS : AfterColonVal;
+      
+      // Figure out the precedence of the token after the : part.
+      PeekPrec = getPrecedence(PeekTok.getKind());
+      break;
+    }
+    case tok::colon:
+      // Don't allow :'s to float around without being part of ?: exprs.
+      StopParse = Diag(OpToken, diag::err_pp_colon_without_question);
+      return true;
+    }
+  }
+  
+  return false;
+}

Propchange: cfe/cfe/trunk/Lex/PPExpressions.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/PPExpressions.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (added)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:22:17 2007
@@ -0,0 +1,1121 @@
+//===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Preprocessor interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: GCC Diagnostics emitted by the lexer:
+//
+// ERROR  : __VA_ARGS__ can only appear in the expansion of a C99 variadic macro
+//
+// Options to support:
+//   -H       - Print the name of each header file used.
+//   -C -CC   - Do not discard comments for cpp.
+//   -P       - Do not emit #line directives.
+//   -d[MDNI] - Dump various things.
+//   -fworking-directory - #line's with preprocessor's working dir.
+//   -fpreprocessed
+//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
+//   -W*
+//   -w
+//
+// Messages to emit:
+//   "Multiple include guards may be useful for:\n"
+//
+// TODO: Implement the include guard optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include <iostream>
+using namespace llvm;
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+
+/// Preprocessor - Construct a preprocessor over the given file and source
+/// managers.  Starts with no active lexer, all statistics zeroed, macro
+/// expansion enabled, and not skipping a '#if 0' region.
+Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, 
+                           FileManager &FM, SourceManager &SM) 
+  : Diags(diags), Features(opts), FileMgr(FM), SourceMgr(SM),
+    SystemDirIdx(0), NoCurDirSearch(false),
+    CurLexer(0), CurNextDirLookup(0), CurMacroExpander(0) {
+  // Clear stats.
+  NumDirectives = NumIncluded = NumDefined = NumUndefined = NumPragma = 0;
+  NumIf = NumElse = NumEndif = 0;
+  NumEnteredSourceFiles = NumMacroExpanded = NumFastMacroExpanded = 0;
+  MaxIncludeStackDepth = MaxMacroStackDepth = 0;
+  NumSkipped = 0;
+      
+  // Macro expansion is enabled.
+  DisableMacroExpansion = false;
+  SkippingContents = false;
+}
+
+/// ~Preprocessor - Release any lexers and macro expanders still alive.
+Preprocessor::~Preprocessor() {
+  // Free any active lexers (delete of a null CurLexer is a no-op).
+  delete CurLexer;
+  
+  while (!IncludeStack.empty()) {
+    delete IncludeStack.back().TheLexer;
+    IncludeStack.pop_back();
+  }
+
+  // Free any active macro expanders as well; these were previously leaked
+  // if the preprocessor was destroyed mid-expansion.
+  delete CurMacroExpander;
+
+  while (!MacroStack.empty()) {
+    delete MacroStack.back();
+    MacroStack.pop_back();
+  }
+}
+
+/// getFileInfo - Return the PerFileInfo structure for the specified
+/// FileEntry, growing the table on demand so the UID always indexes a
+/// valid slot.
+Preprocessor::PerFileInfo &Preprocessor::getFileInfo(const FileEntry *FE) {
+  unsigned UID = FE->getUID();
+  if (UID >= FileInfo.size())
+    FileInfo.resize(UID+1);
+  return FileInfo[UID];
+}  
+
+
+/// AddKeywords - Add all keywords to the symbol table.
+///
+void Preprocessor::AddKeywords() {
+  // Each dialect (C90, C99, C++) occupies a two-bit field in a keyword's
+  // FLAGS value.  From the names, EXT* presumably marks the keyword as an
+  // extension in that dialect and NOT* marks it as absent -- confirm against
+  // the AddKeyword implementation.  The decoded two-bit value is handed
+  // straight to AddKeyword below.
+  enum {
+    C90Shift = 0,
+    EXTC90   = 1 << C90Shift,
+    NOTC90   = 2 << C90Shift,
+    C99Shift = 2,
+    EXTC99   = 1 << C99Shift,
+    NOTC99   = 2 << C99Shift,
+    CPPShift = 4,
+    EXTCPP   = 1 << CPPShift,
+    NOTCPP   = 2 << CPPShift,
+    Mask     = 3
+  };
+
+  // Add keywords and tokens for the current language.
+  // Note: #NAME+1 skips the first character of the stringized name; token
+  // names appear to carry a leading '_' so that kw##NAME forms kw_xxx --
+  // confirm against TokenKinds.def.
+#define KEYWORD(NAME, FLAGS) \
+  AddKeyword(#NAME+1, tok::kw##NAME,     \
+             (FLAGS >> C90Shift) & Mask, \
+             (FLAGS >> C99Shift) & Mask, \
+             (FLAGS >> CPPShift) & Mask);
+#define ALIAS(NAME, TOK) \
+  AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0);
+#include "clang/Basic/TokenKinds.def"
+}
+
+/// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
+/// the specified location.  Returns the result of Diagnostic::Report;
+/// callers treat a true return as "abort lexing".
+bool Preprocessor::Diag(SourceLocation Loc, unsigned DiagID, 
+                        const std::string &Msg) {
+  // If we are in a '#if 0' block, don't emit any diagnostics for notes,
+  // warnings or extensions.
+  if (isSkipping() && Diagnostic::isNoteWarningOrExtension(DiagID))
+    return false;
+  
+  return Diags.Report(Loc, DiagID, Msg);
+}
+/// Diag - Forwarding function for diagnostics keyed off a token.  Delegates
+/// to the SourceLocation overload, which already suppresses
+/// notes/warnings/extensions while skipping a '#if 0' block -- duplicating
+/// that check here was redundant.
+bool Preprocessor::Diag(const LexerToken &Tok, unsigned DiagID,
+                        const std::string &Msg) {
+  return Diag(Tok.getSourceLocation(), DiagID, Msg);
+}
+
+/// PrintStats - Print per-preprocessor statistics (file, directive, and
+/// macro-expansion counters) to stderr, used for debugging output.
+void Preprocessor::PrintStats() {
+  std::cerr << "\n*** Preprocessor Stats:\n";
+  std::cerr << FileInfo.size() << " files tracked.\n";
+  unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
+  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+    NumOnceOnlyFiles += FileInfo[i].isImport;
+    if (MaxNumIncludes < FileInfo[i].NumIncludes)
+      MaxNumIncludes = FileInfo[i].NumIncludes;
+    NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
+  }
+  std::cerr << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n";
+  std::cerr << "  " << NumSingleIncludedFiles << " included exactly once.\n";
+  std::cerr << "  " << MaxNumIncludes << " max times a file is included.\n";
+  
+  std::cerr << NumDirectives << " directives found:\n";
+  std::cerr << "  " << NumDefined << " #define.\n";
+  std::cerr << "  " << NumUndefined << " #undef.\n";
+  std::cerr << "  " << NumIncluded << " #include/#include_next/#import.\n";
+  std::cerr << "    " << NumEnteredSourceFiles << " source files entered.\n";
+  std::cerr << "    " << MaxIncludeStackDepth << " max include stack depth\n";
+  std::cerr << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
+  std::cerr << "  " << NumElse << " #else/#elif.\n";
+  std::cerr << "  " << NumEndif << " #endif.\n";
+  std::cerr << "  " << NumPragma << " #pragma.\n";
+  // Fixed a typo in this message: it previously read "#if/#ifndef#ifdef".
+  std::cerr << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";
+
+  std::cerr << NumMacroExpanded << " macros expanded, "
+            << NumFastMacroExpanded << " on the fast path.\n";
+  if (MaxMacroStackDepth > 1)
+    std::cerr << "  " << MaxMacroStackDepth << " max macroexpand stack depth\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Source File Location Methods.
+//===----------------------------------------------------------------------===//
+
+
+/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+/// return null on failure.  isSystem indicates whether the file reference is
+/// for system #include's or not (i.e. using <> instead of "").  FromDir, if
+/// non-null, is the search-path entry to resume searching after (this
+/// implements #include_next).  On success, NextDir is set to the search-path
+/// entry following the one the file was found in, so that a subsequent
+/// #include_next can continue from there.
+const FileEntry *Preprocessor::LookupFile(const std::string &Filename, 
+                                          bool isSystem,
+                                          const DirectoryLookup *FromDir,
+                                          const DirectoryLookup *&NextDir) {
+  assert(CurLexer && "Cannot enter a #include inside a macro expansion!");
+  NextDir = 0;
+  
+  // If 'Filename' is absolute, check to see if it exists and no searching.
+  // FIXME: this should be a sys::Path interface, this doesn't handle things
+  // like C:\foo.txt right, nor win32 \\network\device\blah.
+  if (Filename[0] == '/') {
+    // If this was an #include_next "/absolute/file", fail.
+    if (FromDir) return 0;
+
+    // Otherwise, just return the file.
+    return FileMgr.getFile(Filename);
+  }
+  
+  // Step #0, unless disabled, check to see if the file is in the #includer's
+  // directory.  This search is not done for <> headers.
+  if (!isSystem && !FromDir && !NoCurDirSearch) {
+    const FileEntry *CurFE = 
+      SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID());
+    if (CurFE) {
+      if (const FileEntry *FE = 
+            FileMgr.getFile(CurFE->getDir()->getName()+"/"+Filename)) {
+        // A file found next to its includer resumes #include_next at the
+        // includer's own resume point if there is one, else at the start of
+        // the search path.
+        if (CurNextDirLookup)
+          NextDir = CurNextDirLookup;
+        else
+          NextDir = &SearchDirs[0];
+        return FE;
+      }
+    }
+  }
+  
+  // If this is a system #include, ignore the user #include locs.
+  unsigned i = isSystem ? SystemDirIdx : 0;
+
+  // If this is a #include_next request, start searching after the directory the
+  // file was found in.
+  if (FromDir)
+    i = FromDir-&SearchDirs[0];
+  
+  // Check each directory in sequence to see if it contains this file.
+  for (; i != SearchDirs.size(); ++i) {
+    // Concatenate the requested file onto the directory.
+    // FIXME: should be in sys::Path.
+    if (const FileEntry *FE = 
+          FileMgr.getFile(SearchDirs[i].getDir()->getName()+"/"+Filename)) {
+      NextDir = &SearchDirs[i+1];
+      return FE;
+    }
+  }
+  
+  // Otherwise, didn't find it.
+  return 0;
+}
+
+/// EnterSourceFile - Add a source file to the top of the include stack and
+/// start lexing tokens from it instead of the current buffer.  NextDir is
+/// remembered as the resume point for #include_next from within this file.
+/// (Note: despite being documented elsewhere as failable, this returns void;
+/// entering the file cannot fail here.)
+void Preprocessor::EnterSourceFile(unsigned FileID,
+                                   const DirectoryLookup *NextDir) {
+  ++NumEnteredSourceFiles;
+  
+  // Add the current lexer to the include stack.
+  if (CurLexer) {
+    IncludeStack.push_back(IncludeStackInfo(CurLexer, CurNextDirLookup));
+  } else {
+    assert(CurMacroExpander == 0 && "Cannot #include a file inside a macro!");
+  }
+
+  if (MaxIncludeStackDepth < IncludeStack.size())
+    MaxIncludeStackDepth = IncludeStack.size();
+  
+  const SourceBuffer *Buffer = SourceMgr.getBuffer(FileID);
+  
+  CurLexer         = new Lexer(Buffer, FileID, *this);
+  CurNextDirLookup = NextDir;
+}
+
+/// EnterMacro - Add a Macro to the top of the include stack and start lexing
+/// tokens from it instead of the current buffer.  Return true on failure;
+/// note that the current implementation always succeeds and returns false.
+bool Preprocessor::EnterMacro(LexerToken &Tok) {
+  IdentifierTokenInfo *Identifier = Tok.getIdentifierInfo();
+  MacroInfo &MI = *Identifier->getMacroInfo();
+  SourceLocation ExpandLoc = Tok.getSourceLocation();
+  // Get a macro ID for this expansion -- presumably so expanded tokens can
+  // be attributed to this expansion site; see SourceManager::getMacroID.
+  unsigned MacroID = SourceMgr.getMacroID(Identifier, ExpandLoc);
+  if (CurLexer) {
+    // Suspend the current file lexer while the macro is being expanded.
+    IncludeStack.push_back(IncludeStackInfo(CurLexer, CurNextDirLookup));
+    CurLexer         = 0;
+    CurNextDirLookup = 0;
+  } else if (CurMacroExpander) {
+    // Already expanding a macro: nest the expansions.
+    MacroStack.push_back(CurMacroExpander);
+  }
+
+  if (MaxMacroStackDepth < MacroStack.size())
+    MaxMacroStackDepth = MacroStack.size();
+  
+  // TODO: Figure out arguments.
+  
+  // Mark the macro as currently disabled, so that it is not recursively
+  // expanded.
+  MI.DisableMacro();
+  
+  // The new expander inherits the StartOfLine/LeadingSpace properties of the
+  // macro name token, so the first expanded token can reproduce them.
+  CurMacroExpander = new MacroExpander(MI, MacroID, *this,
+                                       Tok.isAtStartOfLine(), 
+                                       Tok.hasLeadingSpace());
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Event Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleIdentifier - This callback is invoked when the lexer reads an
+/// identifier.  This callback looks up the identifier in the map and/or
+/// potentially macro expands it or turns it into a named token (like 'for').
+/// Returns true if lexing should be aborted, false otherwise.
+bool Preprocessor::HandleIdentifier(LexerToken &Identifier) {
+  if (Identifier.getIdentifierInfo() == 0) {
+    // If we are skipping tokens (because we are in a #if 0 block), there will
+    // be no identifier info, just return the token.
+    assert(isSkipping() && "Token isn't an identifier?");
+    return false;
+  }
+  IdentifierTokenInfo &ITI = *Identifier.getIdentifierInfo();
+  
+  // FIXME: Check for poisoning in ITI?
+  
+  // Macro expansion, unless the macro is disabled (to prevent recursive
+  // self-expansion) or expansion is globally off (e.g. inside 'defined').
+  if (MacroInfo *MI = ITI.getMacroInfo()) {
+    if (MI->isEnabled() && !DisableMacroExpansion) {
+      ++NumMacroExpanded;
+      // If we started lexing a macro, enter the macro expansion body.
+      // FIXME: Read/Validate the argument list here!
+      
+      // Fast path #1: if this macro expands to no tokens, don't bother to push
+      // it onto the expansion stack, only to take it right back off.
+      if (MI->getNumTokens() == 0) {
+        // Ignore this macro use, just return the next token in the current
+        // buffer.
+        bool HadLeadingSpace = Identifier.hasLeadingSpace();
+        bool IsAtStartOfLine = Identifier.isAtStartOfLine();
+        
+        if (Lex(Identifier)) return true;
+        
+        // If the identifier isn't on some OTHER line, inherit the leading
+        // whitespace/first-on-a-line property of this token.  This handles
+        // stuff like "! XX," -> "! ," and "   XX," -> "    ,", when XX is
+        // empty.
+        if (!Identifier.isAtStartOfLine()) {
+          if (IsAtStartOfLine) Identifier.SetFlag(LexerToken::StartOfLine);
+          if (HadLeadingSpace) Identifier.SetFlag(LexerToken::LeadingSpace);
+        }
+        ++NumFastMacroExpanded;
+        return false;
+        
+      } else if (MI->getNumTokens() == 1 &&
+                 // Don't handle identifiers, which might need recursive
+                 // expansion.
+                 MI->getReplacementToken(0).getIdentifierInfo() == 0) {
+        // FIXME: Function-style macros only if no arguments?
+
+        // Fast path #2: if this macro expands into a single trivially-expanded
+        // token: expand it now.  This handles common cases like 
+        // "#define VAL 42".
+        
+        // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
+        // identifier to the expanded token.
+        bool isAtStartOfLine = Identifier.isAtStartOfLine();
+        bool hasLeadingSpace = Identifier.hasLeadingSpace();
+
+        // Replace the result token.
+        Identifier = MI->getReplacementToken(0);
+
+        // Restore the StartOfLine/LeadingSpace markers.
+        Identifier.SetFlagValue(LexerToken::StartOfLine , isAtStartOfLine);
+        Identifier.SetFlagValue(LexerToken::LeadingSpace, hasLeadingSpace);
+        
+        // FIXME: Get correct macro expansion stack location info!
+        
+        // Since this is not an identifier token, it can't be macro expanded, so
+        // we're done.
+        ++NumFastMacroExpanded;
+        return false;
+      }
+    
+      // Slow path: start expanding the macro (FIXME, pass arguments).
+      if (EnterMacro(Identifier))
+        return true;
+    
+      // Now that the macro is at the top of the include stack, ask the
+      // preprocessor to read the next token from it.
+      return Lex(Identifier);
+    }
+  }
+
+  // Change the kind of this identifier to the appropriate token kind, e.g.
+  // turning "for" into a keyword.
+  Identifier.SetKind(ITI.getTokenID());
+    
+  // If this is an extension token, diagnose its use.
+  if (ITI.isExtensionToken() && Diag(Identifier, diag::ext_token_used))
+    return true;
+  return false;  
+}
+
+/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
+/// the current file.  This either returns the EOF token or pops a level off
+/// the include stack and keeps going.  Returns true only on a fatal error.
+bool Preprocessor::HandleEndOfFile(LexerToken &Result) {
+  assert(!CurMacroExpander &&
+         "Ending a file when currently in a macro!");
+  
+  // If we are in a #if 0 block skipping tokens, and we see the end of the file,
+  // this is an error condition.  Just return the EOF token up to
+  // SkipExcludedConditionalBlock.  The Lexer will already have issued
+  // errors for the unterminated #if's on the conditional stack.
+  if (isSkipping()) {
+    // Synthesize an EOF token pointing at the very end of the buffer.
+    Result.StartToken(CurLexer);
+    Result.SetKind(tok::eof);
+    Result.SetStart(CurLexer->BufferEnd);
+    Result.SetEnd(CurLexer->BufferEnd);
+    return false;
+  }
+  
+  // If this is a #include'd file, pop it off the include stack and continue
+  // lexing the #includer file.
+  if (!IncludeStack.empty()) {
+    // We're done with the #included file.
+    delete CurLexer;
+    CurLexer         = IncludeStack.back().TheLexer;
+    CurNextDirLookup = IncludeStack.back().TheDirLookup;
+    IncludeStack.pop_back();
+    return Lex(Result);
+  }
+  
+  // Otherwise this is the end of the top-level source file: form an EOF token
+  // at the end of the buffer.
+  Result.StartToken(CurLexer);
+  Result.SetKind(tok::eof);
+  Result.SetStart(CurLexer->BufferEnd);
+  Result.SetEnd(CurLexer->BufferEnd);
+  
+  // We're done with the top-level (primary) source file.
+  delete CurLexer;
+  CurLexer = 0;
+  return false;
+}
+
+/// HandleEndOfMacro - This callback is invoked when the lexer hits the end of
+/// the current macro.  This either returns the EOF token or pops a level off
+/// the include stack and keeps going.
+bool Preprocessor::HandleEndOfMacro(LexerToken &Result) {
+  assert(CurMacroExpander && !CurLexer &&
+         "Ending a macro when currently in a #include file!");
+
+  // Re-enable the macro now that its expansion has finished, then release the
+  // expander.
+  CurMacroExpander->getMacro().EnableMacro();
+  delete CurMacroExpander;
+  CurMacroExpander = 0;
+
+  // If there is no enclosing macro invocation, treat this exactly like
+  // reaching the end of a #include'd file.
+  if (MacroStack.empty())
+    return HandleEndOfFile(Result);
+
+  // Otherwise resume lexing from the enclosing (nested) macro expansion.
+  CurMacroExpander = MacroStack.back();
+  MacroStack.pop_back();
+  return Lex(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utility Methods for Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
+/// current line until the tok::eom token is found.
+bool Preprocessor::DiscardUntilEndOfDirective() {
+  // Consume tokens without macro expansion until the end-of-directive marker.
+  LexerToken Discard;
+  while (1) {
+    if (LexUnexpandedToken(Discard)) return true;
+    if (Discard.getKind() == tok::eom) return false;
+  }
+}
+
+/// ReadMacroName - Lex and validate a macro name, which occurs after a
+/// #define or #undef.  This sets the token kind to eom and discards the rest
+/// of the macro line if the macro name is invalid.  Returns true only on a
+/// fatal (non-recoverable) diagnostic.
+bool Preprocessor::ReadMacroName(LexerToken &MacroNameTok) {
+  // Read the token, don't allow macro expansion on it.
+  if (LexUnexpandedToken(MacroNameTok))
+    return true;
+  
+  // Missing macro name?
+  if (MacroNameTok.getKind() == tok::eom)
+    return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
+  
+  if (MacroNameTok.getIdentifierInfo() == 0) {
+    // Not an identifier at all, e.g. "#define 42".
+    if (Diag(MacroNameTok, diag::err_pp_macro_not_identifier))
+      return true;
+    // Fall through on error.
+  } else if (0) {  // Dead placeholder branch, kept for the FIXME below.
+    // FIXME: Error if defining a C++ named operator.
+    
+  } else if (0) {  // Dead placeholder branch, kept for the FIXME below.
+    // FIXME: Error if defining "defined", "__DATE__", and other predef macros
+    // in C99 6.10.8.4.
+  } else {
+    // Okay, we got a good identifier node.  Return it.
+    return false;
+  }
+  
+  
+  // Invalid macro name, read and discard the rest of the line.  Then set the
+  // token kind to tok::eom so callers know the whole line was consumed.
+  MacroNameTok.SetKind(tok::eom);
+  return DiscardUntilEndOfDirective();
+}
+
+/// CheckEndOfDirective - Ensure that the next token is a tok::eom token.  If
+/// not, emit a diagnostic and consume up until the eom.
+bool Preprocessor::CheckEndOfDirective(const char *DirType) {
+  LexerToken Trailing;
+  if (Lex(Trailing)) return true;
+
+  // A clean end of line: nothing more to do.
+  if (Trailing.getKind() == tok::eom) return false;
+
+  // Extra tokens after the directive are accepted as an extension: diagnose,
+  // then throw away the remainder of the line.
+  return Diag(Trailing, diag::ext_pp_extra_tokens_at_eol, DirType) ||
+         DiscardUntilEndOfDirective();
+}
+
+
+
+/// SkipExcludedConditionalBlock - We just read a #if or related directive and
+/// decided that the subsequent tokens are in the #if'd out portion of the
+/// file.  Lex the rest of the file, until we see an #endif.  If
+/// FoundNonSkipPortion is true, then we have already emitted code for part of
+/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk
+/// is true, then #else directives are ok, if not, then we have already seen one
+/// so a #else directive is a duplicate.  When this returns, the caller can lex
+/// the first valid token.
+bool Preprocessor::SkipExcludedConditionalBlock(const char *IfTokenLoc,
+                                                bool FoundNonSkipPortion,
+                                                bool FoundElse) {
+  ++NumSkipped;
+  assert(MacroStack.empty() && CurMacroExpander == 0 && CurLexer &&
+         "Lexing a macro, not a file?");
+
+  // Record the conditional that introduced this skipped region.
+  CurLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false,
+                                 FoundNonSkipPortion, FoundElse);
+  
+  // Know that we are going to be skipping tokens.  Set this flag to indicate
+  // this, which has a couple of effects:
+  //  1. If EOF of the current lexer is found, the include stack isn't popped.
+  //  2. Identifier information is not looked up for identifier tokens.  As an
+  //     effect of this, implicit macro expansion is naturally disabled.
+  //  3. "#" tokens at the start of a line are treated as normal tokens, not
+  //     implicitly transformed by the lexer.
+  //  4. All notes, warnings, and extension messages are disabled.
+  //
+  SkippingContents = true;
+  LexerToken Tok;
+  while (1) {
+    if (CurLexer->Lex(Tok)) return true;
+    
+    // If this is the end of the buffer, we have an error.  The lexer will have
+    // already handled this error condition, so just return and let the caller
+    // lex after this #include.
+    if (Tok.getKind() == tok::eof) break;
+    
+    // If this token is not a preprocessor directive, just skip it.
+    if (Tok.getKind() != tok::hash || !Tok.isAtStartOfLine())
+      continue;
+      
+    // We just parsed a # character at the start of a line, so we're in
+    // directive mode.  Tell the lexer this so any newlines we see will be
+    // converted into an EOM token (this terminates the macro).
+    CurLexer->ParsingPreprocessorDirective = true;
+    
+    // Read the next token, the directive flavor.
+    if (LexUnexpandedToken(Tok)) return true;
+    
+    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
+    // something bogus), skip it.
+    if (Tok.getKind() != tok::identifier) {
+      CurLexer->ParsingPreprocessorDirective = false;
+      continue;
+    }
+    
+    // If the first letter isn't i or e, it isn't interesting to us.  We know
+    // that this is safe in the face of spelling differences, because there is
+    // no way to spell an i/e in a strange way that is another letter.  Skipping
+    // this allows us to avoid computing the spelling for #define/#undef and
+    // other common directives.
+    char FirstChar = Tok.getStart()[0];
+    if (FirstChar >= 'a' && FirstChar <= 'z' && 
+        FirstChar != 'i' && FirstChar != 'e') {
+      CurLexer->ParsingPreprocessorDirective = false;
+      continue;
+    }
+    
+    // Strip out trigraphs and embedded newlines.
+    std::string Directive = Lexer::getSpelling(Tok, Features);
+    FirstChar = Directive[0];
+    if (FirstChar == 'i' && Directive[1] == 'f') {
+      if (Directive == "if" || Directive == "ifdef" || Directive == "ifndef") {
+        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
+        // bother parsing the condition.
+        if (DiscardUntilEndOfDirective()) return true;
+        CurLexer->pushConditionalLevel(Tok.getStart(), /*wasskipping*/true,
+                                       /*foundnonskip*/false,/*fnddelse*/false);
+      }
+    } else if (FirstChar == 'e') {
+      if (Directive == "endif") {
+        if (CheckEndOfDirective("#endif")) return true;
+        PPConditionalInfo CondInfo;
+        CondInfo.WasSkipping = true; // Silence bogus warning.
+        bool InCond = CurLexer->popConditionalLevel(CondInfo);
+        assert(!InCond && "Can't be skipping if not in a conditional!");
+        
+        // If we popped the outermost skipping block, we're done skipping!
+        if (!CondInfo.WasSkipping)
+          break;
+      } else if (Directive == "else") {
+        // #else directive in a skipping conditional.  If not in some other
+        // skipping conditional, and if #else hasn't already been seen, enter it
+        // as a non-skipping conditional.
+        if (CheckEndOfDirective("#else")) return true;
+        PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();
+        
+        // If this is a #else with a #else before it, report the error.
+        if (CondInfo.FoundElse && Diag(Tok, diag::pp_err_else_after_else))
+          return true;
+        
+        // Note that we've seen a #else in this conditional.
+        CondInfo.FoundElse = true;
+        
+        // If the conditional is at the top level, and the #if block wasn't
+        // entered, enter the #else block now.
+        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
+          CondInfo.FoundNonSkip = true;
+          break;
+        }
+      } else if (Directive == "elif") {
+        PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();
+
+        bool ShouldEnter;
+        // If this is in a skipping block or if we're already handled this #if
+        // block, don't bother parsing the condition.
+        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+          if (DiscardUntilEndOfDirective()) return true;
+          ShouldEnter = false;
+        } else {
+          // Evaluate the #elif condition!
+          // NOTE(review): 'Start' is currently unused -- presumably reserved
+          // for future diagnostic location tracking; confirm.
+          const char *Start = CurLexer->BufferPtr;
+
+          // Restore the value of SkippingContents so that identifiers are
+          // looked up, etc, inside the #elif expression.
+          assert(SkippingContents && "We have to be skipping here!");
+          SkippingContents = false;
+          if (EvaluateDirectiveExpression(ShouldEnter))
+            return true;
+          SkippingContents = true;
+        }
+        
+        // If this is a #elif with a #else before it, report the error.
+        if (CondInfo.FoundElse && Diag(Tok, diag::pp_err_elif_after_else))
+          return true;
+        
+        // If this condition is true, enter it!
+        if (ShouldEnter) {
+          CondInfo.FoundNonSkip = true;
+          break;
+        }
+      }
+    }
+    
+    CurLexer->ParsingPreprocessorDirective = false;
+  }
+
+  // Finally, if we are out of the conditional (saw an #endif or ran off the end
+  // of the file, just stop skipping and return to lexing whatever came after
+  // the #if block.
+  SkippingContents = false;
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleDirective - This callback is invoked when the lexer sees a # token
+/// at the start of a line.  This consumes the directive, modifies the 
+/// lexer/preprocessor state, and advances the lexer(s) so that the next token
+/// read is the correct one.  Returns true on a fatal diagnostic.
+bool Preprocessor::HandleDirective(LexerToken &Result) {
+  // FIXME: TRADITIONAL: # with whitespace before it not recognized by K&R?
+  
+  // We just parsed a # character at the start of a line, so we're in directive
+  // mode.  Tell the lexer this so any newlines we see will be converted into an
+  // EOM token (this terminates the macro).
+  CurLexer->ParsingPreprocessorDirective = true;
+  
+  ++NumDirectives;
+  
+  // Read the next token, the directive flavor.
+  if (LexUnexpandedToken(Result))
+    return true;   // Bail out.
+  
+  switch (Result.getKind()) {
+  default: break;
+  case tok::eom:
+    return false;   // null directive.
+
+#if 0
+  case tok::numeric_constant:
+    // FIXME: implement # 7 line numbers!
+    break;
+#endif
+  // "else" and "if" lex as keyword tokens, not identifiers, so they are
+  // dispatched here rather than through the spelling switch below.
+  case tok::kw_else:
+    return HandleElseDirective(Result);
+  case tok::kw_if:
+    return HandleIfDirective(Result);
+  case tok::identifier:
+    // Strip out trigraphs and embedded newlines.
+    std::string Directive = Lexer::getSpelling(Result, Features);
+    // NOTE(review): isExtension is set but never consumed in this function --
+    // presumably for a future extension diagnostic; confirm.
+    bool isExtension = false;
+    // Dispatch on the spelled length first to avoid most string comparisons.
+    switch (Directive.size()) {
+    case 4:
+      if (Directive == "line")
+        ;   // FIXME: #line is recognized here but not yet implemented.
+      if (Directive == "elif")
+        return HandleElifDirective(Result);
+      if (Directive == "sccs") {
+        isExtension = true;
+        // SCCS is the same as #ident.
+      }
+      break;
+    case 5:
+      if (Directive == "endif")
+        return HandleEndifDirective(Result);
+      if (Directive == "ifdef")
+        return HandleIfdefDirective(Result, false);
+      if (Directive == "undef")
+        return HandleUndefDirective(Result);
+      if (Directive == "error")
+        return HandleUserDiagnosticDirective(Result, false);
+      if (Directive == "ident")
+        isExtension = true;
+      break;
+    case 6:
+      if (Directive == "define")
+        return HandleDefineDirective(Result);
+      if (Directive == "ifndef")
+        return HandleIfdefDirective(Result, true);
+      if (Directive == "import")
+        return HandleImportDirective(Result);
+      if (Directive == "pragma") {
+        // FIXME: implement #pragma
+        ++NumPragma;
+#if 1
+        // Read the rest of the PP line.
+        do {
+          if (Lex(Result)) return true;
+        } while (Result.getKind() != tok::eom);
+        
+        return false;
+#endif
+      } else if (Directive == "assert") {
+        isExtension = true;
+      }
+      break;
+    case 7:
+      if (Directive == "include")  // Handle #include.
+        return HandleIncludeDirective(Result);
+      if (Directive == "warning")
+        return Diag(Result, diag::ext_pp_warning_directive) ||
+               HandleUserDiagnosticDirective(Result, true);
+      break;
+    case 8:
+      if (Directive == "unassert") {
+        isExtension = true;
+      }
+      break;
+    case 12:
+      if (Directive == "include_next") // Handle #include_next.
+        return HandleIncludeNextDirective(Result);
+      break;
+    }
+    break;
+  }
+  
+  // If we reached here, the preprocessing token is not valid!
+  if (Diag(Result, diag::err_pp_invalid_directive))
+    return true;
+  
+  // Read the rest of the PP line.
+  do {
+    if (Lex(Result)) return true;
+  } while (Result.getKind() != tok::eom);
+  
+  // Okay, we're done parsing the directive.
+  return false;
+}
+
+bool Preprocessor::HandleUserDiagnosticDirective(LexerToken &Result, 
+                                                 bool isWarning) {
+  // Consume the remainder of the line in raw mode: macros must not be
+  // expanded, and the text need not form valid preprocessing tokens (for
+  // example, "#warning `   'foo" is allowed).  GCC collapses consecutive
+  // whitespace between tokens, but the standard does not require that.
+  std::string Message = CurLexer->ReadToEndOfLine();
+
+  // Report the message as a warning or an error, as requested.
+  return Diag(Result, isWarning ? diag::pp_hash_warning
+                                : diag::err_pp_hash_error, Message);
+}
+
+/// HandleIncludeDirective - The "#include" tokens have just been read, read the
+/// file to be included from the lexer, then include it!  This is a common
+/// routine with functionality shared between #include, #include_next and
+/// #import.
+bool Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok,
+                                          const DirectoryLookup *LookupFrom,
+                                          bool isImport) {
+  ++NumIncluded;
+  LexerToken FilenameTok;
+  if (CurLexer->LexIncludeFilename(FilenameTok))
+    return true;
+  
+  // If the token kind is EOM, the error has already been diagnosed.
+  if (FilenameTok.getKind() == tok::eom)
+    return false;
+
+  // Check that we don't have infinite #include recursion.
+  // NOTE(review): uses '==' rather than '>='; this relies on the stack only
+  // growing one level at a time -- confirm that invariant holds.
+  if (IncludeStack.size() == MaxAllowedIncludeStackDepth-1)
+    return Diag(FilenameTok, diag::err_pp_include_too_deep);
+  
+  // Get the text form of the filename.
+  std::string Filename = CurLexer->getSpelling(FilenameTok);
+  assert(!Filename.empty() && "Can't have tokens with empty spellings!");
+  
+  // Make sure the filename is <x> or "x".
+  bool isAngled;
+  if (Filename[0] == '<') {
+    isAngled = true;
+    if (Filename[Filename.size()-1] != '>')
+      return Diag(FilenameTok, diag::err_pp_expects_filename);
+  } else if (Filename[0] == '"') {
+    isAngled = false;
+    if (Filename[Filename.size()-1] != '"')
+      return Diag(FilenameTok, diag::err_pp_expects_filename);
+  } else {
+    return Diag(FilenameTok, diag::err_pp_expects_filename);
+  }
+  
+  // Remove the quotes.
+  Filename = std::string(Filename.begin()+1, Filename.end()-1);
+  
+  // Diagnose #include "" as invalid.
+  if (Filename.empty())
+    return Diag(FilenameTok, diag::err_pp_empty_filename);
+  
+  // Search include directories.
+  const DirectoryLookup *NextDir;
+  const FileEntry *File = LookupFile(Filename, isAngled, LookupFrom, NextDir);
+  if (File == 0)
+    return Diag(FilenameTok, diag::err_pp_file_not_found);
+  
+  // Get information about this file.
+  PerFileInfo &FileInfo = getFileInfo(File);
+  
+  // If this is a #import directive, check that we have not already imported
+  // this header.
+  if (isImport) {
+    // If this has already been imported, don't import it again.
+    FileInfo.isImport = true;
+    
+    // Has this already been #import'ed or #include'd?
+    if (FileInfo.NumIncludes) return false;
+  } else {
+    // Otherwise, if this is a #include of a file that was previously #import'd
+    // or if this is the second #include of a #pragma once file, ignore it.
+    if (FileInfo.isImport)
+      return false;
+  }
+
+  // Look up the file, create a File ID for it.
+  unsigned FileID = 
+    SourceMgr.createFileID(File, FilenameTok.getSourceLocation());
+  if (FileID == 0)
+    return Diag(FilenameTok, diag::err_pp_file_not_found);
+
+  // Finally, if all is good, enter the new file!
+  EnterSourceFile(FileID, NextDir);
+
+  // Increment the number of times this file has been included.
+  ++FileInfo.NumIncludes;
+  
+  return false;
+}
+
+/// HandleIncludeNextDirective - Implements #include_next.
+///
+bool Preprocessor::HandleIncludeNextDirective(LexerToken &IncludeNextTok) {
+  // #include_next is an extension; diagnose every use of it.
+  if (Diag(IncludeNextTok, diag::ext_pp_include_next_directive))
+    return true;
+  
+  // The search resumes after the directory in which the current file was
+  // found.  When that is not possible, warn and fall back to an ordinary
+  // #include search.
+  const DirectoryLookup *Lookup = CurNextDirLookup;
+  if (IncludeStack.empty()) {
+    // In the primary source file, #include_next degrades to plain #include.
+    Lookup = 0;
+    if (Diag(IncludeNextTok, diag::pp_include_next_in_primary))
+      return true;
+  } else if (Lookup == 0) {
+    // The current file was reached by an absolute path, so there is no
+    // "next" search directory to continue from.
+    if (Diag(IncludeNextTok, diag::pp_include_next_absolute_path))
+      return true;
+  }
+  
+  return HandleIncludeDirective(IncludeNextTok, Lookup);
+}
+
+/// HandleImportDirective - Implements #import.
+///
+bool Preprocessor::HandleImportDirective(LexerToken &ImportTok) {
+  // #import is an extension; diagnose its use before acting on it.
+  if (Diag(ImportTok, diag::ext_pp_import_directive))
+    return true;
+  
+  // Delegate to the shared #include path with import semantics enabled.
+  return HandleIncludeDirective(ImportTok, 0, /*isImport*/true);
+}
+
+/// HandleDefineDirective - Implements #define.  This consumes the entire macro
+/// line then lets the caller lex the next real token.  Returns true on a
+/// fatal diagnostic or lexer error.
+///
+bool Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
+  ++NumDefined;
+  LexerToken MacroNameTok;
+  if (ReadMacroName(MacroNameTok))
+    return true;
+  
+  // Error reading macro name?  If so, diagnostic already issued.
+  if (MacroNameTok.getKind() == tok::eom)
+    return false;
+  
+  // Create the macro record.  Ownership is transferred to the identifier via
+  // setMacroInfo below, so every early-error return must delete it first.
+  MacroInfo *MI = new MacroInfo(MacroNameTok.getSourceLocation());
+  
+  LexerToken Tok;
+  if (LexUnexpandedToken(Tok)) {
+    delete MI;  // Fix: don't leak the new macro on a lex error.
+    return true;
+  }
+  
+  if (Tok.getKind() == tok::eom) {
+    // If there is no body to this macro, we have no special handling here.
+  } else if (Tok.getKind() == tok::l_paren && !Tok.hasLeadingSpace()) {
+    // This is a function-like macro definition.
+    //assert(0 && "Function-like macros not implemented!");
+#warning function like macros
+    delete MI;  // Fix: the definition is discarded, so free it.
+    return DiscardUntilEndOfDirective();
+
+  } else if (!Tok.hasLeadingSpace()) {
+    // C99 requires whitespace between the macro definition and the body.  Emit
+    // a diagnostic for something like "#define X+".
+    if (Features.C99) {
+      if (Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name)) {
+        delete MI;  // Fix: don't leak on a fatal diagnostic.
+        return true;
+      }
+    } else {
+      // FIXME: C90/C++ do not get this diagnostic, but it does get a similar
+      // one in some cases!
+    }
+  } else {
+    // This is a normal token with leading space.  Clear the leading space
+    // marker on the first token to get proper expansion.
+    Tok.ClearFlag(LexerToken::LeadingSpace);
+  }
+  
+  // Read the rest of the macro body.
+  while (Tok.getKind() != tok::eom) {
+    MI->AddTokenToBody(Tok);
+    
+    // FIXME: See create_iso_definition.
+    
+    // Get the next token of the macro.
+    if (LexUnexpandedToken(Tok)) {
+      delete MI;  // Fix: don't leak the partial definition on a lex error.
+      return true;
+    }
+  }
+  
+  // Finally, if this identifier already had a macro defined for it, verify that
+  // the macro bodies are identical and free the old definition.
+  if (MacroInfo *OtherMI = MacroNameTok.getIdentifierInfo()->getMacroInfo()) {
+    // FIXME: Verify the definition is the same.
+    // Macros must be identical.  This means all tokens and whitespace
+    // separation must be the same.
+    delete OtherMI;
+  }
+  
+  // Transfer ownership of MI to the identifier.
+  MacroNameTok.getIdentifierInfo()->setMacroInfo(MI);
+  return false;
+}
+
+
+/// HandleUndefDirective - Implements #undef.
+///
+bool Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
+  ++NumUndefined;
+
+  // Lex and validate the name being #undef'd.
+  LexerToken MacroNameTok;
+  if (ReadMacroName(MacroNameTok))
+    return true;
+  
+  // A kind of tok::eom means the name was invalid and ReadMacroName has
+  // already issued the diagnostic.
+  if (MacroNameTok.getKind() == tok::eom)
+    return false;
+  
+  // The macro name must be the last token on the #undef line.
+  if (CheckEndOfDirective("#undef")) return true;
+  
+  IdentifierTokenInfo *ITI = MacroNameTok.getIdentifierInfo();
+  
+  // #undef of an undefined macro is a no-op.
+  MacroInfo *Macro = ITI->getMacroInfo();
+  if (Macro == 0) return false;
+  
+#if 0 // FIXME: implement warn_unused_macros.
+  if (CPP_OPTION (pfile, warn_unused_macros))
+    _cpp_warn_if_unused_macro (pfile, node, NULL);
+#endif
+  
+  // Release the definition and clear the identifier's macro binding.
+  delete Macro;
+  ITI->setMacroInfo(0);
+  return false;
+}
+
+
+/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive.  isIfndef is
+/// true when this is a #ifndef directive.
+///
+bool Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
+  ++NumIf;
+  LexerToken DirectiveTok = Result;
+  
+  LexerToken MacroNameTok;
+  if (ReadMacroName(MacroNameTok))
+    return true;
+  
+  // A kind of tok::eom means the name was invalid; already diagnosed.
+  if (MacroNameTok.getKind() == tok::eom)
+    return false;
+  
+  // Verify the macro name is the last token on the #if[n]def line.
+  if (CheckEndOfDirective("#ifdef")) return true;
+  
+  // #ifdef enters the block when the macro is defined; #ifndef when it isn't.
+  bool MacroIsDefined = MacroNameTok.getIdentifierInfo()->getMacroInfo() != 0;
+  if (MacroIsDefined != isIfndef) {
+    // Condition holds: record the conditional level and keep lexing normally.
+    CurLexer->pushConditionalLevel(DirectiveTok.getStart(), /*wasskip*/false,
+                                   /*foundnonskip*/true, /*foundelse*/false);
+    return false;
+  }
+  
+  // Condition fails: skip ahead to the matching #else/#elif/#endif.
+  return SkipExcludedConditionalBlock(DirectiveTok.getStart(),
+                                      /*Foundnonskip*/false, 
+                                      /*FoundElse*/false);
+}
+
+/// HandleIfDirective - Implements the #if directive.  Evaluates the
+/// controlling expression and either continues lexing (true condition) or
+/// skips to the matching #else/#elif/#endif (false condition).
+///
+bool Preprocessor::HandleIfDirective(LexerToken &IfToken) {
+  ++NumIf;
+
+  // Parse and evaluate the controlling constant-expression.
+  // (Fix: removed the unused local 'Start', which only triggered an
+  // unused-variable warning.)
+  bool ConditionalTrue = false;
+  if (EvaluateDirectiveExpression(ConditionalTrue))
+    return true;
+  
+  // Should we include the stuff contained by this directive?
+  if (ConditionalTrue) {
+    // Yes, remember that we are inside a conditional, then lex the next token.
+    CurLexer->pushConditionalLevel(IfToken.getStart(), /*wasskip*/false,
+                                   /*foundnonskip*/true, /*foundelse*/false);
+    return false;
+  } else {
+    // No, skip the contents of this block and return the first token after it.
+    return SkipExcludedConditionalBlock(IfToken.getStart(),
+                                        /*Foundnonskip*/false, 
+                                        /*FoundElse*/false);
+  }
+}
+
+/// HandleEndifDirective - Implements the #endif directive.
+///
+bool Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
+  ++NumEndif;
+
+  // Nothing is allowed after the "endif" keyword on the line.
+  if (CheckEndOfDirective("#endif")) return true;
+  
+  // Pop the matching conditional scope; failure means a stray #endif.
+  PPConditionalInfo CondInfo;
+  if (CurLexer->popConditionalLevel(CondInfo))
+    return Diag(EndifToken, diag::err_pp_endif_without_if);
+  
+  assert(!CondInfo.WasSkipping && !isSkipping() &&
+         "This code should only be reachable in the non-skipping case!");
+  return false;
+}
+
+
+bool Preprocessor::HandleElseDirective(LexerToken &Result) {
+  ++NumElse;
+
+  // A #else in a non-skipping conditional means everything up to the matching
+  // #endif must now be skipped.  First verify the directive is bare.
+  if (CheckEndOfDirective("#else")) return true;
+  
+  PPConditionalInfo CI;
+  if (CurLexer->popConditionalLevel(CI))
+    return Diag(Result, diag::pp_err_else_without_if);
+
+  // A second #else for the same #if is an error.
+  if (CI.FoundElse && Diag(Result, diag::pp_err_else_after_else))
+    return true;
+  
+  // Skip the #else block, recording that a #else has now been seen.
+  return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
+                                      /*FoundElse*/true);
+}
+
+bool Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
+  // NOTE(review): this bumps NumElse rather than a dedicated #elif counter --
+  // presumably intentional since no NumElif statistic exists; confirm.
+  ++NumElse;
+  // #elif directive in a non-skipping conditional... start skipping.
+  // We don't care what the condition is, because we will always skip it (since
+  // the block immediately before it was included).
+  if (DiscardUntilEndOfDirective()) return true;
+
+  PPConditionalInfo CI;
+  if (CurLexer->popConditionalLevel(CI))
+    return Diag(ElifToken, diag::pp_err_elif_without_if);
+  
+  // If this is a #elif with a #else before it, report the error.
+  if (CI.FoundElse && Diag(ElifToken, diag::pp_err_elif_after_else))
+    return true;
+
+  // Finally, skip the rest of the contents of this block and return the first
+  // token after it.
+  return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
+                                      /*FoundElse*/CI.FoundElse);
+}

Propchange: cfe/cfe/trunk/Lex/Preprocessor.cpp

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Lex/Preprocessor.cpp

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/Makefile
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Makefile?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/Makefile (added)
+++ cfe/cfe/trunk/Makefile Wed Jul 11 11:22:17 2007
@@ -0,0 +1,9 @@
+# Path to the LLVM top level; this Makefile lives two levels below it
+# (presumably tools/clang -- confirm against the tree layout).
+LEVEL = ../..
+# Component libraries built before the driver; safe to build in parallel.
+PARALLEL_DIRS := Basic Lex
+CPPFLAGS += -I$(LEVEL)/tools/clang/include
+
+TOOLNAME = clang
+
+# Link order lists the most-dependent libraries first.
+USEDLIBS = clangLex.a clangBasic.a LLVMSupport.a LLVMSystem.a
+
+include $(LEVEL)/Makefile.common

Propchange: cfe/cfe/trunk/Makefile

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/Makefile

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/README.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/README.txt?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/README.txt (added)
+++ cfe/cfe/trunk/README.txt Wed Jul 11 11:22:17 2007
@@ -0,0 +1,60 @@
+//===----------------------------------------------------------------------===//
+// C Language Family Front-end
+//===----------------------------------------------------------------------===//
+
+I. Introduction:
+ 
+ clang: noun
+    1. A loud, resonant, metallic sound.
+    2. The strident call of a crane or goose.
+    3. C-language front-end toolkit.
+    
+ Why?
+ Supports Objective-C.
+
+
+II. Current advantages over GCC:
+
+ * Full column number support in diagnostics.
+ * Caret diagnostics.
+ * Full diagnostic customization by client (can format diagnostics however they
+   like, e.g. in an IDE or refactoring tool).
+ * Built as a framework, can be reused by multiple tools.
+ * All supported languages are linked into the same library (no cc1, cc1obj, ...).
+ * mmap's code in read-only, does not dirty the pages like GCC (mem footprint).
+ * BSD License, can be linked into non-GPL projects.
+ 
+Future Features:
+ * Full diagnostic control, per diagnostic (use enums).
+ * Fine grained control within the source (#pragma enable/disable warning)
+ * Faster than GCC, preprocessing, parsing, IR generation.
+ * Better token tracking within macros?  (Token came from this line, which is
+   a macro argument instantiated here, recursively instantiated here).
+ * Fast #import!!
+
+
+III. Critical Missing Functionality
+
+Lexer:
+ * Source character mapping.  GCC supports ASCII and UTF-8.
+   See GCC options: -ftarget-charset and -ftarget-wide-charset.
+ * Universal character support.  Experimental in GCC, enabled with
+   -fextended-identifiers.
+ * Poisoned identifiers.
+ * -fpreprocessed mode.
+
+Preprocessor:
+ * #line / #file directives
+ * Detection of "atomic" headers (#ifndef/#define), #pragma once support.
+ * Function-style #define & macro expansion
+ * -E & -C & -P output.
+
+Traditional Preprocessor:
+ * All.
+    
+Parser Callbacks:
+ * All.
+ 
+Parser Actions:
+ * All.
+ 
\ No newline at end of file

Propchange: cfe/cfe/trunk/README.txt

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/README.txt

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/Diagnostic.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/Diagnostic.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/Diagnostic.h (added)
+++ cfe/cfe/trunk/include/clang/Basic/Diagnostic.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,117 @@
+//===--- Diagnostic.h - C Language Family Diagnostic Handling ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the Diagnostic-related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_DIAGNOSTIC_H
+#define LLVM_CLANG_DIAGNOSTIC_H
+
+#include <string>
+
+namespace llvm {
+namespace clang {
+  class DiagnosticClient;
+  class SourceBuffer;
+  class SourceLocation;
+  
+  // Import the diagnostic enums themselves.
+  //
+  // Each DIAG(ENUM,FLAGS,DESC) entry in DiagnosticKinds.def expands to one
+  // enumerator here, so diag::kind names every known diagnostic.
+  // NUM_DIAGNOSTICS is one past the last valid diagnostic ID.
+  namespace diag {
+    enum kind {
+#define DIAG(ENUM,FLAGS,DESC) ENUM,
+#include "DiagnosticKinds.def"
+      NUM_DIAGNOSTICS
+    };
+  }
+  
+/// Diagnostic - This concrete class is used by the front-end to report
+/// problems and issues.  It massages the diagnostics (e.g. handling things like
+/// "report warnings as errors") and passes them off to the DiagnosticClient for
+/// reporting to the user.
+class Diagnostic {
+  bool WarningsAsErrors;      // Treat warnings like errors.
+  bool WarnOnExtensions;      // Enables warnings for gcc extensions: -pedantic.
+  bool ErrorOnExtensions;     // Error on extensions: -pedantic-errors.
+  DiagnosticClient &Client;   // Consumer of the classified diagnostics.
+public:
+  Diagnostic(DiagnosticClient &client) : Client(client) {
+    WarningsAsErrors = false;
+    WarnOnExtensions = false;
+    ErrorOnExtensions = false;
+  }
+  
+  //===--------------------------------------------------------------------===//
+  //  Diagnostic characterization methods, used by a client to customize how
+  //  diagnostics are classified before being handed to the DiagnosticClient.
+  //
+
+  /// setWarningsAsErrors - When set to true, any warnings reported are issued
+  /// as errors.
+  void setWarningsAsErrors(bool Val) { WarningsAsErrors = Val; }
+  bool getWarningsAsErrors() const { return WarningsAsErrors; }
+  
+  /// setWarnOnExtensions - When set to true, issue warnings on GCC extensions,
+  /// the equivalent of GCC's -pedantic.
+  void setWarnOnExtensions(bool Val) { WarnOnExtensions = Val; }
+  bool getWarnOnExtensions() const { return WarnOnExtensions; }
+  
+  /// setErrorOnExtensions - When set to true issue errors for GCC extensions
+  /// instead of warnings.  This is the equivalent to GCC's -pedantic-errors.
+  void setErrorOnExtensions(bool Val) { ErrorOnExtensions = Val; }
+  bool getErrorOnExtensions() const { return ErrorOnExtensions; }
+
+  
+  //===--------------------------------------------------------------------===//
+  // Diagnostic classification and reporting interfaces.
+  //
+
+  /// getDescription - Given a diagnostic ID, return a description of the
+  /// issue.
+  static const char *getDescription(unsigned DiagID);
+  
+  /// Level - The level of the diagnostic 
+  enum Level {
+    // FIXME: Anachronism?
+    Ignored, Note, Warning, Error, Fatal, Sorry
+  };
+  
+  /// isNoteWarningOrExtension - Return true if the unmapped diagnostic level of
+  /// the specified diagnostic ID is a Note, Warning, or Extension.
+  static bool isNoteWarningOrExtension(unsigned DiagID);
+
+  /// getDiagnosticLevel - Based on the way the client configured the Diagnostic
+  /// object, classify the specified diagnostic ID into a Level, consumable by
+  /// the DiagnosticClient.
+  Level getDiagnosticLevel(unsigned DiagID) const;
+  
+  /// Report - Issue the message to the client. If the client wants us to stop
+  /// compilation, return true, otherwise return false.  DiagID is a member of
+  /// the diag::kind enum.  NOTE(review): Extra presumably fills the "%s" in
+  /// the diagnostic text -- confirm against the implementation.
+  bool Report(SourceLocation Pos, unsigned DiagID,
+              const std::string &Extra = "");
+};
+
+/// DiagnosticClient - This is an abstract interface implemented by clients of
+/// the front-end, which formats and prints fully processed diagnostics.
+class DiagnosticClient {
+public:
+  
+  // Out-of-line virtual destructor (presumably defined in Diagnostic.cpp).
+  virtual ~DiagnosticClient();
+  
+  /// HandleDiagnostic - Handle this diagnostic, reporting it to the user or 
+  /// capturing it to a log as needed.  If this returns true, compilation will
+  /// be gracefully terminated, otherwise compilation will continue.
+  virtual bool HandleDiagnostic(Diagnostic::Level DiagLevel, SourceLocation Pos,
+                                diag::kind ID, const std::string &Msg) = 0;
+};
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Basic/Diagnostic.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/Diagnostic.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def (added)
+++ cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def Wed Jul 11 11:22:17 2007
@@ -0,0 +1,156 @@
+//===-- DiagnosticKinds.def - C Family Diagnostic Kind Database -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the DiagnosticKind database.
+//
+//===----------------------------------------------------------------------===//
+
+// Flags for diagnostic:
+//
+//   DIAG_TYPE - Allows one of:
+//     NOTE      - Informational message.
+//     WARNING   - Warning.
+//     EXTENSION - Notification that an extension to the language is being used.
+//     ERROR     - Error, compilation will stop after parsing completes.
+//     FATAL     - Fatal error: parsing must stop.
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Diagnostics
+//===----------------------------------------------------------------------===//
+
+// Warnings: lexically valid but suspicious constructs.
+DIAG(null_in_string, WARNING,
+     "null character(s) preserved in string literal")
+DIAG(null_in_char  , WARNING,
+     "null character(s) preserved in character literal")
+DIAG(null_in_file  , WARNING,
+     "null character ignored")
+DIAG(nested_block_comment, WARNING,
+     "\"/*\" within block comment")
+DIAG(escaped_newline_block_comment_end, WARNING,
+     "escaped newline between */ characters at block comment end")
+DIAG(min_max_deprecated, WARNING,
+     "minimum/maximum operators are deprecated")
+DIAG(backslash_newline_space, WARNING,
+     "backslash and newline separated by space")
+
+// Trigraphs.
+DIAG(trigraph_ignored, WARNING, "trigraph ignored")
+DIAG(trigraph_ignored_block_comment, WARNING,
+     "ignored trigraph would end block comment")
+DIAG(trigraph_ends_block_comment, WARNING,
+     "trigraph ends block comment")
+DIAG(trigraph_converted, WARNING,
+     "trigraph converted to '%s' character")
+
+// Extensions: mapped to warnings or errors per the -pedantic /
+// -pedantic-errors settings on the Diagnostic object.
+DIAG(ext_multi_line_bcpl_comment, EXTENSION,
+     "multi-line // comment")
+DIAG(ext_bcpl_comment, EXTENSION,
+     "// comments are not allowed in this language")
+DIAG(ext_no_newline_eof, EXTENSION,
+     "no newline at end of file")
+DIAG(ext_backslash_newline_eof, EXTENSION,
+     "backslash-newline at end of file")
+DIAG(ext_dollar_in_identifier, EXTENSION,
+     "'$' in identifier")
+
+DIAG(ext_token_used, EXTENSION,
+     "Extension used")
+
+// Hard lexical errors.
+DIAG(err_unterminated_string, ERROR,
+     "missing terminating \" character")
+DIAG(err_unterminated_char, ERROR,
+     "missing terminating ' character")
+DIAG(err_empty_character, ERROR,
+     "empty character constant")
+DIAG(err_unterminated_block_comment, ERROR,
+     "unterminated /* comment")
+DIAG(err_stray_character, ERROR,
+     "stray character in program")
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Diagnostics
+//===----------------------------------------------------------------------===//
+
+// Warnings.
+DIAG(pp_hash_warning, WARNING,
+     "#warning%s")
+DIAG(pp_include_next_in_primary, WARNING,
+     "#include_next in primary source file")
+DIAG(pp_include_next_absolute_path, WARNING,
+     "#include_next with absolute path")
+DIAG(ext_c99_whitespace_required_after_macro_name, WARNING,
+     "ISO C99 requires whitespace after the macro name")
+
+// Extensions.
+DIAG(ext_pp_import_directive, EXTENSION,
+     "#import is a language extension")
+DIAG(ext_pp_include_next_directive, EXTENSION,
+     "#include_next is a language extension")
+DIAG(ext_pp_warning_directive, EXTENSION,
+     "#warning is a language extension")
+DIAG(ext_pp_extra_tokens_at_eol, EXTENSION,
+     "extra tokens at end of %s directive")
+DIAG(ext_pp_comma_expr, EXTENSION,
+     "comma operator in operand of #if")
+
+// Errors: directive handling and #if expression evaluation.
+DIAG(err_pp_invalid_directive, ERROR,
+     "invalid preprocessing directive")
+DIAG(err_pp_hash_error, ERROR,
+     "#error%s")
+DIAG(err_pp_file_not_found, ERROR,
+     "file not found")
+DIAG(err_pp_empty_filename, ERROR,
+     "empty filename")
+DIAG(err_pp_include_too_deep, ERROR,
+     "#include nested too deeply")
+DIAG(err_pp_expects_filename, ERROR,
+     "expected \"FILENAME\" or <FILENAME>")
+DIAG(err_pp_macro_not_identifier, ERROR,
+     "macro names must be identifiers")
+DIAG(err_pp_missing_macro_name, ERROR,
+     "macro name missing")
+DIAG(err_pp_unterminated_conditional, ERROR,
+     "unterminated conditional directive")
+DIAG(pp_err_else_after_else, ERROR,
+     "#else after #else")
+DIAG(pp_err_elif_after_else, ERROR,
+     "#elif after #else")
+DIAG(pp_err_else_without_if, ERROR,
+     "#else without #if")
+DIAG(pp_err_elif_without_if, ERROR,
+     "#elif without #if")
+DIAG(err_pp_endif_without_if, ERROR,
+     "#endif without #if")
+DIAG(err_pp_expected_value_in_expr, ERROR,
+     "expected value in expression")
+DIAG(err_pp_missing_val_before_operator, ERROR,
+     "missing value before operator")
+DIAG(err_pp_expected_rparen, ERROR,
+     "expected ')' in preprocessor expression")
+DIAG(err_pp_expected_eol, ERROR,
+     "expected end of line in preprocessor expression")
+DIAG(err_pp_defined_requires_identifier, ERROR,
+     "operator \"defined\" requires an identifier")
+DIAG(err_pp_missing_rparen, ERROR,
+     "missing ')' after \"defined\"")
+DIAG(err_pp_colon_without_question, ERROR,
+     "':' without preceding '?'")
+DIAG(err_pp_question_without_colon, ERROR,
+     "'?' without following ':'")
+DIAG(err_pp_division_by_zero, ERROR,
+     "division by zero in preprocessor expression")
+DIAG(err_pp_remainder_by_zero, ERROR,
+     "remainder by zero in preprocessor expression")
+
+DIAG(err_pp_expr_bad_token, ERROR,
+     "token is not valid in preprocessor expressions")
+
+// Should be a sorry?
+DIAG(err_pp_I_dash_not_supported, ERROR,
+     "-I- not supported, please use -iquote instead")
+// Reset DIAG so the file can be included again with a fresh definition.
+#undef DIAG

Propchange: cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/FileManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/FileManager.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/FileManager.h (added)
+++ cfe/cfe/trunk/include/clang/Basic/FileManager.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,103 @@
+//===--- FileManager.h - File System Probing and Caching --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the FileManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FILEMANAGER_H
+#define LLVM_CLANG_FILEMANAGER_H
+
+#include <map>
+#include <string>
+// FIXME: Enhance libsystem to support inode and other fields in stat.
+#include <sys/types.h>
+
+namespace llvm {
+namespace clang {
+class FileManager;
+  
+/// DirectoryEntry - Cached information about one directory on the disk.
+///
+class DirectoryEntry {
+  std::string Name;   // Name of the directory.
+  DirectoryEntry() {}  // Only FileManager can create DirectoryEntry objects.
+  friend class FileManager;
+public:
+  const std::string &getName() const { return Name; }
+};
+
+/// FileEntry - Cached information about one file on the disk.
+///
+class FileEntry {
+  std::string Name;           // Name of the file.
+  off_t Size;                 // File size in bytes.
+  const DirectoryEntry *Dir;  // Directory file lives in.
+  unsigned UID;               // A unique (small) ID for the file.
+  FileEntry() {}              // Only FileManager can create FileEntry objects.
+  friend class FileManager;
+public:
+  
+  const std::string &getName() const { return Name; }
+  off_t getSize() const { return Size; }
+  unsigned getUID() const { return UID; }
+  
+  /// getDir - Return the directory the file lives in.
+  ///
+  const DirectoryEntry *getDir() const { return Dir; }
+};
+
+ 
+/// FileManager - Implements support for file system lookup, file system
+/// caching, and directory search management.  This also handles more advanced
+/// properties, such as uniquing files based on "inode", so that a file with two
+/// names (e.g. symlinked) will be treated as a single file.
+///
+class FileManager {
+  /// DirEntries/FileEntries - This is a cache of directory/file entries we have
+  /// looked up, keyed by the name used to look them up.
+  ///
+  std::map<std::string, DirectoryEntry*> DirEntries;
+  std::map<std::string, FileEntry*> FileEntries;
+  
+  /// UniqueDirs/UniqueFiles - Cache from ID's to existing directories/files.
+  /// Keyed by (dev_t, ino_t) so that two names for the same inode resolve to
+  /// the same entry.
+  ///
+  std::map<std::pair<dev_t, ino_t>, DirectoryEntry*> UniqueDirs;
+  std::map<std::pair<dev_t, ino_t>, FileEntry*> UniqueFiles;
+  
+  /// NextFileUID - Each FileEntry we create is assigned a unique ID #.
+  ///
+  unsigned NextFileUID;
+  
+  // Statistics.
+  unsigned NumDirLookups, NumFileLookups;
+  unsigned NumDirCacheMisses, NumFileCacheMisses;
+public:
+  FileManager() : NextFileUID(0) {
+    NumDirLookups = NumFileLookups = 0;
+    NumDirCacheMisses = NumFileCacheMisses = 0;
+  }
+
+  /// getDirectory - Lookup, cache, and verify the specified directory.  This
+  /// returns null if the directory doesn't exist.
+  /// 
+  const DirectoryEntry *getDirectory(const std::string &Filename);
+  
+  /// getFile - Lookup, cache, and verify the specified file.  This returns null
+  /// if the file doesn't exist.
+  /// 
+  const FileEntry *getFile(const std::string &Filename);
+  
+  /// PrintStats - Print lookup/cache-miss statistics for this FileManager.
+  void PrintStats() const;
+};
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Basic/FileManager.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/FileManager.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h (added)
+++ cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,68 @@
+//===--- SourceBuffer.h - C Language Family Source Buffer -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the SourceBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SOURCEBUFFER_H
+#define LLVM_CLANG_SOURCEBUFFER_H
+
+namespace llvm {
+namespace sys { class Path; }
+namespace clang {
+  class FileEntry;
+
+/// SourceBuffer - This interface provides simple read-only access to the raw
+/// bits in a source file in a memory efficient way.  In addition to basic
+/// access to the characters in the file, this interface guarantees you can
+/// read one character past the end of the file, and that this character will
+/// read as '\0'.
+class SourceBuffer {
+  const char *BufferStart; // Start of the buffer.
+  const char *BufferEnd;   // End of the buffer.
+
+  /// MustDeleteBuffer - True if we allocated this buffer.  If so, the
+  /// destructor must know to delete[] it.
+  bool MustDeleteBuffer;
+protected:
+  SourceBuffer() : MustDeleteBuffer(false) {}
+  // init adopts [BufStart, BufEnd) without copying; initCopyOf makes an
+  // owned copy of the range.
+  void init(const char *BufStart, const char *BufEnd);
+  void initCopyOf(const char *BufStart, const char *BufEnd);
+public:
+  virtual ~SourceBuffer();
+  
+  const char *getBufferStart() const { return BufferStart; }
+  const char *getBufferEnd() const   { return BufferEnd; }
+  unsigned getBufferSize() const { return BufferEnd-BufferStart; }
+  
+  /// getBufferIdentifier - Return an identifier for this buffer, typically the
+  /// filename it was read from.
+  virtual const char *getBufferIdentifier() const {
+    return "Unknown buffer";
+  }
+    
+  /// getFile - Open the specified file as a SourceBuffer, returning a new
+  /// SourceBuffer if successful, otherwise returning null.
+  static SourceBuffer *getFile(const FileEntry *FileEnt);
+
+  /// getMemBuffer - Open the specified memory range as a SourceBuffer.  Note
+  /// that EndPtr[0] must be a null byte and be accessible!
+  static SourceBuffer *getMemBuffer(const char *StartPtr, const char *EndPtr,
+                                    const char *BufferName = "");
+  
+  /// getSTDIN - Read all of stdin into a file buffer, and return it.  This
+  /// fails if stdin is empty.
+  static SourceBuffer *getSTDIN();
+};
+
+} // end namespace clang
+} // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/SourceBuffer.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/SourceLocation.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/SourceLocation.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/SourceLocation.h (added)
+++ cfe/cfe/trunk/include/clang/Basic/SourceLocation.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,81 @@
+//===--- SourceLocation.h - Compact identifier for Source Files -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the SourceLocation class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SOURCELOCATION_H
+#define LLVM_CLANG_SOURCELOCATION_H
+
+namespace llvm {
+namespace clang {
+    
+/// SourceLocation - This is a carefully crafted 32-bit identifier that encodes
+/// a full include stack, line and column number information for a position in
+/// an input translation unit.
+class SourceLocation {
+  unsigned ID;  // Packed as (FileID << FilePosBits) | FilePos.
+public:
+  // Bit allocation within the 32-bit ID: the high FileIDBits select the file
+  // chunk, the low FilePosBits give the byte offset within that chunk.
+  enum {
+    FileIDBits  = 12,
+    FilePosBits = 32-FileIDBits
+  };
+
+  SourceLocation() : ID(0) {}  // 0 is an invalid FileID.
+  
+  /// SourceLocation constructor - Create a new SourceLocation object with the 
+  /// specified FileID and FilePos.
+  SourceLocation(unsigned FileID, unsigned FilePos) {
+    // If a FilePos is larger than (1<<FilePosBits), the SourceManager makes
+    // enough consecutive FileIDs that we have one for each chunk.
+    if (FilePos >= (1 << FilePosBits)) {
+      FileID += FilePos >> FilePosBits;
+      FilePos &= (1 << FilePosBits)-1;
+    }
+    
+    // FIXME: Find a way to handle out of FileID bits!  Maybe MaxFileID is an
+    // escape of some sort?
+    if (FileID >= (1 << FileIDBits))
+      FileID = (1 << FileIDBits)-1;
+    
+    ID = (FileID << FilePosBits) | FilePos;
+  }
+  
+  /// isValid - Return true if this is a valid SourceLocation object.  Invalid
+  /// SourceLocations are often used when events have no corresponding location
+  /// in the source (e.g. a diagnostic is required for a command line option).
+  ///
+  bool isValid() const { return ID != 0; }
+  
+  /// getFileID - Return the file identifier for this SourceLocation.  This
+  /// FileID can be used with the SourceManager object to obtain an entire
+  /// include stack for a file position reference.
+  unsigned getFileID() const { return ID >> FilePosBits; }
+  
+  /// getRawFilePos - Return the byte offset from the start of the file-chunk
+  /// referred to by FileID.  This method should not be used to get the offset
+  /// from the start of the file, instead you should use
+  /// SourceManager::getFilePos.  This method will be incorrect for large files.
+  unsigned getRawFilePos() const { return ID & ((1 << FilePosBits)-1); }
+};
+
+/// Two SourceLocations are equal when both the FileID and the raw file
+/// position match.
+inline bool operator==(const SourceLocation &LHS, const SourceLocation &RHS) {
+  if (LHS.getFileID() != RHS.getFileID())
+    return false;
+  return LHS.getRawFilePos() == RHS.getRawFilePos();
+}
+
+/// Inequality: the negation of operator==, spelled out field-by-field.
+inline bool operator!=(const SourceLocation &LHS, const SourceLocation &RHS) {
+  return LHS.getFileID() != RHS.getFileID() ||
+         LHS.getRawFilePos() != RHS.getRawFilePos();
+}
+    
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Basic/SourceLocation.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/SourceLocation.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/SourceManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/SourceManager.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/SourceManager.h (added)
+++ cfe/cfe/trunk/include/clang/Basic/SourceManager.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,195 @@
+//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the SourceManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SOURCEMANAGER_H
+#define LLVM_CLANG_SOURCEMANAGER_H
+
+#include "clang/Basic/SourceLocation.h"
+#include <vector>
+#include <map>
+#include <list>
+
+namespace llvm {
+namespace clang {
+  
+class SourceBuffer;
+class SourceManager;
+class FileEntry;
+class IdentifierTokenInfo;
+  
+/// SourceManager - This file handles loading and caching of source files into
+/// memory.  This object owns the SourceBuffer objects for all of the loaded
+/// files and assigns unique FileID's for each unique #include chain.
+class SourceManager {
+  /// FileInfo - One instance of this struct is kept for every file loaded or
+  /// used.  This object owns the SourceBuffer object.
+  struct FileInfo {
+    /// Buffer - The actual buffer containing the characters from the input
+    /// file.
+    const SourceBuffer *Buffer;
+    
+    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
+    /// is lazily computed.
+    ///
+    unsigned *SourceLineCache;
+    
+    /// NumLines - The number of lines in this FileInfo.  This is only valid if
+    /// SourceLineCache is non-null.
+    unsigned NumLines;
+  };
+  
+  typedef std::pair<const FileEntry * const, FileInfo> InfoRec;
+  
+  /// FileIDInfo - Information about a FileID, basically just the file that it
+  /// represents and include stack information.
+  struct FileIDInfo {
+    /// IncludeLoc - The location of the #include that brought in this file.
+    /// This SourceLocation object has a FileID of 0 for the main file.
+    SourceLocation IncludeLoc;
+    
+    /// ChunkNo - Really large files are broken up into chunks that are each
+    /// (1 << SourceLocation::FilePosBits) in size.  This specifies the chunk
+    /// number of this FileID.
+    unsigned ChunkNo;
+    
+    /// FileInfo - Information about the file itself.
+    ///
+    const InfoRec *Info;
+    
+    FileIDInfo(SourceLocation IL, unsigned CN, const InfoRec *Inf)
+      : IncludeLoc(IL), ChunkNo(CN), Info(Inf) {}
+  };
+  
+  /// FileInfos - Memoized information about all of the files tracked by this
+  /// SourceManager.
+  std::map<const FileEntry *, FileInfo> FileInfos;
+  
+  /// MemBufferInfos - Information about various memory buffers that we have
+  /// read in.  This is a list, instead of a vector, because we need pointers to
+  /// the FileInfo objects to be stable.
+  std::list<InfoRec> MemBufferInfos;
+  
+  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
+  /// entries are off by one.
+  std::vector<FileIDInfo> FileIDs;
+public:
+  ~SourceManager();
+  
+  /// createFileID - Create a new FileID that represents the specified file
+  /// being #included from the specified IncludePosition.  This returns 0 on
+  /// error and translates NULL into standard input.
+  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
+    const InfoRec *IR = getInfoRec(SourceFile);
+    if (IR == 0) return 0;    // Error opening file?
+    return createFileID(IR, IncludePos);
+  }
+  
+  /// createFileIDForMemBuffer - Create a new FileID that represents the
+  /// specified memory buffer.  This does no caching of the buffer and takes
+  /// ownership of the SourceBuffer, so only pass a SourceBuffer to this once.
+  unsigned createFileIDForMemBuffer(const SourceBuffer *Buffer) {
+    const InfoRec *IR = createMemBufferInfoRec(Buffer);
+    return createFileID(IR, SourceLocation());
+  }
+  
+  
+  /// getMacroID - Get or create a new FileID that represents a macro with the
+  /// specified identifier being expanded at the specified position.  This can
+  /// never fail.
+  unsigned getMacroID(const IdentifierTokenInfo *Identifier,
+                      SourceLocation ExpandPos) {
+    // FIXME: Implement ID's for macro expansions!
+    return ExpandPos.getFileID();
+  }
+  
+  /// getBuffer - Return the buffer for the specified FileID.
+  ///
+  const SourceBuffer *getBuffer(unsigned FileID) {
+    return getFileInfo(FileID)->Buffer;
+  }
+
+  /// getIncludeLoc - Return the location of the #include for the specified
+  /// FileID.
+  SourceLocation getIncludeLoc(unsigned FileID) const {
+    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
+    return FileIDs[FileID-1].IncludeLoc;
+  }
+  
+  /// getFilePos - This (efficient) method returns the offset from the start of
+  /// the file that the specified SourceLocation represents.
+  unsigned getFilePos(SourceLocation IncludePos) const {
+    assert(IncludePos.getFileID()-1 < FileIDs.size() && "Invalid FileID!");
+    // If this file has been split up into chunks, factor in the chunk number
+    // that the FileID references.
+    unsigned ChunkNo = FileIDs[IncludePos.getFileID()-1].ChunkNo;
+    return IncludePos.getRawFilePos() +
+           (ChunkNo << SourceLocation::FilePosBits);
+  }
+  
+  /// getColumnNumber - Return the column # for the specified include position.
+  /// this is significantly cheaper to compute than the line number.  This
+  /// returns zero if the column number isn't known.
+  unsigned getColumnNumber(SourceLocation IncludePos) const;
+  
+  /// getLineNumber - Given a SourceLocation, return the physical line number
+  /// for the position indicated.  This requires building and caching a table of
+  /// line offsets for the SourceBuffer, so this is not cheap: use only when
+  /// about to emit a diagnostic.
+  unsigned getLineNumber(SourceLocation IncludePos);
+
+  /// getFileEntryForFileID - Return the FileEntry record for the specified
+  /// FileID if one exists.
+  const FileEntry *getFileEntryForFileID(unsigned FileID) const {
+    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
+    return FileIDs[FileID-1].Info->first;
+  }
+  
+  /// PrintStats - Print statistics to stderr.
+  ///
+  void PrintStats() const;
+private:
+  /// createFileID - Create a new fileID for the specified InfoRec and include
+  /// position.  This works regardless of whether the InfoRec corresponds to a
+  /// file or some other input source.
+  unsigned createFileID(const InfoRec *File, SourceLocation IncludePos);
+    
+  /// getInfoRec - Create or return a cached InfoRec for the specified file.
+  /// This returns null on failure.
+  const InfoRec *getInfoRec(const FileEntry *SourceFile);
+  
+  /// createMemBufferInfoRec - Create a new info record for the specified memory
+  /// buffer.  This does no caching.
+  const InfoRec *createMemBufferInfoRec(const SourceBuffer *Buffer);
+
+  // Map a FileID back to its InfoRec (FileID #0 is invalid; see FileIDs).
+  const InfoRec *getInfoRec(unsigned FileID) const {
+    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
+    return FileIDs[FileID-1].Info;
+  }
+  
+  FileInfo *getFileInfo(unsigned FileID) const {
+    if (const InfoRec *IR = getInfoRec(FileID))
+      return const_cast<FileInfo *>(&IR->second);
+    return 0;
+  }
+  FileInfo *getFileInfo(const FileEntry *SourceFile) {
+    if (const InfoRec *IR = getInfoRec(SourceFile))
+      return const_cast<FileInfo *>(&IR->second);
+    return 0;
+  }
+};
+
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Basic/SourceManager.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/SourceManager.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/TokenKinds.def?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/TokenKinds.def (added)
+++ cfe/cfe/trunk/include/clang/Basic/TokenKinds.def Wed Jul 11 11:22:17 2007
@@ -0,0 +1,241 @@
+//===--- TokenKinds.def - C Family Token Kind Database ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the TokenKind database.  This includes normal tokens like
+//  tok::ampamp (corresponding to the && token) as well as keywords for various
+//  languages.  Users of this file may optionally #define the TOK/KEYWORD/ALIAS
+//  macros to make use of this file.
+//
+//===----------------------------------------------------------------------===//
+
+// Provide harmless default expansions so that clients may #define only the
+// macro(s) they care about.  Note that every KEYWORD name below starts with
+// '_', so e.g. KEYWORD(_auto, ...) expands to the token kw_auto.
+#ifndef TOK
+#define TOK(X)
+#endif
+#ifndef KEYWORD
+#define KEYWORD(X,Y) TOK(kw ## X)
+#endif
+#ifndef ALIAS
+#define ALIAS(X,Y)
+#endif
+
+TOK(unknown)             // Not a token.
+TOK(eof)                 // End of file.
+TOK(eom)                 // End of macro (end of line inside a macro).
+
+// C99 6.4.2: Identifiers.
+TOK(identifier)          // abcde123
+
+// C99 6.4.4.1: Integer Constants
+// C99 6.4.4.2: Floating Constants
+TOK(numeric_constant)    // 0x123
+
+// C99 6.4.4: Character Constants
+TOK(char_constant)       // 'a'   L'b'
+
+// C99 6.4.5: String Literals.
+TOK(string_literal)      // "foo"  L"foo"
+TOK(angle_string_literal)// <foo>
+
+// C99 6.4.6: Punctuators.
+TOK(l_square)            // [
+TOK(r_square)            // ]
+TOK(l_paren)             // (
+TOK(r_paren)             // )
+TOK(l_brace)             // {
+TOK(r_brace)             // }
+TOK(period)              // .
+TOK(ellipsis)            // ...
+TOK(amp)                 // &
+TOK(ampamp)              // &&
+TOK(ampequal)            // &=
+TOK(star)                // *
+TOK(starequal)           // *=
+TOK(plus)                // +
+TOK(plusplus)            // ++
+TOK(plusequal)           // +=
+TOK(minus)               // -
+TOK(arrow)               // ->
+TOK(minusminus)          // --
+TOK(minusequal)          // -=
+TOK(tilde)               // ~
+TOK(exclaim)             // !
+TOK(exclaimequal)        // !=
+TOK(slash)               // /
+TOK(slashequal)          // /=
+TOK(percent)             // %
+TOK(percentequal)        // %=
+TOK(less)                // <
+TOK(lessless)            // <<
+TOK(lessequal)           // <=
+TOK(lesslessequal)       // <<=
+TOK(greater)             // >
+TOK(greatergreater)      // >>
+TOK(greaterequal)        // >=
+TOK(greatergreaterequal) // >>=
+TOK(caret)               // ^
+TOK(caretequal)          // ^=
+TOK(pipe)                // |
+TOK(pipepipe)            // ||
+TOK(pipeequal)           // |=
+TOK(question)            // ?
+TOK(colon)               // :
+TOK(semi)                // ;
+TOK(equal)               // =
+TOK(equalequal)          // ==
+TOK(comma)               // ,
+TOK(hash)                // #
+TOK(hashhash)            // ##
+
+// C++ Support
+TOK(periodstar)          // .*
+TOK(arrowstar)           // ->*
+TOK(coloncolon)          // ::
+
+// GNU C++ Extensions
+TOK(lessquestionequal)   // <?=
+TOK(greaterquestionequal)// >?=
+TOK(lessquestion)        // <?
+TOK(greaterquestion)     // >?
+
+// Objective C support.
+TOK(at)                  // @
+
+// Possible future Objective-C token kinds, not yet emitted:
+// at_identifier         // @foo
+// at_string             // @"foo"
+
+
+// C99 6.4.1: Keywords.  These turn into kw_* tokens.  The _ prefix is used to
+// prevent the __VA_ARGS__ token from appearing here.
+// Flags allowed:
+//   NOTC90 - In C90, this token is never available.
+//   EXTC90 - In C90, this token is an extension that is enabled unless strict.
+//   NOTC99 - In C99, this token is never available.
+//   EXTC99 - In C99, this token is an extension that is enabled unless strict.
+//   NOTCPP - In C++, this token is never available.
+//   EXTCPP - In C++, this token is an extension that is enabled unless strict.
+//
+KEYWORD(_auto                        , 0)
+KEYWORD(_break                       , 0)
+KEYWORD(_case                        , 0)
+KEYWORD(_char                        , 0)
+KEYWORD(_const                       , 0)
+KEYWORD(_continue                    , 0)
+KEYWORD(_default                     , 0)
+KEYWORD(_do                          , 0)
+KEYWORD(_double                      , 0)
+KEYWORD(_else                        , 0)
+KEYWORD(_enum                        , 0)
+KEYWORD(_extern                      , 0)
+KEYWORD(_float                       , 0)
+KEYWORD(_for                         , 0)
+KEYWORD(_goto                        , 0)
+KEYWORD(_if                          , 0)
+KEYWORD(_inline                      , EXTC90)  // Ext in C90, ok in C99/C++
+KEYWORD(_int                         , 0)
+KEYWORD(_long                        , 0)
+KEYWORD(_register                    , 0)
+KEYWORD(_restrict                    , NOTC90)    // Not in C90
+KEYWORD(_return                      , 0)
+KEYWORD(_short                       , 0)
+KEYWORD(_signed                      , 0)
+KEYWORD(_sizeof                      , 0)
+KEYWORD(_static                      , 0)
+KEYWORD(_struct                      , 0)
+KEYWORD(_switch                      , 0)
+KEYWORD(_typedef                     , 0)
+KEYWORD(_union                       , 0)
+KEYWORD(_unsigned                    , 0)
+KEYWORD(_void                        , 0)
+KEYWORD(_volatile                    , 0)
+KEYWORD(_while                       , 0)
+KEYWORD(__Bool                       , NOTC90|NOTCPP)  // C99 only
+KEYWORD(__Complex                    , NOTC90)         // C99/C++
+KEYWORD(__Imaginary                  , NOTC90|NOTCPP)  // C99 only
+
+// Special tokens to the compiler.
+KEYWORD(___VA_ARGS__                 , EXTC90|EXTCPP) // Only in C99.
+KEYWORD(___func__                    , EXTC90|EXTCPP) // Only in C99.
+KEYWORD(___FUNCTION__                , EXTC90|EXTC99|EXTCPP) // GCC Extension.
+KEYWORD(___PRETTY_FUNCTION__         , EXTC90|EXTC99|EXTCPP) // GCC Extension.
+
+// C++
+KEYWORD(_asm                         , EXTC90|EXTC99) // Exts in C90/C99
+KEYWORD(_catch                       , NOTC90|NOTC99)
+KEYWORD(_class                       , NOTC90|NOTC99)
+KEYWORD(_const_cast                  , NOTC90|NOTC99)
+KEYWORD(_delete                      , NOTC90|NOTC99)
+KEYWORD(_dynamic_cast                , NOTC90|NOTC99)
+KEYWORD(_explicit                    , NOTC90|NOTC99)
+KEYWORD(_export                      , NOTC90|NOTC99)
+KEYWORD(_false                       , NOTC90|NOTC99)
+KEYWORD(_friend                      , NOTC90|NOTC99)
+KEYWORD(_mutable                     , NOTC90|NOTC99)
+KEYWORD(_namespace                   , NOTC90|NOTC99)
+KEYWORD(_new                         , NOTC90|NOTC99)
+KEYWORD(_operator                    , NOTC90|NOTC99)
+KEYWORD(_private                     , NOTC90|NOTC99)
+KEYWORD(_protected                   , NOTC90|NOTC99)
+KEYWORD(_public                      , NOTC90|NOTC99)
+KEYWORD(_reinterpret_cast            , NOTC90|NOTC99)
+KEYWORD(_static_cast                 , NOTC90|NOTC99)
+KEYWORD(_template                    , NOTC90|NOTC99)
+KEYWORD(_this                        , NOTC90|NOTC99)
+KEYWORD(_throw                       , NOTC90|NOTC99)
+KEYWORD(_true                        , NOTC90|NOTC99)
+KEYWORD(_try                         , NOTC90|NOTC99)
+KEYWORD(_typename                    , NOTC90|NOTC99)
+KEYWORD(_typeid                      , NOTC90|NOTC99)
+KEYWORD(_using                       , NOTC90|NOTC99)
+KEYWORD(_virtual                     , NOTC90|NOTC99)
+KEYWORD(_wchar_t                     , NOTC90|NOTC99)
+
+// GNU Extensions.
+KEYWORD(__Decimal32                  , EXTC90|EXTC99|EXTCPP)
+KEYWORD(__Decimal64                  , EXTC90|EXTC99|EXTCPP)
+KEYWORD(__Decimal128                 , EXTC90|EXTC99|EXTCPP)
+KEYWORD(_typeof                      , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___null                      , NOTC90|NOTC99|EXTCPP) // C++-only extension
+KEYWORD(___alignof                   , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___attribute                 , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___builtin_choose_expr       , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___builtin_offsetof          , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___builtin_types_compatible_p, EXTC90|EXTC99|EXTCPP)
+KEYWORD(___builtin_va_arg            , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___extension__               , 0)     // Not treated as an extension!
+KEYWORD(___imag                      , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___label__                   , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___real                      , EXTC90|EXTC99|EXTCPP)
+KEYWORD(___thread                    , EXTC90|EXTC99|EXTCPP)
+
+// Alternate spellings for various tokens.  These are GCC extensions in all
+// languages, but they should not be disabled in strict conformance mode.
+ALIAS("__attribute__", __attribute)
+ALIAS("__const"      , const      )
+ALIAS("__const__"    , const      )
+ALIAS("__alignof__"  , __alignof  )
+ALIAS("__asm"        , asm        )
+ALIAS("__asm__"      , asm        )
+ALIAS("__complex"    , _Complex   )
+ALIAS("__complex__"  , _Complex   )
+ALIAS("__imag__"     , __imag     )
+ALIAS("__inline"     , inline     )
+ALIAS("__inline__"   , inline     )
+ALIAS("__real__"     , __real     )
+ALIAS("__restrict"   , restrict   )
+ALIAS("__restrict__" , restrict   )
+ALIAS("__signed"     , signed     )
+ALIAS("__signed__"   , signed     )
+ALIAS("__typeof"     , typeof     )
+ALIAS("__typeof__"   , typeof     )
+ALIAS("__volatile"   , volatile   )
+ALIAS("__volatile__" , volatile   )
+
+// Clean up after ourselves so clients need not #undef anything.
+#undef ALIAS
+#undef KEYWORD
+#undef TOK

Propchange: cfe/cfe/trunk/include/clang/Basic/TokenKinds.def

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/TokenKinds.def

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Basic/TokenKinds.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/TokenKinds.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/TokenKinds.h (added)
+++ cfe/cfe/trunk/include/clang/Basic/TokenKinds.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,36 @@
+//===--- TokenKinds.h - Enum values for C Token Kinds -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the TokenKind enum and support functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOKENKINDS_H
+#define LLVM_CLANG_TOKENKINDS_H
+
+namespace llvm {
+namespace clang {
+
+namespace tok {
+
+/// TokenKind - This provides a simple uniform namespace for tokens from all C
+/// languages.  The enumerators are generated from TokenKinds.def, in the order
+/// listed there, with NUM_TOKENS as the count.
+enum TokenKind {
+#define TOK(X) X,
+#include "clang/Basic/TokenKinds.def"
+  NUM_TOKENS
+};
+
+/// getTokenName - Return the name of the specified token kind.
+const char *getTokenName(enum TokenKind Kind);
+
+}  // end namespace tok
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Basic/TokenKinds.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Basic/TokenKinds.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h (added)
+++ cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,104 @@
+//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the IdentifierTokenInfo and IdentifierTable interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_IDENTIFIERTABLE_H
+#define LLVM_CLANG_IDENTIFIERTABLE_H
+
+#include "clang/Basic/TokenKinds.h"
+#include <string> 
+
+namespace llvm {
+namespace clang {
+  class IdentifierTable;
+  class MacroInfo;
+  
+/// IdentifierTokenInfo - One of these records is kept for each identifier that
+/// is lexed.  This contains information about whether the token was #define'd,
+/// is a language keyword, or if it is a front-end token of some sort (e.g. a
+/// variable or function name).  The preprocessor keeps this information in a
+/// set, and all tok::identifier tokens have a pointer to one of these.  
+class IdentifierTokenInfo {
+  unsigned NameLen;        // Length of the identifier string (see getName()).
+  MacroInfo *Macro;        // Set if this identifier is #define'd.
+  tok::TokenKind TokenID:8;// Token kind this identifier lexes as, if set.
+  bool IsExtension : 1;    // True if this token is a language extension.
+  void *FETokenInfo;       // Managed by the language front-end.
+  friend class IdentifierTable;
+public:
+  /// getName - Return the actual string for this identifier.  The length of
+  /// this string is stored in NameLen, and the returned string is properly null
+  /// terminated.
+  ///
+  const char *getName() const {
+    // String data is stored immediately after the IdentifierTokenInfo object.
+    return (const char*)(this+1);
+  }
+  
+  /// getNameLength - Return the length of the identifier string.
+  ///
+  unsigned getNameLength() const {
+    return NameLen;
+  }
+  
+  /// getMacroInfo - Return macro information about this identifier, or null if
+  /// it is not a macro.
+  MacroInfo *getMacroInfo() const { return Macro; }
+  void setMacroInfo(MacroInfo *I) { Macro = I; }
+  
+  /// get/setTokenID - If this is a source-language token (e.g. 'for'), this API
+  /// can be used to cause the lexer to map identifiers to source-language
+  /// tokens.
+  tok::TokenKind getTokenID() const { return TokenID; }
+  void setTokenID(tok::TokenKind ID) { TokenID = ID; }
+  
+  /// get/setExtension - Initialize information about whether or not this
+  /// language token is an extension.  This controls extension warnings, and is
+  /// only valid if a custom token ID is set.
+  bool isExtensionToken() const { return IsExtension; }
+  void setIsExtensionToken(bool Val) { IsExtension = Val; }
+  
+  
+  /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
+  /// associate arbitrary metadata with this token.
+  template<typename T>
+  T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
+  void setFETokenInfo(void *T) { FETokenInfo = T; }
+private:
+  void Destroy();
+};
+
+/// IdentifierTable - This table implements an efficient mapping from strings to
+/// IdentifierTokenInfo nodes.  It has no other purpose, but this is an
+/// extremely performance-critical piece of the code, as each occurrence of
+/// every identifier goes through here when lexed.
+class IdentifierTable {
+  void *TheTable;          // NOTE(review): opaque impl pointers — presumably
+  void *TheMemory;         // the hash table and its backing storage; confirm
+                           // against IdentifierTable.cpp.
+  unsigned NumIdentifiers; // Count of unique identifiers interned so far.
+public:
+  IdentifierTable();
+  ~IdentifierTable();
+  /// get - Return the identifier token info for the specified named identifier.
+  ///
+  IdentifierTokenInfo &get(const char *NameStart, const char *NameEnd);
+  IdentifierTokenInfo &get(const std::string &Name);
+  
+  /// PrintStats - Print some statistics to stderr that indicate how well the
+  /// hashing is doing.
+  void PrintStats() const;
+};
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Lexer.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Lexer.h (added)
+++ cfe/cfe/trunk/include/clang/Lex/Lexer.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,416 @@
+//===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the Lexer and LexerToken interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEXER_H
+#define LLVM_CLANG_LEXER_H
+
+#include "clang/Basic/TokenKinds.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace clang {
+class Diagnostic;
+class Lexer;
+class Preprocessor;
+class SourceBuffer;
+class SourceLocation;
+class IdentifierTokenInfo;
+
+/// LangOptions - Language feature flags that control how the lexer interprets
+/// its input.  Default-constructed with every feature disabled.
+struct LangOptions {
+  unsigned Trigraphs    : 1;  // Trigraphs in source files.
+  unsigned BCPLComment  : 1;  // BCPL-style // comments.
+  unsigned DollarIdents : 1;  // '$' allowed in identifiers.
+  unsigned Digraphs     : 1;  // When added to C?  C99?
+  unsigned HexFloats    : 1;  // C99 Hexadecimal float constants.
+  unsigned C99          : 1;  // C99 Support
+  unsigned CPlusPlus    : 1;  // C++ Support
+  unsigned CPPMinMax    : 1;  // C++ <?=, >?= tokens.
+  unsigned NoExtensions : 1;  // All extensions are disabled, strict mode.
+  
+  unsigned ObjC1        : 1;  // Objective C 1 support enabled.
+  unsigned ObjC2        : 1;  // Objective C 2 support enabled (implies ObjC1).
+  
+  LangOptions() {
+    // Zero every flag.  HexFloats was previously omitted here, leaving an
+    // indeterminate bit-field value in every default-constructed LangOptions.
+    Trigraphs = BCPLComment = DollarIdents = Digraphs = ObjC1 = ObjC2 = 0;
+    C99 = CPlusPlus = CPPMinMax = NoExtensions = HexFloats = 0;
+  }
+};
+
+
+/// LexerToken - This structure provides full information about a lexed token.
+/// It is not intended to be space efficient, it is intended to return as much
+/// information as possible about each returned token.  This is expected to be
+/// compressed into a smaller form if memory footprint is important.
+class LexerToken {
+  /// The start and end of the token text itself.
+  const char *Start, *End;
+  
+  /// TheLexer - The lexer object this token came from.
+  const Lexer *TheLexer;
+  
+  /// IdentifierInfo - If this was an identifier, this points to the uniqued
+  /// information about this identifier.
+  IdentifierTokenInfo *IdentifierInfo;
+
+  /// Kind - The actual flavor of token this is.
+  ///
+  tok::TokenKind Kind : 8;
+  
+  /// Flags - Bits we track about this token, members of the TokenFlags enum.
+  unsigned Flags : 8;
+public:
+    
+  // Various flags set per token:
+  enum TokenFlags {
+    StartOfLine   = 0x01,  // At start of line or only after whitespace.
+    LeadingSpace  = 0x02,  // Whitespace exists before this token.
+    NeedsCleaning = 0x04   // Contained an escaped newline or trigraph.
+    //#define STRINGIFY_ARG   (1 << 2) /* If macro argument to be stringified.
+    //#define PASTE_LEFT      (1 << 3) /* If on LHS of a ## operator.
+  };
+
+  tok::TokenKind getKind() const { return Kind; }
+  void SetKind(tok::TokenKind K) { Kind = K; }
+
+  const char *getStart() const { return Start; }
+  const char *getEnd() const { return End; }
+  void SetStart(const char *S) { Start = S; }
+  void SetEnd  (const char *E) { End = E; }
+  
+  const Lexer *getLexer() const { return TheLexer; }
+  
+  /// StartToken - Prepare this token to be filled in: reset all flags, clear
+  /// the identifier info, and remember the lexer that produced it.
+  void StartToken(const Lexer *L) {
+    Flags = 0;
+    IdentifierInfo = 0;
+    TheLexer = L;
+  }
+  
+  /// ClearPosition - Mark this token as not having a position, FIXME temporary.
+  void ClearPosition() {
+    TheLexer = 0;
+  }
+  
+  IdentifierTokenInfo *getIdentifierInfo() const { return IdentifierInfo; }
+  void SetIdentifierInfo(IdentifierTokenInfo *II) {
+    IdentifierInfo = II;
+  }
+
+  /// SetFlag - Set the specified flag.
+  void SetFlag(TokenFlags Flag) {
+    Flags |= Flag;
+  }
+  
+  /// ClearFlag - Unset the specified flag.
+  void ClearFlag(TokenFlags Flag) {
+    Flags &= ~Flag;
+  }
+
+  /// SetFlagValue - Set a flag to either true or false.
+  void SetFlagValue(TokenFlags Flag, bool Val) {
+    if (Val) 
+      SetFlag(Flag);
+    else
+      ClearFlag(Flag);
+  }
+  
+  /// getSourceLocation - Return a source location identifier for the specified
+  /// offset in the current file.
+  SourceLocation getSourceLocation() const;
+  
+  /// isAtStartOfLine - Return true if this token is at the start of a line.
+  ///
+  bool isAtStartOfLine() const { return Flags & StartOfLine; }
+  
+  /// hasLeadingSpace - Return true if this token has whitespace before it.
+  ///
+  bool hasLeadingSpace() const { return Flags & LeadingSpace; }
+  
+  /// needsCleaning - Return true if this token has trigraphs or escaped
+  /// newlines in it.
+  ///
+  bool needsCleaning() const { return Flags & NeedsCleaning; }
+  
+  /// dump - Print the token to stderr, used for debugging.
+  ///
+  void dump(bool DumpFlags = false) const;
+};
+
+/// PPConditionalInfo - Information about the conditional stack (#if directives)
+/// currently active.
+struct PPConditionalInfo {
+  /// IfLoc - Location where the conditional started.  Note this is a raw
+  /// pointer into the lexer's character buffer, not a SourceLocation.
+  const char *IfLoc;
+  
+  /// WasSkipping - True if this was contained in a skipping directive, e.g.
+  /// in a "#if 0" block.
+  bool WasSkipping;
+  
+  /// FoundNonSkip - True if we have emitted tokens already, and now we're in
+  /// an #else block or something.  Only useful in Skipping blocks.
+  bool FoundNonSkip;
+  
+  /// FoundElse - True if we've seen a #else in this block.  If so,
+  /// #elif/#else directives are not allowed.
+  bool FoundElse;
+};
+
+
+/// Lexer - This provides a simple interface that turns a text buffer into a
+/// stream of tokens.  This provides no support for file reading or buffering,
+/// or buffering/seeking of tokens, only forward lexing is supported.  It relies
+/// on the specified Preprocessor object to handle preprocessor directives, etc.
+class Lexer {
+  char PeekCharacter;            // The current char we are peeking ahead.
+  const char *BufferPtr;         // Current pointer into the buffer.
+  const char * const BufferStart;// Start of the buffer.
+  const char * const BufferEnd;  // End of the buffer.
+  const SourceBuffer *InputFile; // The file we are reading from.
+  unsigned CurFileID;            // FileID for the current input file.
+  Preprocessor &PP;              // Preprocessor object controlling lexing.
+  LangOptions Features;          // Features enabled by this language (cache).
+  
+  // Context-specific lexing flags.
+  bool IsAtStartOfLine;          // True if sitting at start of line.
+  bool ParsingPreprocessorDirective; // True if parsing #XXX
+  bool ParsingFilename;          // True after #include: turn <xx> into string.
+  
+  // Context that changes as the file is lexed.
+    
+  /// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks
+  /// we are currently in.
+  std::vector<PPConditionalInfo> ConditionalStack;
+  
+  friend class Preprocessor;
+public:
+    
+  /// Lexer constructor - Create a new lexer object for the specified buffer
+  /// with the specified preprocessor managing the lexing process.  This lexer
+  /// assumes that the specified SourceBuffer and Preprocessor objects will
+  /// outlive it, but doesn't take ownership of either pointer.
+  Lexer(const SourceBuffer *InBuffer, unsigned CurFileID, Preprocessor &PP);
+  
+  /// getFeatures - Return the language features currently enabled.  NOTE: this
+  /// lexer modifies features as a file is parsed!
+  const LangOptions &getFeatures() const { return Features; }
+
+  /// getCurFileID - Return the FileID for the file we are lexing out of.  This
+  /// implicitly encodes the include path to get to the file.
+  unsigned getCurFileID() const { return CurFileID; }
+  
+  /// Lex - Return the next token in the file.  If this is the end of file, it
+  /// returns the tok::eof token.  Return true if an error occurred and
+  /// compilation should terminate, false if normal.  This implicitly involves
+  /// the preprocessor.
+  bool Lex(LexerToken &Result) {
+    // Start a new token.
+    Result.StartToken(this);
+    
+    // NOTE, any changes here should also change code after calls to 
+    // Preprocessor::HandleDirective
+    if (IsAtStartOfLine) {
+      Result.SetFlag(LexerToken::StartOfLine);
+      IsAtStartOfLine = false;
+    }
+   
+    // Get a token.
+    return LexTokenInternal(Result);
+  }
+  
+  /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
+  /// (potentially) macro expand the filename.  If the sequence parsed is not
+  /// lexically legal, emit a diagnostic and return a result EOM token.
+  bool LexIncludeFilename(LexerToken &Result);
+  
+  /// ReadToEndOfLine - Read the rest of the current preprocessor line as an
+  /// uninterpreted string.  This switches the lexer out of directive mode.
+  std::string ReadToEndOfLine();
+  
+  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
+  /// token is the characters used to represent the token in the source file
+  /// after trigraph expansion and escaped-newline folding.  In particular, this
+  /// wants to get the true, uncanonicalized, spelling of things like digraphs,
+  /// UCNs, etc.
+  static std::string getSpelling(const LexerToken &Tok,
+                                 const LangOptions &Features);
+  std::string getSpelling(const LexerToken &Tok) const {
+    // NOTE(review): 'assert(this && ...)' is undefined behavior when reached
+    // through a null pointer; null-Lexer call sites should be fixed instead.
+    assert(this && "Can't get the spelling of a token with a null lexer!");
+    return getSpelling(Tok, Features);
+  }
+
+  /// getSpelling - This method is used to get the spelling of a token into a
+  /// preallocated buffer, instead of as an std::string.  The caller is required
+  /// to allocate enough space for the token, which is guaranteed to be at most
+  /// Tok.End-Tok.Start bytes long.  The actual length of the token is returned.
+  static unsigned getSpelling(const LexerToken &Tok, char *Buffer,
+                              const LangOptions &Features);
+  unsigned getSpelling(const LexerToken &Tok, char *Buffer) const {
+    // NOTE(review): same 'assert(this)' UB concern as above.
+    assert(this && "Can't get the spelling of a token with a null lexer!");
+    return getSpelling(Tok, Buffer, Features);
+  }
+  
+  
+  /// Diag - Forwarding function for diagnostics.  This translate a source
+  /// position in the current buffer into a SourceLocation object for rendering.
+  bool Diag(const char *Loc, unsigned DiagID,
+            const std::string &Msg = "") const;
+
+  /// getSourceLocation - Return a source location identifier for the specified
+  /// offset in the current file.
+  SourceLocation getSourceLocation(const char *Loc) const;
+  
+  //===--------------------------------------------------------------------===//
+  // Internal implementation interfaces.
+private:
+
+  /// LexTokenInternal - Internal interface to lex a preprocessing token. Called
+  /// by Lex.
+  ///
+  bool LexTokenInternal(LexerToken &Result);
+    
+  
+  //===--------------------------------------------------------------------===//
+  // Lexer character reading interfaces.
+  
+  // This lexer is built on two interfaces for reading characters, both of which
+  // automatically provide phase 1/2 translation.  getAndAdvanceChar is used
+  // when we know that we will be reading a character from the input buffer and
+  // that this character will be part of the result token. This occurs in (f.e.)
+  // string processing, because we know we need to read until we find the
+  // closing '"' character.
+  //
+  // The second interface is the combination of getCharAndSize with
+  // ConsumeChar.  getCharAndSize reads a phase 1/2 translated character,
+  // returning it and its size.  If the lexer decides that this character is
+  // part of the current token, it calls ConsumeChar on it.  This two stage
+  // approach allows us to emit diagnostics for characters (e.g. warnings about
+  // trigraphs), knowing that they only are emitted if the character is
+  // consumed.
+  
+  
+  /// getAndAdvanceChar - Read a single 'character' from the specified buffer,
+  /// advance over it, and return it.  This is tricky in several cases.  Here we
+  /// just handle the trivial case and fall-back to the non-inlined
+  /// getCharAndSizeSlow method to handle the hard case.
+  inline char getAndAdvanceChar(const char *&Ptr, LexerToken &Tok) {
+    // If this is not a trigraph and not a UCN or escaped newline, return
+    // quickly.
+    if (Ptr[0] != '?' && Ptr[0] != '\\') return *Ptr++;
+    
+    unsigned Size = 0;
+    char C = getCharAndSizeSlow(Ptr, Size, &Tok);
+    Ptr += Size;
+    return C;
+  }
+  
+  /// ConsumeChar - When a character (identified by getCharAndSize) is consumed
+  /// and added to a given token, check to see if there are diagnostics that
+  /// need to be emitted or flags that need to be set on the token.  If so, do
+  /// it.
+  const char *ConsumeChar(const char *Ptr, unsigned Size, LexerToken &Tok) {
+    // Normal case, we consumed exactly one character.  Just return it.
+    if (Size == 1)
+      return Ptr+Size;
+
+    // Otherwise, re-lex the character with a current token, allowing
+    // diagnostics to be emitted and flags to be set.
+    Size = 0;
+    getCharAndSizeSlow(Ptr, Size, &Tok);
+    return Ptr+Size;
+  }
+  
+  /// getCharAndSize - Peek a single 'character' from the specified buffer,
+  /// get its size, and return it.  This is tricky in several cases.  Here we
+  /// just handle the trivial case and fall-back to the non-inlined
+  /// getCharAndSizeSlow method to handle the hard case.
+  inline char getCharAndSize(const char *Ptr, unsigned &Size) {
+    // If this is not a trigraph and not a UCN or escaped newline, return
+    // quickly.
+    if (Ptr[0] != '?' && Ptr[0] != '\\') {
+      Size = 1;
+      return *Ptr;
+    }
+    
+    Size = 0;
+    return getCharAndSizeSlow(Ptr, Size);
+  }
+  
+  /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize
+  /// method.
+  char getCharAndSizeSlow(const char *Ptr, unsigned &Size, LexerToken *Tok = 0);
+  
+  
+  //===--------------------------------------------------------------------===//
+  // #if directive handling.
+  
+  /// pushConditionalLevel - When we enter a #if directive, this keeps track of
+  /// what we are currently in for diagnostic emission (e.g. #if with missing
+  /// #endif).
+  void pushConditionalLevel(const char *DirectiveStart, bool WasSkipping,
+                            bool FoundNonSkip, bool FoundElse) {
+    PPConditionalInfo CI;
+    CI.IfLoc = DirectiveStart;
+    CI.WasSkipping = WasSkipping;
+    CI.FoundNonSkip = FoundNonSkip;
+    CI.FoundElse = FoundElse;
+    ConditionalStack.push_back(CI);
+  }
+  void pushConditionalLevel(const PPConditionalInfo &CI) {
+    ConditionalStack.push_back(CI);
+  }    
+  
+  /// popConditionalLevel - Remove an entry off the top of the conditional
+  /// stack, returning information about it.  If the conditional stack is empty,
+  /// this returns true and does not fill in the arguments.
+  bool popConditionalLevel(PPConditionalInfo &CI) {
+    if (ConditionalStack.empty()) return true;
+    CI = ConditionalStack.back();
+    ConditionalStack.pop_back();
+    return false;
+  }
+  
+  /// peekConditionalLevel - Return the top of the conditional stack.  This
+  /// requires that there be a conditional active.
+  PPConditionalInfo &peekConditionalLevel() {
+    assert(!ConditionalStack.empty() && "No conditionals active!");
+    return ConditionalStack.back();
+  }
+  
+  unsigned getConditionalStackDepth() const { return ConditionalStack.size(); }
+  
+  //===--------------------------------------------------------------------===//
+  // Other lexer functions.
+  
+  // Part of block comment parsing.
+  bool isBlockCommentEndOfEscapedNewLine(const char *CurPtr, char &PrevChar);
+  
+  // Helper functions to lex the remainder of a token of the specific type.
+  bool LexIdentifier         (LexerToken &Result, const char *CurPtr);
+  bool LexNumericConstant    (LexerToken &Result, const char *CurPtr);
+  bool LexStringLiteral      (LexerToken &Result, const char *CurPtr);
+  bool LexAngledStringLiteral(LexerToken &Result, const char *CurPtr);
+  bool LexCharConstant       (LexerToken &Result, const char *CurPtr);
+  bool LexEndOfFile          (LexerToken &Result, const char *CurPtr);
+  
+  bool SkipWhitespace        (LexerToken &Result, const char *CurPtr);
+  bool SkipBCPLComment       (LexerToken &Result, const char *CurPtr);
+  bool SkipBlockComment      (LexerToken &Result, const char *CurPtr);
+};
+
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Lex/Lexer.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Lex/Lexer.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Lex/MacroExpander.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/MacroExpander.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/MacroExpander.h (added)
+++ cfe/cfe/trunk/include/clang/Lex/MacroExpander.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,63 @@
+//===--- MacroExpander.h - Lex from a macro expansion -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MacroExpander interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_MACROEXPANDER_H
+#define LLVM_CLANG_MACROEXPANDER_H
+
+namespace llvm {
+namespace clang {
+  class MacroInfo;
+  class Preprocessor;
+  class LexerToken;
+
+/// MacroExpander - This implements a lexer that returns tokens from a macro
+/// body instead of lexing from a character buffer.
+///
+class MacroExpander {
+  /// Macro - The macro we are expanding from.
+  ///
+  MacroInfo &Macro;
+
+  /// CurMacroID - This encodes the instantiation point of the macro being
+  /// expanded and the include stack.
+  unsigned CurMacroID;
+  
+  /// PP - The current preprocessor object we are expanding for.
+  ///
+  Preprocessor &PP;
+  
+  /// CurToken - This is the index (into the macro's replacement list) of the
+  /// next token that Lex will return.
+  unsigned CurToken;
+  
+  /// Lexical information about the expansion point of the macro: the identifier
+  /// that the macro expanded from had these properties.
+  bool AtStartOfLine, HasLeadingSpace;
+  
+public:
+  MacroExpander(MacroInfo &macro, unsigned MacroID, Preprocessor &pp,
+                bool atStartOfLine, bool hasLeadingSpace)
+    : Macro(macro), CurMacroID(MacroID), PP(pp), CurToken(0),
+      AtStartOfLine(atStartOfLine), HasLeadingSpace(hasLeadingSpace) {
+  }
+  
+  /// getMacro - Return the macro that this expander is expanding tokens from.
+  MacroInfo &getMacro() const { return Macro; }
+
+  /// Lex - Lex and return a token from this macro stream.
+  bool Lex(LexerToken &Tok);
+  
+};
+  
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Lex/MacroExpander.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Lex/MacroExpander.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Lex/MacroInfo.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/MacroInfo.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/MacroInfo.h (added)
+++ cfe/cfe/trunk/include/clang/Lex/MacroInfo.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,106 @@
+//===--- MacroInfo.h - Information about #defined identifiers ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MacroInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_MACROINFO_H
+#define LLVM_CLANG_MACROINFO_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+#include <vector>
+
+namespace llvm {
+namespace clang {
+    
+/// MacroInfo - Each identifier that is #define'd has an instance of this class
+/// associated with it, used to implement macro expansion.
+class MacroInfo {
+  /// Location - This is the place the macro is defined.
+  SourceLocation Location;
+
+  // TODO: Parameter list
+  // TODO: # parameters
+  
+  /// ReplacementTokens - This is the list of tokens that the macro is defined
+  /// to.
+  std::vector<LexerToken> ReplacementTokens;
+  
+  /// isDisabled - True if we have started an expansion of this macro already.
+  /// This disables recursive expansion, which would be quite bad for things
+  /// like #define A A.
+  bool isDisabled;
+  
+#if 0
+  /* Number of tokens in expansion, or bytes for traditional macros.  */
+  unsigned int count;
+  /* Number of parameters.  */
+  unsigned short paramc;
+  /* If a function-like macro.  */
+  unsigned int fun_like : 1;
+  /* If a variadic macro.  */
+  unsigned int variadic : 1;
+  /* Nonzero if it has been expanded or had its existence tested.  */
+  unsigned int used     : 1;
+  /* Indicate which field of 'exp' is in use.  */
+  unsigned int traditional : 1;
+#endif
+public:
+  MacroInfo(SourceLocation DefLoc) : Location(DefLoc) {
+    isDisabled = false;
+  }
+
+  /// getNumTokens - Return the number of tokens that this macro expands to.
+  ///
+  unsigned getNumTokens() const {
+    return ReplacementTokens.size();
+  }
+
+  /// getReplacementToken - Return the specified token of the replacement list
+  /// for this macro.  Asserts if Tok is out of range.
+  const LexerToken &getReplacementToken(unsigned Tok) const {
+    assert(Tok < ReplacementTokens.size() && "Invalid token #");
+    return ReplacementTokens[Tok];
+  }
+
+  /// AddTokenToBody - Add the specified token to the replacement text for the
+  /// macro.
+  void AddTokenToBody(const LexerToken &Tok) {
+    ReplacementTokens.push_back(Tok);
+    // FIXME: Remember where this token came from, do something intelligent with
+    // its location.
+    ReplacementTokens.back().ClearPosition();
+  }
+  
+  /// isEnabled - Return true if this macro is enabled: in other words, that we
+  /// are not currently in an expansion of this macro.
+  bool isEnabled() const { return !isDisabled; }
+  
+  /// EnableMacro - Mark the macro as enabled again.  It must currently be
+  /// disabled.
+  void EnableMacro() {
+    assert(isDisabled && "Cannot enable an already-enabled macro!");
+    isDisabled = false;
+  }
+
+  /// DisableMacro - Mark the macro as disabled, which prevents it from being
+  /// considered for expansion.  It must currently be enabled.
+  void DisableMacro() {
+    assert(!isDisabled && "Cannot disable an already-disabled macro!");
+    isDisabled = true;
+  }
+  
+  /// dump - Print the macro to stderr, used for debugging.
+  ///
+  void dump() const;
+  
+  // Todo:
+  // bool isDefinedInSystemHeader() { Look this up based on Location }
+};
+    
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Lex/MacroInfo.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Lex/MacroInfo.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: cfe/cfe/trunk/include/clang/Lex/Preprocessor.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Preprocessor.h?rev=38539&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Preprocessor.h (added)
+++ cfe/cfe/trunk/include/clang/Lex/Preprocessor.h Wed Jul 11 11:22:17 2007
@@ -0,0 +1,376 @@
+//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the Preprocessor interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_PREPROCESSOR_H
+#define LLVM_CLANG_PREPROCESSOR_H
+
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/MacroExpander.h"
+#include "clang/Lex/IdentifierTable.h"
+#include "clang/Basic/SourceLocation.h"
+
+namespace llvm {
+namespace clang {
+  
+class Lexer;
+class LexerToken;
+class SourceManager;
+class FileManager;
+class DirectoryEntry;
+class FileEntry;
+
+/// DirectoryLookup - This class is used to specify the search order for
+/// directories in #include directives.
+class DirectoryLookup {
+public:
+  enum DirType {
+    NormalHeaderDir,
+    SystemHeaderDir,
+    ExternCSystemHeaderDir
+  };
+private:  
+  /// Dir - This is the actual directory that we're referring to.
+  ///
+  const DirectoryEntry *Dir;
+  
+  /// DirCharacteristic - The type of directory this is, one of the DirType enum
+  /// values.
+  DirType DirCharacteristic : 2;
+  
+  /// UserSupplied - True if this is a user-supplied directory.
+  ///
+  bool UserSupplied;
+public:
+  DirectoryLookup(const DirectoryEntry *dir, DirType DT, bool isUser)
+    : Dir(dir), DirCharacteristic(DT), UserSupplied(isUser) {}
+    
+  /// getDir - Return the directory that this entry refers to.
+  ///
+  const DirectoryEntry *getDir() const { return Dir; }
+  
+  /// getDirCharacteristic - The type of directory this is, one of the DirType
+  /// enum values.
+  DirType getDirCharacteristic() const { return DirCharacteristic; }
+  
+  /// isUserSupplied - True if this is a user-supplied directory.
+  ///
+  bool isUserSupplied() const { return UserSupplied; }
+};
+
+/// Preprocessor - This object engages in a tight little dance to
+/// efficiently preprocess tokens.  Lexers know only about tokens within a
+/// single source file, and don't know anything about preprocessor-level issues
+/// like the #include stack, token expansion, etc.
+///
+class Preprocessor {
+  Diagnostic &Diags;             // Diagnostic engine errors/warnings go to.
+  const LangOptions &Features;   // Language dialect in effect.
+  FileManager   &FileMgr;        // Used to look up #included files.
+  SourceManager &SourceMgr;      // Owns the buffers being preprocessed.
+  
+  /// #include search path information.  Requests for #include "x" search the
+  /// directory of the #including file first, then each directory in SearchDirs
+  /// consecutively. Requests for <x> search the current dir first, then each
+  /// directory in SearchDirs, starting at SystemDirIdx, consecutively.  If
+  /// NoCurDirSearch is true, then the check for the file in the current
+  /// directory is suppressed.
+  std::vector<DirectoryLookup> SearchDirs;
+  unsigned SystemDirIdx;
+  bool NoCurDirSearch;
+  
+  enum {
+    /// MaxIncludeStackDepth - Maximum depth of #includes.
+    MaxAllowedIncludeStackDepth = 200
+  };
+
+  // State that changes while the preprocessor runs:
+  bool DisableMacroExpansion;    // True if macro expansion is disabled.
+  bool SkippingContents;         // True if in a #if 0 block.
+
+  /// IdentifierInfo - This is mapping/lookup information for all identifiers in
+  /// the program, including program keywords.
+  IdentifierTable IdentifierInfo;
+  
+  /// CurLexer - This is the current top of the stack that we're lexing from if
+  /// not expanding a macro.  One of CurLexer and CurMacroExpander must be null.
+  ///
+  Lexer *CurLexer;
+  
+  /// CurNextDirLookup - The next DirectoryLookup structure to search for a file
+  /// if CurLexer is non-null.  This allows us to implement #include_next.
+  const DirectoryLookup *CurNextDirLookup;
+  
+  /// IncludeStack - This keeps track of the stack of files currently #included,
+  /// not counting CurLexer.
+  struct IncludeStackInfo {
+    Lexer *TheLexer;
+    const DirectoryLookup *TheDirLookup;
+    IncludeStackInfo(Lexer *L, const DirectoryLookup *D)
+      : TheLexer(L), TheDirLookup(D) {
+    }
+  };
+  std::vector<IncludeStackInfo> IncludeStack;
+  
+  /// CurMacroExpander - This is the current macro we are expanding, if we are
+  /// expanding a macro.  One of CurLexer and CurMacroExpander must be null.
+  MacroExpander *CurMacroExpander;
+  
+  /// MacroStack - This keeps track of the macros that are recursively being
+  /// expanded.
+  std::vector<MacroExpander*> MacroStack;
+  
+  
+  /// PerFileInfo - The preprocessor keeps track of this information for each
+  /// file that is #included.
+  struct PerFileInfo {
+    // isImport - True if this is a #import'd or #pragma once file.
+    bool isImport;
+    
+    // NumIncludes - This is the number of times the file has been included
+    // already.
+    unsigned short NumIncludes;
+    
+    PerFileInfo() : isImport(false), NumIncludes(0) {}
+  };
+  
+  /// FileInfo - This contains all of the preprocessor-specific data about files
+  /// that are included.  The vector is indexed by the FileEntry's UID.
+  ///
+  std::vector<PerFileInfo> FileInfo;
+  
+  // Various statistics we track for performance analysis.
+  unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
+  unsigned NumIf, NumElse, NumEndif;
+  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
+  unsigned NumMacroExpanded, NumFastMacroExpanded, MaxMacroStackDepth;
+  unsigned NumSkipped;
+public:
+  Preprocessor(Diagnostic &diags, const LangOptions &opts, FileManager &FM,
+               SourceManager &SM);
+  ~Preprocessor();
+
+  // Accessors for the objects the preprocessor was constructed with.
+  Diagnostic &getDiagnostics() const { return Diags; }
+  const LangOptions &getLangOptions() const { return Features; }
+  FileManager &getFileManager() const { return FileMgr; }
+  SourceManager &getSourceManager() const { return SourceMgr; }
+
+  IdentifierTable &getIdentifierTable() { return IdentifierInfo; }
+
+  /// isSkipping - Return true if we're lexing a '#if 0' block.  This causes
+  /// lexer errors/warnings to get ignored.
+  bool isSkipping() const { return SkippingContents; }
+  
+  /// isCurrentLexer - Return true if we are lexing directly from the specified
+  /// lexer.
+  bool isCurrentLexer(const Lexer *L) const {
+    return CurLexer == L;
+  }
+  
+  /// SetSearchPaths - Interface for setting the file search paths.
+  ///
+  void SetSearchPaths(const std::vector<DirectoryLookup> &dirs,
+                      unsigned systemDirIdx, bool noCurDirSearch) {
+    SearchDirs = dirs;
+    SystemDirIdx = systemDirIdx;
+    NoCurDirSearch = noCurDirSearch;
+  }
+  
+  /// getIdentifierInfo - Return information about the specified preprocessor
+  /// identifier token.  The version of this method that takes two character
+  /// pointers is preferred unless the identifier is already available as a
+  /// string (this avoids allocation and copying of memory to construct an
+  /// std::string).
+  IdentifierTokenInfo *getIdentifierInfo(const char *NameStart,
+                                         const char *NameEnd) {
+    // If we are in a "#if 0" block, don't bother looking up identifiers.
+    if (SkippingContents) return 0;
+    return &IdentifierInfo.get(NameStart, NameEnd);
+  }
+  IdentifierTokenInfo *getIdentifierInfo(const std::string &Name) {
+    // If we are in a "#if 0" block, don't bother looking up identifiers.
+    if (SkippingContents) return 0;
+    return &IdentifierInfo.get(Name);
+  }
+  
+  /// AddKeyword - This method is used to associate a token ID with specific
+  /// identifiers because they are language keywords.  This causes the lexer to
+  /// automatically map matching identifiers to specialized token codes.
+  ///
+  /// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
+  /// specified language, set to 1 if it is an extension in the specified
+  /// language, and set to 2 if disabled in the specified language.
+  void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
+                  int C90, int C99, int CPP) {
+    int Flags = Features.CPlusPlus ? CPP : (Features.C99 ? C99 : C90);
+    
+    // Don't add this keyword if disabled in this language or if an extension
+    // and extensions are disabled.
+    if (Flags+Features.NoExtensions >= 2) return;
+    
+    IdentifierTokenInfo &Info = *getIdentifierInfo(Keyword);
+    Info.setTokenID(TokenCode);
+    Info.setIsExtensionToken(Flags == 1);
+  }
+  
+  /// AddKeywords - Add all keywords to the symbol table.
+  ///
+  void AddKeywords();
+
+  /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+  /// return null on failure.  isSystem indicates whether the file reference is
+  /// for system #include's or not.  If successful, this returns 'UsedDir', the
+  /// DirectoryLookup member the file was found in, or null if not applicable.
+  /// If FromDir is non-null, the directory search should start with the entry
+  /// after the indicated lookup.  This is used to implement #include_next.
+  const FileEntry *LookupFile(const std::string &Filename, bool isSystem,
+                              const DirectoryLookup *FromDir,
+                              const DirectoryLookup *&NextDir);
+  
+  /// EnterSourceFile - Add a source file to the top of the include stack and
+  /// start lexing tokens from it instead of the current buffer.
+  void EnterSourceFile(unsigned CurFileID, const DirectoryLookup *Dir);
+
+  /// EnterMacro - Add a Macro to the top of the include stack and start lexing
+  /// tokens from it instead of the current buffer.  Return true on failure.
+  bool EnterMacro(LexerToken &Identifier);
+  
+  
+  /// Lex - To lex a token from the preprocessor, just pull a token from the
+  /// current lexer or macro object.
+  bool Lex(LexerToken &Result) {
+    if (CurLexer)
+      return CurLexer->Lex(Result);
+    else
+      return CurMacroExpander->Lex(Result);
+  }
+  
+  /// LexUnexpandedToken - This is just like Lex, but this disables macro
+  /// expansion of identifier tokens.
+  bool LexUnexpandedToken(LexerToken &Result) {
+    // Disable macro expansion.
+    bool OldVal = DisableMacroExpansion;
+    DisableMacroExpansion = true;
+    // Lex the token.
+    bool ResVal = Lex(Result);
+    
+    // Reenable it.
+    DisableMacroExpansion = OldVal;
+    return ResVal;
+  }
+  
+  /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
+  /// the specified LexerToken's location, translating the token's start
+  /// position in the current buffer into a SourcePosition object for rendering.
+  bool Diag(const LexerToken &Tok, unsigned DiagID, const std::string &Msg="");  
+  bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg="");  
+  
+  /// PrintStats - Print the collected statistics to stderr.
+  void PrintStats();
+
+  //===--------------------------------------------------------------------===//
+  // Preprocessor callback methods.  These are invoked by a lexer as various
+  // directives and events are found.
+
+  /// HandleIdentifier - This callback is invoked when the lexer reads an
+  /// identifier and has filled in the token's IdentifierInfo member.  This
+  /// callback potentially macro expands it or turns it into a named token (like
+  /// 'for').
+  bool HandleIdentifier(LexerToken &Identifier);
+
+  /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
+  /// the current file.  This either returns the EOF token or pops a level off
+  /// the include stack and keeps going.
+  bool HandleEndOfFile(LexerToken &Result);
+  
+  /// HandleEndOfMacro - This callback is invoked when the lexer hits the end of
+  /// the current macro.  This either returns the EOF token or pops a level off
+  /// the include stack and keeps going.
+  bool HandleEndOfMacro(LexerToken &Result);
+  
+  /// HandleDirective - This callback is invoked when the lexer sees a # token
+  /// at the start of a line.  This consumes the directive, modifies the
+  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
+  /// read is the correct one.
+  bool HandleDirective(LexerToken &Result);
+
+private:
+  /// getFileInfo - Return the PerFileInfo structure for the specified
+  /// FileEntry.
+  PerFileInfo &getFileInfo(const FileEntry *FE);
+
+  /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
+  /// current line until the tok::eom token is found.
+  bool DiscardUntilEndOfDirective();
+
+  /// ReadMacroName - Lex and validate a macro name, which occurs after a
+  /// #define or #undef.  This emits a diagnostic, sets the token kind to eom,
+  /// and discards the rest of the macro line if the macro name is invalid.
+  bool ReadMacroName(LexerToken &MacroNameTok);
+  
+  /// CheckEndOfDirective - Ensure that the next token is a tok::eom token.  If
+  /// not, emit a diagnostic and consume up until the eom.
+  bool CheckEndOfDirective(const char *Directive);
+  
+  /// SkipExcludedConditionalBlock - We just read a #if or related directive and
+  /// decided that the subsequent tokens are in the #if'd out portion of the
+  /// file.  Lex the rest of the file, until we see an #endif.  If
+  /// FoundNonSkipPortion is true, then we have already emitted code for part of
+  /// this #if directive, so #else/#elif blocks should never be entered. If
+  /// FoundElse is false, then #else directives are ok, if not, then we have
+  /// already seen one so a #else directive is a duplicate.  When this returns,
+  /// the caller can lex the first valid token.
+  bool SkipExcludedConditionalBlock(const char *IfTokenLoc,
+                                    bool FoundNonSkipPortion, bool FoundElse);
+  
+  /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+  /// may occur after a #if or #elif directive.  Sets Result to the result of
+  /// the expression.  Returns false normally, true if lexing must be aborted.
+  bool EvaluateDirectiveExpression(bool &Result);
+  /// EvaluateValue - Used to implement EvaluateDirectiveExpression,
+  /// see PPExpressions.cpp.
+  bool EvaluateValue(int &Result, LexerToken &PeekTok, bool &StopParse);
+  /// EvaluateDirectiveSubExpr - Used to implement EvaluateDirectiveExpression,
+  /// see PPExpressions.cpp.
+  bool EvaluateDirectiveSubExpr(int &LHS, unsigned MinPrec,
+                                LexerToken &PeekTok, bool &StopParse);
+  
+  //===--------------------------------------------------------------------===//
+  /// Handle*Directive - implement the various preprocessor directives.  These
+  /// should side-effect the current preprocessor object so that the next call
+  /// to Lex() will return the appropriate token next.  If a fatal error occurs
+  /// return true, otherwise return false.
+  
+  bool HandleUserDiagnosticDirective(LexerToken &Result, bool isWarning);
+  
+  // File inclusion.
+  bool HandleIncludeDirective(LexerToken &Result,
+                              const DirectoryLookup *LookupFrom = 0,
+                              bool isImport = false);
+  bool HandleIncludeNextDirective(LexerToken &Result);
+  bool HandleImportDirective(LexerToken &Result);
+  
+  // Macro handling.
+  bool HandleDefineDirective(LexerToken &Result);
+  bool HandleUndefDirective(LexerToken &Result);
+  
+  // Conditional Inclusion.
+  bool HandleIfdefDirective(LexerToken &Result, bool isIfndef);
+  bool HandleIfDirective(LexerToken &Result);
+  bool HandleEndifDirective(LexerToken &Result);
+  bool HandleElseDirective(LexerToken &Result);
+  bool HandleElifDirective(LexerToken &Result);
+};
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Lex/Preprocessor.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Lex/Preprocessor.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision





More information about the cfe-commits mailing list