[llvm-commits] [hlvm] r38020 - in /hlvm/trunk: Makefile.hlvm hlvm/Reader/XML/Makefile hlvm/Reader/XML/XMLReader.cpp hlvm/Reader/Yaml/YamlReader.cpp utils/ utils/bin/ utils/bin/mkFuncs.pm utils/bin/mkTokenizer utils/tmplt/ utils/tmplt/Preamble_Code utils/tmplt/Tokenizer_Template.cpp utils/tmplt/Tokenizer_Template.h

Sat Jul 7 16:59:02 PDT 2007

Author: reid
Date: Sat Jul  7 18:59:01 2007
New Revision: 38020

URL: http://llvm.org/viewvc/llvm-project?rev=38020&view=rev
Log:
Add facilities for generation of a tokenizer based on an RNG XML Schema. The
tokenizer uses gperf to generate a perfect hash function that can be used to
identify element and attribute names.

Added:
    hlvm/trunk/utils/
    hlvm/trunk/utils/bin/
    hlvm/trunk/utils/bin/mkFuncs.pm   (with props)
    hlvm/trunk/utils/bin/mkTokenizer   (with props)
    hlvm/trunk/utils/tmplt/
    hlvm/trunk/utils/tmplt/Preamble_Code
    hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp
    hlvm/trunk/utils/tmplt/Tokenizer_Template.h
Modified:
    hlvm/trunk/Makefile.hlvm
    hlvm/trunk/hlvm/Reader/XML/Makefile
    hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp
    hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp

Modified: hlvm/trunk/Makefile.hlvm
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/Makefile.hlvm?rev=38020&r1=38019&r2=38020&view=diff

==============================================================================

--- hlvm/trunk/Makefile.hlvm (original)
+++ hlvm/trunk/Makefile.hlvm Sat Jul  7 18:59:01 2007
@@ -93,10 +93,14 @@
 
 preconditions: $(PROJ_OBJ_DIR)/Makefile.hlvm
 
-#$(PROJ_OBJ_DIR)/%Tokenizer.cpp $(PROJ_OBJ_DIR)/%Tokenizer.h $(PROJ_OBJ_DIR)/%TokenHash.i : $(PROJ_SRC_DIR)/%.rng $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.cpp $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.h $(HLVM_top_srcdir)/utils/bin/mkTokenizer
-#	$(Echo) Building Tokenizer For $*
-#	$(Verb) $(HLVM_top_srcdir)/utils/bin/mkTokenizer -f \
-#	  $(PROJ_SRC_DIR)/$*.rng
+# Generate a tokenizer (source, header and gperf hash) from a RelaxNG schema.
+# mkTokenizer takes the schema file followed by the HLVM source root, which it
+# uses to locate the templates under utils/tmplt.
+$(PROJ_OBJ_DIR)/%Tokenizer.cpp $(PROJ_OBJ_DIR)/%Tokenizer.h \
+  $(PROJ_OBJ_DIR)/%TokenHash.i : $(PROJ_SRC_DIR)/%.rng \
+  $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.cpp \
+  $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.h \
+  $(HLVM_top_srcdir)/utils/bin/mkTokenizer
+	$(Echo) Building Tokenizer For $*
+	$(Verb) $(HLVM_top_srcdir)/utils/bin/mkTokenizer -f \
+	  $(PROJ_SRC_DIR)/$*.rng $(HLVM_top_srcdir)
 
 ifdef INSTALL_INCLUDES
 PartialPath := $(patsubst $(PROJ_SRC_ROOT)/%,%,$(PROJ_SRC_DIR))

Modified: hlvm/trunk/hlvm/Reader/XML/Makefile
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/hlvm/Reader/XML/Makefile?rev=38020&r1=38019&r2=38020&view=diff

==============================================================================
--- hlvm/trunk/hlvm/Reader/XML/Makefile (original)
+++ hlvm/trunk/hlvm/Reader/XML/Makefile Sat Jul  7 18:59:01 2007
@@ -4,10 +4,26 @@
 #
 #-------------------------------------------------------------------------------
 
-LEVEL = ../../..
-LIBRARYNAME = HLVMXMLReader
-DONT_BUILD_RELINKED := 1
-BUILD_ARCHIVE := 1
-INSTALL_INCLUDES := XMLReader.h
+LEVEL 			:= ../../..
+LIBRARYNAME 		:= HLVMXMLReader
+DONT_BUILD_RELINKED 	:= 1
+BUILD_ARCHIVE 		:= 1
+EXTRA_DIST 		:= HLVM.rng
+BUILT_SOURCES 		:= HLVMTokenizer.cpp 
+INSTALL_INCLUDES 	:= XMLReader.h
 
 include $(LEVEL)/Makefile.hlvm
+
+# The tokenizer object depends on all three files generated by mkTokenizer.
+$(ObjDir)/HLVMTokenizer.o: \
+	$(PROJ_OBJ_DIR)/HLVMTokenizer.h \
+	$(PROJ_OBJ_DIR)/HLVMTokenHash.i \
+	$(PROJ_OBJ_DIR)/HLVMTokenizer.cpp
+
+# Generate the HLVM tokenizer from HLVM.rng. This is an explicit rule, so there
+# is no pattern stem to report; the schema name is spelled out instead. The
+# HLVM source root is passed as the second argument that mkTokenizer expects.
+$(PROJ_OBJ_DIR)/HLVMTokenizer.cpp $(PROJ_OBJ_DIR)/HLVMTokenizer.h \
+  $(PROJ_OBJ_DIR)/HLVMTokenHash.i : $(PROJ_SRC_DIR)/HLVM.rng \
+  $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.cpp \
+  $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.h \
+  $(HLVM_top_srcdir)/utils/bin/mkTokenizer
+	$(Echo) Building Tokenizer For HLVM
+	$(Verb) $(HLVM_top_srcdir)/utils/bin/mkTokenizer -f \
+	  $(PROJ_SRC_DIR)/HLVM.rng $(HLVM_top_srcdir)

Modified: hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp?rev=38020&r1=38019&r2=38020&view=diff

==============================================================================
--- hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp (original)
+++ hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp Sat Jul  7 18:59:01 2007
@@ -28,25 +28,400 @@
 //===----------------------------------------------------------------------===//
 
 #include <hlvm/Reader/XML/XMLReader.h>
+#include <hlvm/Base/Locator.h>
 #include <hlvm/AST/AST.h>
+#include <expat.h>
+#include <vector>
+#include <string>
 
 using namespace hlvm;
 
 namespace {
 
+/// This enumeration lists the basic attribute types. One of these values is
+/// recorded for each attribute in the "type" field of the AttrInfo structure
+/// that follows.
+/// @brief Attribute Type Enumeration.
+enum AttributeTypes
+{
+  CDATA_AttrType,
+  ID_AttrType,
+  IDREF_AttrType,
+  IDREFS_AttrType,
+  NMTOKEN_AttrType,
+  NMTOKENS_AttrType,
+  ENTITY_AttrType,
+  ENTITIES_AttrType,
+  NOTATION_AttrType,
+};
+/// @brief Attribute Information Structure: one attribute and its value.
+struct AttrInfo
+{
+  std::string uri;    ///< The namespace URI of the attribute
+  std::string local;  ///< The local name of the attribute
+  std::string value;  ///< The value of the attribute
+  AttributeTypes type;///< The basic type of the attribute
+  int32_t token;      ///< The token for the attribute name
+  uint32_t ns;        ///< The token for the attribute namespace
+};
+
+struct NodeInfo : public hlvm::Locator // Name and tokens of a node; location comes from Locator
+{
+  std::string uri;    ///< The namespace uri of the element
+  std::string local;  ///< The local name of the element
+  int32_t token;      ///< Tokenized value of the local name
+  uint32_t ns;        ///< Tokenized value of the namespace name
+};
+
+/// Information about an element, passed to the Handler's ElementStart method
+/// during parsing. The find_attrs overloads are only declared here; they
+/// presumably look up attribute values by token (TODO confirm).
+/// @brief Element Information Structure.
+struct ElementInfo : public NodeInfo {
+  std::vector<NodeInfo> kids;   ///< Node info of completed child elements
+  std::vector<AttrInfo> attrs; ///< Attributes of the element
+  void find_attrs(
+    int token1, const std::string*& value1) const;
+  void find_attrs(
+    int token1, const std::string*& value1,
+    int token2, const std::string*& value2
+  ) const;
+  void find_attrs(
+    int token1, const std::string*& value1,
+    int token2, const std::string*& value2,
+    int token3, const std::string*& value3
+  ) const;
+};
+
 class XMLReaderImpl : public XMLReader {
+  llvm::sys::Path path_;
+  AST::AST* ast_;
+  XML_Parser xp_;
+  std::vector<ElementInfo> elems_; ///< The element stack
+  ElementInfo* etop_; ///< A pointer to the top of the element stack
 public:
   XMLReaderImpl(const llvm::sys::Path& path) :
-    path_(path), ast_(0) {}
-
-  virtual ~XMLReaderImpl() { if (ast_) delete ast_; }
+    path_(path), ast_(0), xp_(0), elems_(), etop_(0)
+  {
+    xp_ = XML_ParserCreate( "UTF-8");
+    // Reserve some space on the element stack so we aren't mucking around
+    // with tiny allocations. Only the stack is reserved here; the per-element
+    // attribute lists are not. If a document nests more than 64 elements
+    // deep, the vector has to grow. It's unlikely that documents will reach
+    // this limit, so normally there is no reallocation after this reserve.
+    elems_.reserve(64);
+  }
+
+  /// Releases the AST created by read() (if any) and the expat parser.
+  virtual ~XMLReaderImpl() 
+  { 
+    // Deleting a null pointer is a no-op, so no guard is needed
+    delete ast_;
+    XML_ParserFree( xp_ );
+  }
 
   virtual void read();
   virtual AST::AST* get();
 
-private: 
-  llvm::sys::Path path_;
-  AST::AST* ast_;
+/// @name Expat Parsing Handlers
+/// @{
+private:
+
+  static void XMLCALL 
+  StartElementHandler(
+    void *user_data, const XML_Char* name, const XML_Char** attributes
+  )
+  {
+    // Convert the user data (set in read()) back to our XMLReaderImpl pointer
+    register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+    // Grow the element stack by one and make the new entry the current top.
+    p->elems_.resize(p->elems_.size()+1);
+    p->etop_ = &p->elems_.back();
+    ElementInfo& ei = *(p->etop_);
+
+    // Fill in the element info. NOTE(review): 'd' is not declared here - confirm
+    ei.local = name;
+    ei.token = d.tokenize(name);
+    ei.set(
+      p->source_->publicId(),
+      p->source_->systemId(),
+      uint32_t(XML_GetCurrentLineNumber(p->xp_)), 
+      uint32_t(XML_GetCurrentColumnNumber(p->xp_))
+    );
+    ei.kids.clear();
+    ei.attrs.clear();
+
+    // Handle the attributes
+    if ( attributes )
+    {
+      // Determine index of first default attribute
+      // size_t default_attr_index = XML_GetSpecifiedAttributeCount( p->xp_ );
+
+      // Process all the attributes (a null-terminated list of name/value pairs)
+      size_t curr_attr = 0;
+      while ( *attributes != 0 )
+      {
+        // Resize the attrs vector to accommodate this attribute and get
+        // a reference to that current attribute for ease of expression
+        ei.attrs.resize(curr_attr+1);
+        XPS_xml::AttrInfo& attr = ei.attrs[curr_attr];
+
+        // Split the namespace/name couplet at the last Namespace_Separator
+        const XML_Char* attr_ns_name = *attributes;
+        XML_Char* attr_name = strrchr(attr_ns_name, Namespace_Separator);
+        if (attr_name == 0)
+        {
+          attr.ns = 0;
+          attr_name = const_cast<XML_Char*>(attr_ns_name);
+        }
+        else
+        {
+          *attr_name = 0; // Terminate namespace name
+          attr_name++;    // Advance to start of attribute name
+          NSMapType::const_iterator NSI = p->nsmap_.find(attr_ns_name);
+          XPS_assert(NSI != p->nsmap_.end());
+          attr.ns = NSI->second;
+        }
+
+        // Record the attribute's local name, its token, and its value
+        attr.local = attr_name;
+        attr.token = d.tokenize(attr_name);
+        attr.value = attributes[1];
+
+        // Advance to the next name/value pair
+        attributes +=2;
+        curr_attr++;
+      }
+    }
+
+    // Tell the handler about the element
+    p->handler_->ElementStart(ei);
+  }
+
+  static void XMLCALL 
+  EndElementHandler( void *user_data, const XML_Char *name)
+  {
+    // Get the reader object that was registered as the user data
+    register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+    // Get the current position
+    int line = XML_GetCurrentLineNumber( p->xp_ );
+    int column = XML_GetCurrentColumnNumber( p->xp_ );
+
+    // Find the separator that separates the namespace name from the local name
+    const XML_Char* ns_name = name;
+    int ns = 0;
+    XML_Char* local_name = strrchr(name, Namespace_Separator);
+
+    // If we didn't find the separator, then there's no namespace. This means
+    // that we've returned to the anonymous namespace so accommodate that now.
+    if (local_name == 0) {
+      local_name = const_cast<XML_Char*>(name);
+    }
+
+    // Otherwise we have a namespace and we need to decouple the couplet
+    else
+    {
+      *local_name = 0; // terminate namespace
+      local_name++;    // get start of local name
+
+      // Look up the namespace by name. It's a hard error to not find the
+      // namespace name in the map because the Namespace start handler should
+      // have already placed it there.
+      NSMapType::const_iterator NSI = p->nsmap_.find(ns_name);
+      XPS_assert(NSI != p->nsmap_.end());
+      ns = NSI->second;
+    }
+
+    // Get the dialect for that namespace
+    const Dialect& d = p->find_dialect(ns);
+
+    // Convert the element name to a token
+    int name_token = d.tokenize(local_name);
+
+    // Save the previous token before popping it and make sure that it is the
+    // same as the one the parser told us we're popping.
+    int32_t token = p->elems_.back().token;
+    XPS_assert(token == name_token);
+
+    // Tell the handler that we're ending an element.
+    p->handler_->ElementEnd( p->elems_.back(), line, column );
+
+    // Pop the element and then push its node info on the "kids" list of the
+    // parent element indicating that we've completed parsing one child element.
+    NodeInfo ki = static_cast<NodeInfo&>(p->elems_.back());
+    p->elems_.pop_back();
+    if (!p->elems_.empty())
+    {
+      p->etop_ = & p->elems_.back();
+      p->etop_->kids.push_back(ki);
+    }
+
+    // Restore the separator we overwrote in the name string
+    if (ns != 0)
+      *(--local_name) = Namespace_Separator;
+  }
+
+  /// Expat callback for character data: forwards the text to the handler.
+  static void XMLCALL 
+  CharacterDataHandler( void *user_data, const XML_Char *s, int len)
+  {
+    // Recover the reader that was registered as the user data
+    XMLReaderImpl* reader = static_cast<XMLReaderImpl*>(user_data);
+
+    // The text arrives as a pointer plus a length, so copy exactly len
+    // characters into a string before handing it over
+    std::string text(s, len);
+    reader->handler_->Characters(text);
+  }
+
+  /// Expat callback for a processing instruction: forwards the target and
+  /// its data to the handler unchanged.
+  static void XMLCALL 
+  ProcessingInstructionHandler(
+    void *user_data, const XML_Char *target, const XML_Char *data)
+  {
+    XMLReaderImpl* reader = static_cast<XMLReaderImpl*>(user_data);
+    reader->handler_->ProcessingInstruction(target,data);
+  }
+
+  /// Expat callback for a comment: forwards the comment text to the handler.
+  static void XMLCALL 
+  CommentHandler( void *user_data, const XML_Char *data)
+  {
+    XMLReaderImpl* reader = static_cast<XMLReaderImpl*>(user_data);
+
+    // Comments are always valid, so no checking is done before forwarding
+    reader->handler_->Comment(data);
+  }
+
+  /// Expat callback for the start of a CDATA section. The section is pushed
+  /// on the element stack as a pseudo-element tagged with CDATASectionToken,
+  /// and the handler is then told that the section has started.
+  static void XMLCALL 
+  StartCdataSectionHandler(void *user_data)
+  {
+    // Get the parser
+    register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+    // Describe the CData Section (a fresh ElementInfo has no kids or attrs)
+    ElementInfo ei;
+    ei.ns = 0;
+    ei.local = "CDATA";
+    ei.token = CDATASectionToken;
+    ei.setLine( XML_GetCurrentLineNumber( p->xp_ ));
+    ei.setColumn( XML_GetCurrentColumnNumber( p->xp_ ));
+
+    // Put it on the element stack and make it the new top. etop_ has to be
+    // refreshed after every push: push_back may reallocate the vector, which
+    // would leave the old pointer dangling, and the top of the stack is what
+    // gets checked when the section ends.
+    p->elems_.push_back(ei);
+    p->etop_ = &p->elems_.back();
+
+    // Inform the handler of the CData Section
+    p->handler_->CDataSectionStart();
+  }
+
+  /// Expat callback for the end of a CDATA section. Pops the pseudo-element
+  /// pushed when the section started, records it as a child of the enclosing
+  /// element, and tells the handler that the section has ended.
+  static void XMLCALL 
+  EndCdataSectionHandler(void *user_data)
+  {
+    // Get the parser
+    register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+    // Validate that the top of stack is a CDataSection. The stack itself is
+    // inspected rather than etop_, so the check does not depend on etop_
+    // having been kept in step with the stack.
+    XPS_assert(!p->elems_.empty());
+    XPS_assert(p->elems_.back().token == CDATASectionToken);
+
+    // Pop the CData off the stack
+    NodeInfo ki = static_cast<NodeInfo&>(p->elems_.back());
+    p->elems_.pop_back();
+
+    // Record it as a completed child of the enclosing element, if there is
+    // one. As in EndElementHandler, back() is only used on a non-empty stack.
+    if (!p->elems_.empty())
+    {
+      p->etop_ = & p->elems_.back();
+      p->etop_->kids.push_back(ki);
+    }
+    else
+    {
+      p->etop_ = 0;
+    }
+
+    // Inform the handler (always valid)
+    p->handler_->CDataSectionEnd();
+  }
+
+  /// Expat default callback. Deliberately a no-op for now.
+  static void XMLCALL 
+  DefaultHandler(
+    void * /*user_data*/, const XML_Char * /*s*/, int /*len*/)
+  {
+    // Forwarding is disabled; the intended call was:
+    // static_cast<XMLReaderImpl*>(user_data)->handler_->Other(s,len);
+  }
+
+  static void XMLCALL 
+  StartDoctypeDeclHandler(
+    void * /*user_data*/, 
+    const XML_Char * /*doctypeName*/, 
+    const XML_Char * /*sysid*/, 
+    const XML_Char * /*pubid*/, 
+    int /*has_internal_subset*/)
+  {
+    // FIXME: Implement. For now the start of a DOCTYPE declaration is ignored.
+  }
+
+  static void XMLCALL 
+  EndDoctypeDeclHandler(void * /*user_data*/)
+  {
+    // FIXME: Implement. For now the end of a DOCTYPE declaration is ignored.
+  }
+
+  static void XMLCALL 
+  EntityDeclHandler( 
+    void * /*user_data*/, 
+    const XML_Char * /*entityName*/, 
+    int /*is_parameter_entity*/, 
+    const XML_Char * /*value*/,
+    int /*value_length*/, 
+    const XML_Char * /*base*/, 
+    const XML_Char * /*systemId*/, 
+    const XML_Char * /*publicId*/, 
+    const XML_Char * /*notationName*/)
+  {
+    // FIXME: Implement. For now entity declarations are ignored.
+  }
+
+  static void XMLCALL 
+  NotationDeclHandler( 
+    void * /*user_data*/,
+    const XML_Char * /*notationName*/, 
+    const XML_Char * /*base*/, 
+    const XML_Char * /*systemId*/, 
+    const XML_Char * /*publicId*/)
+  {
+    // FIXME: Implement. For now notation declarations are ignored.
+  }
+
+  static int XMLCALL 
+  NotStandaloneHandler(void * /*user_data*/ )
+  {
+    // FIXME: Implement. XML_STATUS_ERROR makes expat fail as "not standalone".
+    return XML_STATUS_ERROR;
+  }
+
+  static int XMLCALL 
+  ExternalEntityRefHandler( 
+    XML_Parser /*parser*/,
+    const XML_Char * /*context*/, 
+    const XML_Char * /*base*/, 
+    const XML_Char * /*systemId*/,
+    const XML_Char * /*publicId*/)
+  {
+    // FIXME: Implement. XML_STATUS_ERROR reports the external entity as failed.
+    return XML_STATUS_ERROR;
+  }
+
+  static void XMLCALL 
+  SkippedEntityHandler( 
+    void * /*user_data*/, 
+    const XML_Char * /*entityName*/, 
+    int /*is_parameter_entity*/)
+  {
+    // FIXME: Implement. For now skipped entities are ignored.
+  }
+
+  static int XMLCALL 
+  UnknownEncodingHandler( 
+    void * /*encodingHandlerData*/,
+    const XML_Char * /*name*/, 
+    XML_Encoding * /*info*/)
+  {
+    // FIXME: Implement. XML_STATUS_ERROR tells expat the encoding is unsupported.
+    return XML_STATUS_ERROR;
+  }
+
+/// @}
 };
 
 AST::AST*
@@ -55,9 +430,27 @@
   return ast_;
 }
 
+static const XML_Char Namespace_Separator = 4; // Character 4 (^D); the handlers split names at it
+
 void
 XMLReaderImpl::read() {
   ast_ = new AST::AST();
+
+  // Reset the parser and register the callbacks; no input is fed to it here.
+  XML_ParserReset(xp_,"UTF-8");
+  XML_SetUserData(xp_, this );
+  XML_SetElementHandler(xp_, &StartElementHandler, &EndElementHandler );
+  XML_SetNamespaceDeclHandler(xp_, StartNamespaceDeclHandler, 
+    EndNamespaceDeclHandler );
+  XML_SetCharacterDataHandler( xp_, CharacterDataHandler );
+  XML_SetProcessingInstructionHandler(xp_, ProcessingInstructionHandler );
+  XML_SetCommentHandler( xp_, CommentHandler );
+  XML_SetCdataSectionHandler( xp_, StartCdataSectionHandler, 
+    EndCdataSectionHandler );
+  XML_SetNotStandaloneHandler( xp_, NotStandaloneHandler );
+  XML_SetExternalEntityRefHandler( xp_, ExternalEntityRefHandler);
+  XML_SetSkippedEntityHandler( xp_, SkippedEntityHandler);
+  XML_SetUnknownEncodingHandler( xp_, UnknownEncodingHandler, this);
 }
 
 }

Modified: hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp?rev=38020&r1=38019&r2=38020&view=diff

==============================================================================
--- hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp (original)
+++ hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp Sat Jul  7 18:59:01 2007
@@ -29,6 +29,8 @@
 
 class YamlReaderImpl : public hlvm::YamlReader {
   public:
+    typedef std::map<SYMID,hlvm::AST::Node> NodeIDMap;
+  public:
     YamlReaderImpl() {
       parser_ = syck_new_parser();
       syck_parser_handler(parser_, SyckNodeHandler(NodeHandler));
@@ -37,8 +39,20 @@
         SyckBadAnchorHandler(BadAnchorHandler));
     }
 
-    static SYMID NodeHandler(SyckParser*, SyckNode* )
+    /// Syck node callback. Dispatches on the kind of node being reported;
+    /// no kind is acted upon yet, and 0 is always returned.
+    static SYMID NodeHandler(SyckParser*p, SyckNode* n)
     {
+      switch (n->kind) {
+        case syck_str_kind: // Scalar
+          break;
+        case syck_seq_kind: // Array
+          break;
+        case syck_map_kind: // Map
+          break;
+        default:
+          // Unknown kind? 
+          // FIXME: Should we generate an error here?
+          break;
+      }
       return 0;
     }
 

Added: hlvm/trunk/utils/bin/mkFuncs.pm
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/bin/mkFuncs.pm?rev=38020&view=auto

==============================================================================
--- hlvm/trunk/utils/bin/mkFuncs.pm (added)
+++ hlvm/trunk/utils/bin/mkFuncs.pm Sat Jul  7 18:59:01 2007
@@ -0,0 +1,101 @@
+#!/usr/bin/perl
+
+# Work out the HLVM top-level directory from the current working directory.
+sub get_hlvm_dir 
+{
+  chomp(my $top = `pwd`);
+
+  # Cut the path off just after its last occurrence of "hlvm"
+  $top =~ s|(.*hlvm).*|$1|;
+
+  # If that directory has an AST subdirectory, drop the "/hlvm" component
+  $top =~ s|(.*)/hlvm|$1| if -d "$top/AST";
+
+  return $top;
+}
+
+# Expand a template into an output file.
+#
+# Writes $output from the $preamble file followed by the $input file,
+# replacing each %NAME% placeholder with the value of the matching variable.
+# $CLASS, $HEADER, $STYLE, $USER, $TOKEN_LIST and $SCHEMA_NAME are not set
+# here: they are read from package globals, so the caller must set whichever
+# of them its templates use. Dies if a file cannot be opened, written or
+# closed.
+sub process_file
+{
+  my $preamble = shift(@_);
+  my $input  = shift(@_);
+  my $output = shift(@_);
+  my $line   = "";
+  my $hlvmdir = get_hlvm_dir();
+
+  # Derive the module path (relative to <top>/hlvm) and the module name from
+  # the current directory. \Q..\E stops regex metacharacters in the directory
+  # name from being treated as part of the pattern.
+  chomp($MODULE_PATH = `pwd`);
+  $MODULE_PATH =~ s|\Q$hlvmdir\E\/hlvm\/(.*)|$1|;
+  $MODULE = $MODULE_PATH;
+  $MODULE =~ s|\/|_|g;
+
+  my ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
+
+  local $YEAR = $year + 1900;
+  local $DATE = sprintf("%4d/%02d/%02d", $YEAR, $mon + 1, $mday );
+  local $TIME = sprintf("%02d:%02d:%02d", $hour, $min, $sec);
+
+  # Pick the author name from the first source that yields a usable value
+  local $AUTHOR = $ENV{XPS_AUTHOR};
+  if ( length($AUTHOR) < 5 )
+  {
+      $AUTHOR = $ENV{AUTHOR};
+      if (length($AUTHOR) < 5)
+      {
+          $AUTHOR = getpwuid($<);
+          if (length($AUTHOR) < 1 )
+          {
+              $AUTHOR = $ENV{USER};
+              if (length($AUTHOR) < 1)
+              {
+                  $AUTHOR = $ENV{LOGNAME};
+                  if (length($AUTHOR) < 1)
+                  {
+                      $AUTHOR = "Author Unknown";
+                  }
+              }
+          }
+      }
+  }
+
+  local $NAMESPACE      = "HLVM_$MODULE";
+  local $module_header  = ucfirst($MODULE);
+  local $MODULE_INCLUDE = "<hlvm/$MODULE/${module_header}.h>";
+  local $CLASS_INCLUDE  = "<hlvm/$MODULE/${CLASS}.h>";
+  local $NAMESPACE_UC   = uc( $NAMESPACE );
+  local $CLASS_UC       = uc( $CLASS );
+  local $HEADER_UC      = uc( $HEADER );
+
+  open ( OUT,"> $output" ) || die ("Couldn't open $output for writing\n");
+
+  for my $infile ( $preamble , $input )
+  {
+      open ( IN, "< $infile" ) || die ("Couldn't open $infile for reading\n");
+
+      while ( defined($line = <IN>) )
+      {
+          $line =~ s/\%ID\%/\$Id\$/g;
+          $line =~ s/\%LOG\%/\$Log\$/g;
+          $line =~ s/\%AUTHOR%/$AUTHOR/g;
+          $line =~ s/\%USER%/$USER/g;
+          $line =~ s/\%DATE%/$DATE/g;
+          $line =~ s/\%TIME%/$TIME/g;
+          $line =~ s/\%YEAR%/$YEAR/g;
+          $line =~ s/\%MODULE%/$MODULE/g;
+          $line =~ s/\%NAMESPACE\%/$NAMESPACE/g;
+          $line =~ s/\%NAMESPACE_UC\%/$NAMESPACE_UC/g;
+          $line =~ s/\%CLASS\%/$CLASS/g;
+          $line =~ s/\%CLASS_UC\%/$CLASS_UC/g;
+          $line =~ s/\%MODULE_PATH\%/$MODULE_PATH/g;
+          $line =~ s/\%HEADER\%/$HEADER/g;
+          $line =~ s/\%HEADER_UC\%/$HEADER_UC/g;
+          $line =~ s/\%STYLE\%/$STYLE/g;
+          $line =~ s/\%CLASS_INCLUDE\%/$CLASS_INCLUDE/g;
+          $line =~ s/\%MODULE_INCLUDE\%/$MODULE_INCLUDE/g;
+          $line =~ s/\%TOKEN_LIST\%/$TOKEN_LIST/g;
+          $line =~ s/\%SCHEMA_NAME\%/$SCHEMA_NAME/g;
+
+          # Low-precedence "or" applies to the result of print. With "||" the
+          # test was on $line itself, so a failed write was never noticed.
+          print OUT $line or die ("Couldn't write to OUT file\n");
+      }
+      close IN;
+  };
+
+  # Buffered output is flushed on close, so a write error can surface here
+  close(OUT) || die ("Couldn't close $output\n");
+}
+
+1;

Propchange: hlvm/trunk/utils/bin/mkFuncs.pm

------------------------------------------------------------------------------
    svn:executable = *

Added: hlvm/trunk/utils/bin/mkTokenizer
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/bin/mkTokenizer?rev=38020&view=auto

==============================================================================
--- hlvm/trunk/utils/bin/mkTokenizer (added)
+++ hlvm/trunk/utils/bin/mkTokenizer Sat Jul  7 18:59:01 2007
@@ -0,0 +1,133 @@
+#!/usr/bin/perl
+#
+# Copyright (C) 2002 eXtensible Systems, Inc. All Rights Reserved
+#
+# This program is open source software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (GPL) as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version. You should have received a copy of the GPL in a
+# file named COPYING that was included with this program; if not, you can 
+# obtain a copy of the license through the Internet at http://www.fsf.org/
+# 
+# This program is distributed in the hope that it will be useful, but 
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+# for more details.
+#-------------------------------------------------------------------------------
+#
+# This script will extract the identifier names from a RelaxNG schema. The
+# identifier names form the token set that act as terminals in the grammar. We
+# make parsing efficient by generating a perfect hash function with gperf from
+# the set of token identifiers.
+#
+# Identifier names are X in the following schema constructs:
+#    <element name="X">
+#    <attribute name="X">
+#    <value>X</value>
+#
+# Usage: 
+#   mkTokenizer -f <schema_file> <hlvm_src_root>
+#
+# Parse the command line: an optional -f (overwrite existing output files),
+# then the schema file, then the HLVM source root.
+$SchemaFile = shift;
+if (defined($SchemaFile) && $SchemaFile eq "-f") {
+  $Force = 1;
+  $SchemaFile = shift;
+}
+# An argument is missing when shift yields undef or an empty string. (-z is a
+# file test for "exists with zero size", not a test for an empty string.)
+die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
+  unless defined($SchemaFile) && length($SchemaFile);
+$HLVM_root = shift;
+die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
+  unless defined($HLVM_root) && length($HLVM_root);
+print "$HLVM_root\n";
+
+# Get the plain schema name from the file name: strip a trailing ".rng"
+# (anchored, so a ".rng" earlier in the path is left alone) and then any
+# leading directories.
+$Schema = $SchemaFile;
+$Schema =~ s/\.rng$//;
+$Schema = substr($Schema,rindex($Schema,'/')+1);
+
+# Templates are read from the source tree; generated files are written to
+# the current directory and named after the schema.
+$PreambleFile = "$HLVM_root"."/utils/tmplt/Preamble_Code";
+$HeaderTemplate = "$HLVM_root"."/utils/tmplt/Tokenizer_Template.h";
+$HeaderFile = "$Schema"."Tokenizer.h";
+$SourceTemplate = "$HLVM_root"."/utils/tmplt/Tokenizer_Template.cpp";     
+$SourceFile = "$Schema"."Tokenizer.cpp";
+$TokenHashClass = "$Schema" . "TokenHash";
+$TokenHashFile = "$TokenHashClass" . ".i";
+
+# Check the validity of the files we use/create. All inputs are checked up
+# front so that nothing is generated when one of them is missing.
+die "Invalid schema file name" if ! -e "$SchemaFile";
+die "ERROR: '$PreambleFile' doesn't exist" if ! -e "$PreambleFile";
+die "ERROR: '$HeaderTemplate' doesn't exist" if ! -e "$HeaderTemplate";
+die "ERROR: '$SourceTemplate' doesn't exist" if ! -e "$SourceTemplate";
+if (!$Force) {
+  die "ERROR: '$HeaderFile' exists" if -e "$HeaderFile";
+  die "ERROR: '$SourceFile' exists" if -e "$SourceFile";
+  die "ERROR: '$TokenHashFile' exists" if -e "$TokenHashFile";
+}
+
+# Load the helper modules; mkFuncs.pm is looked up in this script's directory
+
+use FindBin;
+use lib $FindBin::Bin;
+use mkFuncs;
+use File::Copy;
+
+# Return the distinct values of the argument list in sorted (string) order.
+sub sortUnique
+{
+  # %seen counts occurrences; grep keeps only the first one of each value.
+  # Everything is lexical, so no loop counter leaks into a package global.
+  my %seen;
+  my @unique = grep { !$seen{$_}++ } @_;
+  return sort(@unique);
+}
+
+# Extract the token names from a RelaxNG schema file.
+#
+# Collects X from every <element name="X">, <attribute name="X"> and
+# <value>X</value> construct and returns the distinct names, sorted.
+# Dies if the schema file cannot be opened.
+sub getTokens
+{
+  my $fname = shift(@_);
+  my @tokens;
+
+  open( SCHEMA, "< $fname") || die "Couldn't open $fname for reading.\n";
+
+  while ( defined(my $line = <SCHEMA>) )
+  {
+    while ($line =~ /<element[^>]*name="([^"]*)"/g) {
+      push @tokens,$1;
+    }
+    while ($line =~ /<attribute[^>]*name="([^"]*)"/g) {
+      push @tokens,$1;
+    }
+    while ($line =~ /<value>\s*([^<\s]*)/g) {
+      push @tokens,$1;
+    }
+  }
+  close SCHEMA;
+
+  # The patterns can match an empty name (for instance a <value> whose text
+  # starts on the next line). Drop those so no empty token is generated.
+  return sortUnique(grep { length($_) } @tokens);
+}
+
+# Extract the terminal tokens from the schema file
+my @tokens = getTokens($SchemaFile);
+
+# Set up a gperf invocation to convert the token list into a perfect hash 
+# function
+open(GPERF,"| gperf -tcDCIoGl --fast 0 -L C++ -Z $TokenHashClass -s 2 -S 1 -k '*' > $TokenHashFile")
+  || die "Couldn't start gperf: $!\n";
+
+# Run the input through GPERF to create the perfect hash function. The module
+# name is the current directory relative to <top>/hlvm with '/' turned into
+# '_'; \Q..\E keeps the directory name from being read as a regex.
+$hlvmdir = get_hlvm_dir();
+chomp($Module = `pwd`);
+$Module =~ s|\Q$hlvmdir\E\/hlvm\/(.*)|$1|;
+$Module =~ s|\/|_|g;
+print GPERF "struct TokenMap {\n";
+print GPERF "const char *name; HLVM_$Module"."::".$Schema."Tokens token;\n" ;
+print GPERF "};\n%%\n" ;
+print GPERF "\"$_\", HLVM_".$Module."::TKN_".$_.",\n" foreach @tokens;
+print GPERF "%%\n";
+
+# Closing a pipe waits for the command and reports its exit status, so this
+# is where a missing or failing gperf is detected.
+close(GPERF) || die "gperf failed while generating $TokenHashFile\n";
+
+# Generate the header file for the tokenizer, starting it with the preamble for
+# C++ source files
+$TOKEN_LIST = "TKN_" . join(",\n    TKN_", @tokens) . ",";
+$SCHEMA_NAME = $Schema;
+process_file($PreambleFile,$HeaderTemplate,$HeaderFile);
+process_file($PreambleFile,$SourceTemplate,$SourceFile);

Propchange: hlvm/trunk/utils/bin/mkTokenizer

------------------------------------------------------------------------------
    svn:executable = *

Added: hlvm/trunk/utils/tmplt/Preamble_Code
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/tmplt/Preamble_Code?rev=38020&view=auto

==============================================================================
--- hlvm/trunk/utils/tmplt/Preamble_Code (added)
+++ hlvm/trunk/utils/tmplt/Preamble_Code Sat Jul  7 18:59:01 2007
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+//                      High Level Virtual Machine (HLVM)
+//
+// Copyright (C) 2006 Reid Spencer. All Rights Reserved.
+//
+// This software is free software; you can redistribute it and/or modify it 
+// under the terms of the GNU Lesser General Public License as published by 
+// the Free Software Foundation; either version 2.1 of the License, or (at 
+// your option) any later version.
+//
+// This software is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for 
+// more details.
+//
+// You should have received a copy of the GNU Lesser General Public License 
+// along with this library in the file named LICENSE.txt; if not, write to the 
+// Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
+// MA 02110-1301 USA
+//

Added: hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp?rev=38020&view=auto

==============================================================================
--- hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp (added)
+++ hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp Sat Jul  7 18:59:01 2007
@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+/// @file hlvm/%MODULE_PATH%/%SCHEMA_NAME%Tokenizer.cpp
+/// @author %AUTHOR%
+/// @date %DATE%
+/// @brief Implements the functions of class %SCHEMA_NAME%Tokenizer.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <hlvm/%MODULE_PATH%/%SCHEMA_NAME%Tokenizer.h>
+#include <hlvm/%MODULE_PATH%/%SCHEMA_NAME%TokenHash.i>
+
+namespace HLVM_%MODULE% {
+
+/// Convert a string to its token using the generated perfect hash.
+/// @return The token for xml_str, or TKN_NONE when it is not in the word set.
+int
+%SCHEMA_NAME%Tokenizer::recognize( const char * xml_str )
+{
+  const struct TokenMap *entry = 
+    %SCHEMA_NAME%TokenHash::in_word_set( xml_str, strlen(xml_str) );
+  return entry ? int(entry->token) : int(TKN_NONE);
+}
+
+/// Find the name of a token by scanning the generated word list.
+/// @return The name of the first entry whose token equals tkn, or an empty
+/// string when no entry matches.
+const char *
+%SCHEMA_NAME%Tokenizer::lookup( int tkn )
+{
+  const unsigned int count = sizeof(wordlist)/sizeof(wordlist[0]);
+  unsigned int idx = 0;
+  while (idx < count)
+  {
+    if (wordlist[idx].token == tkn)
+      return wordlist[idx].name;
+    ++idx;
+  }
+  return "";
+}
+
+}

Added: hlvm/trunk/utils/tmplt/Tokenizer_Template.h
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/tmplt/Tokenizer_Template.h?rev=38020&view=auto

==============================================================================
--- hlvm/trunk/utils/tmplt/Tokenizer_Template.h (added)
+++ hlvm/trunk/utils/tmplt/Tokenizer_Template.h Sat Jul  7 18:59:01 2007
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+/// @file hlvm/%MODULE_PATH%/%SCHEMA_NAME%Tokenizer.h
+/// @author %AUTHOR%
+/// @date %DATE%
+/// @brief Declares the HLVM_%MODULE%::%SCHEMA_NAME%Tokenizer class.
+///
+/// This file is autogenerated by the mkTokenizer script.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef HLVM_%MODULE%_%SCHEMA_NAME%TOKENIZER_H
+#define HLVM_%MODULE%_%SCHEMA_NAME%TOKENIZER_H
+
+namespace HLVM_%MODULE%
+{
+  /// @brief The list of tokens for the %SCHEMA_NAME% schema.
+  enum %SCHEMA_NAME%Tokens
+  {
+    TKN_ERROR = -1,
+    TKN_NONE  = 0,  ///< Returned by recognize() for an unrecognized string
+    %TOKEN_LIST%
+    TKN_COUNT       ///< Follows the last generated token
+  };
+  /// @brief Efficient token recognizer (perfect hash function) for the
+  /// %SCHEMA_NAME% schema
+  class %SCHEMA_NAME%Tokenizer
+  {
+  /// @name Methods
+  /// @{
+  public:
+    /// This function uses a fast perfect hash algorithm to convert the provided
+    /// string into a numeric integer token. The set of strings supported are
+    /// all the element, attribute and value names of the
+    /// %SCHEMA_NAME% Schema.
+    /// @param str The string to convert to a numeric token
+    /// @return Returns an enumerated token value (TKN_NONE if not recognized).
+    /// @brief Convert a string token to an enumeration token, if possible.
+    static int recognize( const char * str );
+
+    /// @brief Lookup the name of a token by its value ("" if there is none).
+    static const char * lookup( int tkn );
+
+  /// @}
+  };
+}
+
+#endif