[llvm-commits] [hlvm] r38020 - in /hlvm/trunk: Makefile.hlvm hlvm/Reader/XML/Makefile hlvm/Reader/XML/XMLReader.cpp hlvm/Reader/Yaml/YamlReader.cpp utils/ utils/bin/ utils/bin/mkFuncs.pm utils/bin/mkTokenizer utils/tmplt/ utils/tmplt/Preamble_Code utils/tmplt/Tokenizer_Template.cpp utils/tmplt/Tokenizer_Template.h
Reid Spencer
reid at x10sys.com
Sat Jul 7 16:59:02 PDT 2007
Author: reid
Date: Sat Jul 7 18:59:01 2007
New Revision: 38020
URL: http://llvm.org/viewvc/llvm-project?rev=38020&view=rev
Log:
Add facilities for generation of a tokenizer based on an RNG XML Schema. The
tokenizer uses gperf to generate a perfect hash function that can be used to
identify element and attribute names.
Added:
hlvm/trunk/utils/
hlvm/trunk/utils/bin/
hlvm/trunk/utils/bin/mkFuncs.pm (with props)
hlvm/trunk/utils/bin/mkTokenizer (with props)
hlvm/trunk/utils/tmplt/
hlvm/trunk/utils/tmplt/Preamble_Code
hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp
hlvm/trunk/utils/tmplt/Tokenizer_Template.h
Modified:
hlvm/trunk/Makefile.hlvm
hlvm/trunk/hlvm/Reader/XML/Makefile
hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp
hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp
Modified: hlvm/trunk/Makefile.hlvm
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/Makefile.hlvm?rev=38020&r1=38019&r2=38020&view=diff
==============================================================================
--- hlvm/trunk/Makefile.hlvm (original)
+++ hlvm/trunk/Makefile.hlvm Sat Jul 7 18:59:01 2007
@@ -93,10 +93,14 @@
preconditions: $(PROJ_OBJ_DIR)/Makefile.hlvm
-#$(PROJ_OBJ_DIR)/%Tokenizer.cpp $(PROJ_OBJ_DIR)/%Tokenizer.h $(PROJ_OBJ_DIR)/%TokenHash.i : $(PROJ_SRC_DIR)/%.rng $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.cpp $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.h $(HLVM_top_srcdir)/utils/bin/mkTokenizer
-# $(Echo) Building Tokenizer For $*
-# $(Verb) $(HLVM_top_srcdir)/utils/bin/mkTokenizer -f \
-# $(PROJ_SRC_DIR)/$*.rng
+# Generate <Schema>Tokenizer.cpp, <Schema>Tokenizer.h and <Schema>TokenHash.i
+# from <Schema>.rng. mkTokenizer takes the schema file followed by the HLVM
+# source root, which it uses to locate utils/tmplt/*; -f lets it overwrite
+# outputs that already exist.
+$(PROJ_OBJ_DIR)/%Tokenizer.cpp $(PROJ_OBJ_DIR)/%Tokenizer.h \
+ $(PROJ_OBJ_DIR)/%TokenHash.i : $(PROJ_SRC_DIR)/%.rng \
+ $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.cpp \
+ $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.h \
+ $(HLVM_top_srcdir)/utils/bin/mkTokenizer
+	$(Echo) Building Tokenizer For $*
+	$(Verb) $(HLVM_top_srcdir)/utils/bin/mkTokenizer -f \
+	  $(PROJ_SRC_DIR)/$*.rng $(HLVM_top_srcdir)
ifdef INSTALL_INCLUDES
PartialPath := $(patsubst $(PROJ_SRC_ROOT)/%,%,$(PROJ_SRC_DIR))
Modified: hlvm/trunk/hlvm/Reader/XML/Makefile
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/hlvm/Reader/XML/Makefile?rev=38020&r1=38019&r2=38020&view=diff
==============================================================================
--- hlvm/trunk/hlvm/Reader/XML/Makefile (original)
+++ hlvm/trunk/hlvm/Reader/XML/Makefile Sat Jul 7 18:59:01 2007
@@ -4,10 +4,26 @@
#
#-------------------------------------------------------------------------------
-LEVEL = ../../..
-LIBRARYNAME = HLVMXMLReader
-DONT_BUILD_RELINKED := 1
-BUILD_ARCHIVE := 1
-INSTALL_INCLUDES := XMLReader.h
+LEVEL := ../../..
+LIBRARYNAME := HLVMXMLReader
+DONT_BUILD_RELINKED := 1
+BUILD_ARCHIVE := 1
+EXTRA_DIST := HLVM.rng
+BUILT_SOURCES := HLVMTokenizer.cpp
+INSTALL_INCLUDES := XMLReader.h
include $(LEVEL)/Makefile.hlvm
+
+$(ObjDir)/HLVMTokenizer.o: \
+ $(PROJ_OBJ_DIR)/HLVMTokenizer.h \
+ $(PROJ_OBJ_DIR)/HLVMTokenHash.i \
+ $(PROJ_OBJ_DIR)/HLVMTokenizer.cpp
+
+# Generate the HLVM tokenizer sources from HLVM.rng. This is an explicit rule,
+# so there is no pattern stem and $* cannot be used for the schema name; the
+# name is spelled out instead. mkTokenizer needs the HLVM source root as its
+# second argument to find the templates in utils/tmplt.
+$(PROJ_OBJ_DIR)/HLVMTokenizer.cpp $(PROJ_OBJ_DIR)/HLVMTokenizer.h \
+ $(PROJ_OBJ_DIR)/HLVMTokenHash.i : $(PROJ_SRC_DIR)/HLVM.rng \
+ $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.cpp \
+ $(HLVM_top_srcdir)/utils/tmplt/Tokenizer_Template.h \
+ $(HLVM_top_srcdir)/utils/bin/mkTokenizer
+	$(Echo) Building Tokenizer For HLVM
+	$(Verb) $(HLVM_top_srcdir)/utils/bin/mkTokenizer -f \
+	  $(PROJ_SRC_DIR)/HLVM.rng $(HLVM_top_srcdir)
Modified: hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp?rev=38020&r1=38019&r2=38020&view=diff
==============================================================================
--- hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp (original)
+++ hlvm/trunk/hlvm/Reader/XML/XMLReader.cpp Sat Jul 7 18:59:01 2007
@@ -28,25 +28,400 @@
//===----------------------------------------------------------------------===//
#include <hlvm/Reader/XML/XMLReader.h>
+#include <hlvm/Base/Locator.h>
#include <hlvm/AST/AST.h>
+#include <expat.h>
+#include <vector>
+#include <string>
using namespace hlvm;
namespace {
+/// The basic type of an attribute, stored in AttrInfo::type. The enumerator
+/// names follow the XML attribute type keywords (CDATA, ID, IDREF, ...).
+/// @brief Attribute Type Enumeration.
+enum AttributeTypes
+{
+ CDATA_AttrType,
+ ID_AttrType,
+ IDREF_AttrType,
+ IDREFS_AttrType,
+ NMTOKEN_AttrType,
+ NMTOKENS_AttrType,
+ ENTITY_AttrType,
+ ENTITIES_AttrType,
+ NOTATION_AttrType,
+};
+
+/// This structure provides information about an attribute and its value.
+/// It is used during parsing of an XML document when the parser calls
+/// the Handler's ElementStart method.
+/// @brief Attribute Information Structure.
+struct AttrInfo
+{
+ std::string uri; ///< The namespace URI of the attribute
+ std::string local; ///< The name of the attribute
+ std::string value; ///< The value of the attribute
+ AttributeTypes type;///< The basic type of the attribute
+ int32_t token; ///< The token for the attribute name
+ uint32_t ns; ///< The token for the attribute namespace
+};
+
+struct NodeInfo : public hlvm::Locator
+{
+ std::string uri; ///< The namespace uri of the element
+ std::string local; ///< The local name of the element
+ int32_t token; ///< Tokenized value of local name
+ uint32_t ns; ///< Tokenized value of namespace name
+};
+
+/// This structure provides information about an element. It is used during
+/// parsing of an XML document when the parser calls the Handler's
+/// ElementStart method.
+/// @brief Element Information Structure.
+struct ElementInfo : public NodeInfo {
+ std::vector<NodeInfo> kids; ///< Node info of child elements
+ std::vector<AttrInfo> attrs; ///< Attributes of the element
+ void find_attrs(
+ int token1, const std::string*& value1) const;
+ void find_attrs(
+ int token1, const std::string*& value1,
+ int token2, const std::string*& value2
+ ) const;
+ void find_attrs(
+ int token1, const std::string*& value1,
+ int token2, const std::string*& value2,
+ int token3, const std::string*& value3
+ ) const;
+};
+
class XMLReaderImpl : public XMLReader {
+ llvm::sys::Path path_;
+ AST::AST* ast_;
+ XML_Parser xp_;
+ std::vector<ElementInfo> elems_; ///< The element stack
+ ElementInfo* etop_; ///< A pointer to the top of the element stack
public:
XMLReaderImpl(const llvm::sys::Path& path) :
- path_(path), ast_(0) {}
-
- virtual ~XMLReaderImpl() { if (ast_) delete ast_; }
+ path_(path), ast_(0), xp_(0), elems_(), etop_(0)
+ {
+ xp_ = XML_ParserCreate( "UTF-8");
+ // Reserve some space on the elements and attributes list so we aren't
+ // mucking around with tiny allocations. If we cross 64 elements on the
+ // stack or 64 attributes on one element, then they will double to 128.
+ // Its unlikely that documents will reach these limits and so there
+ // will be no reallocation after this initial reserve.
+ elems_.reserve(64);
+ }
+
+ virtual ~XMLReaderImpl()
+ {
+ if (ast_) delete ast_;
+ XML_ParserFree( xp_ );
+ }
virtual void read();
virtual AST::AST* get();
-private:
- llvm::sys::Path path_;
- AST::AST* ast_;
+/// @name Expat Parsing Handlers
+/// @{
+private:
+
+ /// Expat start-tag callback. Pushes a new ElementInfo onto elems_, fills in
+ /// its name, token, location and attributes, then passes it to
+ /// handler_->ElementStart().
+ /// @param user_data The XMLReaderImpl this callback belongs to
+ /// @param name The element name as reported by expat
+ /// @param attributes Null-terminated array of alternating attribute names
+ /// and values
+ // NOTE(review): `d`, `source_`, `handler_`, `nsmap_`, `NSMapType`,
+ // `XPS_assert` and `XPS_xml::AttrInfo` are not declared in the part of this
+ // file visible here - confirm where they are meant to come from.
+ static void XMLCALL
+ StartElementHandler(
+ void *user_data, const XML_Char* name, const XML_Char** attributes
+ )
+ {
+ // Convert the user data to our XMLReaderImpl pointer
+ register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+ // Make a new element info on the top of the stack.
+ p->elems_.resize(p->elems_.size()+1);
+ p->etop_ = &p->elems_.back();
+ ElementInfo& ei = *(p->etop_);
+
+ // Fill in the element info
+ // NOTE(review): unlike EndElementHandler, the element name is not split at
+ // Namespace_Separator here and ei.ns is never assigned - confirm intended.
+ ei.local = name;
+ ei.token = d.tokenize(name);
+ ei.set(
+ p->source_->publicId(),
+ p->source_->systemId(),
+ uint32_t(XML_GetCurrentLineNumber(p->xp_)),
+ uint32_t(XML_GetCurrentColumnNumber(p->xp_))
+ );
+ ei.kids.clear();
+ ei.attrs.clear();
+
+ // Handle the attributes
+ if ( attributes )
+ {
+ // Determine index of first default attribute
+ // size_t default_attr_index = XML_GetSpecifiedAttributeCount( p->xp_ );
+
+ // Process all the attributes
+ size_t curr_attr = 0;
+ while ( *attributes != 0 )
+ {
+ // Resize the attrs vector to accommodate this attribute and get
+ // a reference to that current attribute for ease of expression
+ ei.attrs.resize(curr_attr+1);
+ XPS_xml::AttrInfo& attr = ei.attrs[curr_attr];
+
+ // Handle the namespace^Dname couplet
+ const XML_Char* attr_ns_name = *attributes;
+ XML_Char* attr_name = strrchr(attr_ns_name, Namespace_Separator);
+ if (attr_name == 0)
+ {
+ attr.ns = 0;
+ attr_name = const_cast<XML_Char*>(attr_ns_name);
+ }
+ else
+ {
+ // NOTE(review): this overwrites the separator in the string supplied by
+ // the parser and, unlike EndElementHandler, never restores it.
+ *attr_name = 0; // Terminate namespace name
+ attr_name++; // Advance to start of attribute name
+ NSMapType::const_iterator NSI = p->nsmap_.find(attr_ns_name);
+ XPS_assert(NSI != p->nsmap_.end());
+ attr.ns = NSI->second;
+ }
+
+ // Get the token for the attribute name and record its name and value
+ attr.local = attr_name;
+ attr.token = d.tokenize(attr_name);
+ attr.value = attributes[1];
+
+ // Increment loop counters: step over the name/value pair just handled
+ attributes +=2;
+ curr_attr++;
+ }
+ }
+
+ // Tell the handler about the element
+ p->handler_->ElementStart(ei);
+ }
+
+ /// Expat end-tag callback. Splits @p name into namespace and local name at
+ /// Namespace_Separator, checks that the tokenized local name matches the
+ /// element on top of elems_, notifies handler_, then pops the element and
+ /// records it in the kids list of its parent, if there is one.
+ /// @param user_data The XMLReaderImpl this callback belongs to
+ /// @param name The (possibly namespace-qualified) element name from expat
+ static void XMLCALL
+ EndElementHandler( void *user_data, const XML_Char *name)
+ {
+   // Get the parser
+   register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+   // Get the current position
+   int line = XML_GetCurrentLineNumber( p->xp_ );
+   int column = XML_GetCurrentColumnNumber( p->xp_ );
+
+   // Find the separator that separates the namespace name from the local name
+   const XML_Char* ns_name = name;
+   int ns = 0;
+   XML_Char* local_name = strrchr(name, Namespace_Separator);
+
+   // If we didn't find the separator, then there's no namespace. This means
+   // that we've returned to the anonymous namespace so accommodate that now.
+   if (local_name == 0) {
+     local_name = const_cast<XML_Char*>(name);
+   }
+
+   // Otherwise we have a namespace and we need to decouple the couplet
+   else
+   {
+     *local_name = 0; // terminate namespace
+     local_name++;    // get start of local name
+
+     // Lookup the namespace by prefix. It's a hard error to not find the
+     // namespace name in the map because the Namespace start handler should
+     // have already placed it there.
+     NSMapType::const_iterator NSI = p->nsmap_.find(ns_name);
+     XPS_assert(NSI != p->nsmap_.end());
+     ns = NSI->second;
+   }
+
+   // Get the dialect
+   const Dialect& d = p->find_dialect(ns);
+
+   // Convert the element name to a token
+   int name_token = d.tokenize(local_name);
+
+   // Save the previous token before popping it and make sure that it is the
+   // same as the one the parser told us we're popping.
+   int32_t token = p->elems_.back().token;
+   XPS_assert(token == name_token);
+
+   // Tell the handler that we're ending an element.
+   p->handler_->ElementEnd( p->elems_.back(), line, column );
+
+   // Pop the element token and then push it on the "kids" list of the
+   // parent element indicating that we've completed parsing one child element.
+   NodeInfo ki = static_cast<NodeInfo&>(p->elems_.back());
+   p->elems_.pop_back();
+   if (!p->elems_.empty())
+   {
+     p->etop_ = & p->elems_.back();
+     p->etop_->kids.push_back(ki);
+   }
+   else
+   {
+     // The stack is empty, so don't leave etop_ pointing at the element
+     // that was just destroyed.
+     p->etop_ = 0;
+   }
+
+   // Fix the string we modified. Test whether the name was actually split
+   // rather than the namespace token, so the separator is restored even if
+   // the namespace found in the map has token 0.
+   if (local_name != name)
+     *(--local_name) = Namespace_Separator;
+ }
+
+ static void XMLCALL
+ CharacterDataHandler( void *user_data, const XML_Char *s, int len)
+ {
+ // Get the parser
+ register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+ // Tell the handler about the characters
+ std::string tmp;
+ tmp.assign(s,len);
+ p->handler_->Characters(tmp);
+ }
+
+ static void XMLCALL
+ ProcessingInstructionHandler(
+ void *user_data, const XML_Char *target, const XML_Char *data)
+ {
+ // Get the parser
+ register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+ // Tell the handler about the processing instruction
+ p->handler_->ProcessingInstruction(target,data);
+ }
+
+ static void XMLCALL
+ CommentHandler( void *user_data, const XML_Char *data)
+ {
+ // Get the parser
+ register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+ // Comments are always valid
+ p->handler_->Comment(data);
+ }
+
+ /// Expat callback for the start of a CDATA section. Pushes a pseudo-element
+ /// with token CDATASectionToken onto the element stack and notifies the
+ /// handler.
+ /// @param user_data The XMLReaderImpl this callback belongs to
+ static void XMLCALL
+ StartCdataSectionHandler(void *user_data)
+ {
+   // Get the parser
+   register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+   // Put the CData Section on the element stack
+   ElementInfo ei;
+   ei.ns = 0;
+   ei.local = "CDATA";
+   ei.token = CDATASectionToken;
+   ei.setLine( XML_GetCurrentLineNumber( p->xp_ ));
+   ei.setColumn( XML_GetCurrentColumnNumber( p->xp_ ));
+   ei.kids.clear();
+   p->elems_.push_back(ei);
+
+   // Keep etop_ pointing at the new top of the stack: the push may have
+   // reallocated the vector, and EndCdataSectionHandler checks the token
+   // through etop_.
+   p->etop_ = &p->elems_.back();
+
+   // Inform the handler of the CData Section
+   p->handler_->CDataSectionStart();
+ }
+
+ /// Expat callback for the end of a CDATA section. Pops the CDATA
+ /// pseudo-element off the element stack, records it as a child of the
+ /// enclosing element (if any) and notifies the handler.
+ /// @param user_data The XMLReaderImpl this callback belongs to
+ static void XMLCALL
+ EndCdataSectionHandler(void *user_data)
+ {
+   // Get the parser
+   register XMLReaderImpl* p = reinterpret_cast<XMLReaderImpl*>(user_data);
+
+   // Validate that the top of stack is a CDataSection. Look at the stack
+   // itself, as EndElementHandler does, rather than at the cached etop_.
+   XPS_assert(!p->elems_.empty());
+   XPS_assert(p->elems_.back().token == CDATASectionToken);
+
+   // Pop the CData off the stack
+   NodeInfo ki = static_cast<NodeInfo&>(p->elems_.back());
+   p->elems_.pop_back();
+
+   // Only touch the parent if there is one; back() on an empty vector is
+   // undefined behaviour.
+   if (!p->elems_.empty())
+   {
+     p->etop_ = & p->elems_.back();
+     p->etop_->kids.push_back(ki);
+   }
+   else
+   {
+     p->etop_ = 0;
+   }
+
+   // Inform the handler (always valid)
+   p->handler_->CDataSectionEnd();
+ }
+
+ static void XMLCALL
+ DefaultHandler(
+ void *user_data, const XML_Char *s, int len)
+ {
+ // static_cast<XMLReaderImpl*>(user_data)->handler_->Other(s,len);
+ }
+
+ static void XMLCALL
+ StartDoctypeDeclHandler(
+ void * /*user_data*/,
+ const XML_Char * /*doctypeName*/,
+ const XML_Char * /*sysid*/,
+ const XML_Char * /*pubid*/,
+ int /*has_internal_subset*/)
+ {
+ // FIXME: Implement
+ }
+
+ static void XMLCALL
+ EndDoctypeDeclHandler(void * /*user_data*/)
+ {
+ // FIXME: Implement
+ }
+
+ static void XMLCALL
+ EntityDeclHandler(
+ void * /*user_data*/,
+ const XML_Char * /*entityName*/,
+ int /*is_parameter_entity*/,
+ const XML_Char * /*value*/,
+ int /*value_length*/,
+ const XML_Char * /*base*/,
+ const XML_Char * /*systemId*/,
+ const XML_Char * /*publicId*/,
+ const XML_Char * /*notationName*/)
+ {
+ // FIXME: Implement
+ }
+
+ static void XMLCALL
+ NotationDeclHandler(
+ void * /*user_data*/,
+ const XML_Char * /*notationName*/,
+ const XML_Char * /*base*/,
+ const XML_Char * /*systemId*/,
+ const XML_Char * /*publicId*/)
+ {
+ // FIXME: Implement
+ }
+
+ static int XMLCALL
+ NotStandaloneHandler(void * /*user_data*/ )
+ {
+ // FIXME: Implement
+ return XML_STATUS_ERROR;
+ }
+
+ static int XMLCALL
+ ExternalEntityRefHandler(
+ XML_Parser /*parser*/,
+ const XML_Char * /*context*/,
+ const XML_Char * /*base*/,
+ const XML_Char * /*systemId*/,
+ const XML_Char * /*publicId*/)
+ {
+ // FIXME: Implement
+ return XML_STATUS_ERROR;
+ }
+
+ static void XMLCALL
+ SkippedEntityHandler(
+ void * /*user_data*/,
+ const XML_Char * /*entityName*/,
+ int /*is_parameter_entity*/)
+ {
+ // FIXME: Implement
+ }
+
+ static int XMLCALL
+ UnknownEncodingHandler(
+ void * /*encodingHandlerData*/,
+ const XML_Char * /*name*/,
+ XML_Encoding * /*info*/)
+ {
+ // FIXME: Implement
+ return XML_STATUS_ERROR;
+ }
+
+/// @}
};
AST::AST*
@@ -55,9 +430,27 @@
return ast_;
}
+static const XML_Char Namespace_Separator = 4;
+
void
XMLReaderImpl::read() {
ast_ = new AST::AST();
+
+ // Set up the parser for parsing a document.
+ XML_ParserReset(xp_,"UTF-8");
+ XML_SetUserData(xp_, this );
+ XML_SetElementHandler(xp_, &StartElementHandler, &EndElementHandler );
+ XML_SetNamespaceDeclHandler(xp_, StartNamespaceDeclHandler,
+ EndNamespaceDeclHandler );
+ XML_SetCharacterDataHandler( xp_, CharacterDataHandler );
+ XML_SetProcessingInstructionHandler(xp_, ProcessingInstructionHandler );
+ XML_SetCommentHandler( xp_, CommentHandler );
+ XML_SetCdataSectionHandler( xp_, StartCdataSectionHandler,
+ EndCdataSectionHandler );
+ XML_SetNotStandaloneHandler( xp_, NotStandaloneHandler );
+ XML_SetExternalEntityRefHandler( xp_, ExternalEntityRefHandler);
+ XML_SetSkippedEntityHandler( xp_, SkippedEntityHandler);
+ XML_SetUnknownEncodingHandler( xp_, UnknownEncodingHandler, this);
}
}
Modified: hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp?rev=38020&r1=38019&r2=38020&view=diff
==============================================================================
--- hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp (original)
+++ hlvm/trunk/hlvm/Reader/Yaml/YamlReader.cpp Sat Jul 7 18:59:01 2007
@@ -29,6 +29,8 @@
class YamlReaderImpl : public hlvm::YamlReader {
public:
+ typedef std::map<SYMID,hlvm::AST::Node> NodeIDMap;
+ public:
YamlReaderImpl() {
parser_ = syck_new_parser();
syck_parser_handler(parser_, SyckNodeHandler(NodeHandler));
@@ -37,8 +39,20 @@
SyckBadAnchorHandler(BadAnchorHandler));
}
- static SYMID NodeHandler(SyckParser*, SyckNode* )
+ // Syck node callback. Dispatches on the node kind; no kind is handled yet,
+ // so every node currently yields the id 0.
+ static SYMID NodeHandler(SyckParser*p, SyckNode* n)
{
+ switch (n->kind) {
+ case syck_str_kind: // Scalar
+ break;
+ case syck_seq_kind: // Array
+ break;
+ case syck_map_kind: // Map
+ break;
+ default:
+ // Unknown kind?
+ // FIXME: Should we generate an error here?
+ break;
+ }
return 0;
}
Added: hlvm/trunk/utils/bin/mkFuncs.pm
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/bin/mkFuncs.pm?rev=38020&view=auto
==============================================================================
--- hlvm/trunk/utils/bin/mkFuncs.pm (added)
+++ hlvm/trunk/utils/bin/mkFuncs.pm Sat Jul 7 18:59:01 2007
@@ -0,0 +1,101 @@
+#!/usr/bin/perl
+
+# Derive a directory name from the current working directory.
+# Returns the working directory cut off after the last occurrence of "hlvm"
+# in the path; if that directory contains an "AST" subdirectory, the final
+# "/hlvm" component is removed as well. A path that does not contain "hlvm"
+# is returned unchanged.
+sub get_hlvm_dir
+{
+ chomp(my $cwd=`pwd`);
+ my $hlvmdir = $cwd;
+ # Greedy match: keep everything up to and including the last "hlvm"
+ $hlvmdir =~ s|(.*hlvm).*|$1|;
+ # NOTE(review): presumably an AST subdirectory marks the inner hlvm source
+ # directory, so we step up to its parent - confirm against the tree layout.
+ if (-d"$hlvmdir/AST") {
+ $hlvmdir =~ s|(.*)/hlvm|$1|;
+ }
+ return $hlvmdir;
+}
+
+# Expand a template into an output file.
+#   $preamble - path of the preamble file, copied to the output first
+#   $input    - path of the template file, copied after the preamble
+#   $output   - path of the file to create (truncated if it already exists)
+# Every line of both inputs has its %NAME% placeholders replaced before it is
+# written. The values for %CLASS%, %HEADER%, %STYLE%, %USER%, %TOKEN_LIST%
+# and %SCHEMA_NAME% are read from package globals that the caller is expected
+# to have set; the remaining values are computed here from the working
+# directory, the clock and the environment. Dies if a file cannot be opened
+# or the output cannot be written.
+sub process_file
+{
+  my $preamble = shift(@_);
+  my $input = shift(@_);
+  my $output = shift(@_);
+  my $line = "";
+  my $hlvmdir = get_hlvm_dir();
+
+  # The module path is the working directory relative to <hlvmdir>/hlvm; the
+  # module name is that path with slashes turned into underscores.
+  chomp($MODULE_PATH = `pwd`);
+  $MODULE_PATH =~ s|$hlvmdir\/hlvm\/(.*)|$1|;
+  $MODULE = $MODULE_PATH;
+  $MODULE =~ s|\/|_|g;
+
+  ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
+
+  local $YEAR = $year + 1900;
+  local $DATE = sprintf("%4d/%02d/%02d", $YEAR, $mon + 1, $mday );
+  local $TIME = sprintf("%02d:%02d:%02d", $hour, $min, $sec);
+
+  # Pick the author name from the first usable source, in this order:
+  # $XPS_AUTHOR, $AUTHOR, the password database, $USER, $LOGNAME.
+  local $AUTHOR = $ENV{XPS_AUTHOR};
+  if ( length($AUTHOR) < 5 )
+  {
+    $AUTHOR = $ENV{AUTHOR};
+    if (length($AUTHOR) < 5)
+    {
+      $AUTHOR = getpwuid($<);
+      if (length($AUTHOR) < 1 )
+      {
+        $AUTHOR = $ENV{USER};
+        if (length($AUTHOR) < 1)
+        {
+          $AUTHOR = $ENV{LOGNAME};
+          if (length($AUTHOR) < 1)
+          {
+            $AUTHOR = "Author Unknown";
+          }
+        }
+      }
+    }
+  }
+
+  local $NAMESPACE = "HLVM_$MODULE";
+  local $module_header = ucfirst($MODULE);
+  local $MODULE_INCLUDE = "<hlvm/$MODULE/${module_header}.h>";
+  local $CLASS_INCLUDE = "<hlvm/$MODULE/${CLASS}.h>";
+  local $NAMESPACE_UC = uc( $NAMESPACE );
+  local $CLASS_UC = uc( $CLASS );
+  local $HEADER_UC = uc( $HEADER );
+
+  open ( OUT,"> $output" ) || die ("Couldn't open $output for writing\n");
+
+  for $infile ( $preamble , $input )
+  {
+    open ( IN, "< $infile" ) || die ("Couldn't open $infile for reading\n");
+
+    while ( defined($line = <IN>) )
+    {
+      $line =~ s/\%ID\%/\$Id\$/g;
+      $line =~ s/\%LOG\%/\$Log\$/g;
+      $line =~ s/\%AUTHOR%/$AUTHOR/g;
+      $line =~ s/\%USER%/$USER/g;
+      $line =~ s/\%DATE%/$DATE/g;
+      $line =~ s/\%TIME%/$TIME/g;
+      $line =~ s/\%YEAR%/$YEAR/g;
+      $line =~ s/\%MODULE%/$MODULE/g;
+      $line =~ s/\%NAMESPACE\%/$NAMESPACE/g;
+      $line =~ s/\%NAMESPACE_UC\%/$NAMESPACE_UC/g;
+      $line =~ s/\%CLASS\%/$CLASS/g;
+      $line =~ s/\%CLASS_UC\%/$CLASS_UC/g;
+      $line =~ s/\%MODULE_PATH\%/$MODULE_PATH/g;
+      $line =~ s/\%MODULE\%/$MODULE/g;
+      $line =~ s/\%HEADER\%/$HEADER/g;
+      $line =~ s/\%HEADER_UC\%/$HEADER_UC/g;
+      $line =~ s/\%STYLE\%/$STYLE/g;
+      $line =~ s/\%CLASS_INCLUDE\%/$CLASS_INCLUDE/g;
+      $line =~ s/\%MODULE_INCLUDE\%/$MODULE_INCLUDE/g;
+      $line =~ s/\%TOKEN_LIST\%/$TOKEN_LIST/g;
+      $line =~ s/\%SCHEMA_NAME\%/$SCHEMA_NAME/g;
+
+      # Use low-precedence "or": with "||" the die would attach to $line
+      # instead of to the result of print, so write errors went unnoticed.
+      print OUT $line or die ("Couldn't write to OUT file\n");
+    }
+    close IN;
+  };
+
+  # Buffered output is flushed here, so this is where a full disk shows up.
+  close(OUT) or die ("Couldn't write to OUT file\n");
+}
+
+1;
Propchange: hlvm/trunk/utils/bin/mkFuncs.pm
------------------------------------------------------------------------------
svn:executable = *
Added: hlvm/trunk/utils/bin/mkTokenizer
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/bin/mkTokenizer?rev=38020&view=auto
==============================================================================
--- hlvm/trunk/utils/bin/mkTokenizer (added)
+++ hlvm/trunk/utils/bin/mkTokenizer Sat Jul 7 18:59:01 2007
@@ -0,0 +1,133 @@
+#!/usr/bin/perl
+#
+# Copyright (C) 2002 eXtensible Systems, Inc. All Rights Reserved
+#
+# This program is open source software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (GPL) as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version. You should have received a copy of the GPL in a
+# file named COPYING that was included with this program; if not, you can
+# obtain a copy of the license through the Internet at http://www.fsf.org/
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#-------------------------------------------------------------------------------
+#
+# This script will extract the identifier names from a RelaxNG schema. The
+# identifier names form the token set that act as terminals in the grammar. We
+# make parsing efficient by generating a perfect hash function with gperf from
+# the set of token identifiers.
+#
+# Identifier names are X in the following schema constructs:
+# <element name="X">
+# <attribute name="X">
+# <value>X</value>
+#
+# Usage:
+# mkTokenizer -f <schema_file> <hlvm_src_root>
+#
+# Parse the command line: an optional -f (overwrite existing output files),
+# then the schema file, then the HLVM source root.
+$SchemaFile = shift;
+if (defined($SchemaFile) && $SchemaFile eq "-f") {
+  $Force = 1;
+  $SchemaFile = shift;
+}
+# Note: -z is a file test (file exists with zero size), not an empty-string
+# test, so missing arguments have to be checked for explicitly.
+die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
+  if !defined($SchemaFile) || $SchemaFile eq "";
+$HLVM_root = shift;
+die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
+  if !defined($HLVM_root) || $HLVM_root eq "";
+print "$HLVM_root\n";
+
+$Schema = $SchemaFile;
+$Schema =~ s/\.rng//;
+$Schema = substr($Schema,rindex($Schema,'/')+1);
+$PreambleFile = "$HLVM_root"."/utils/tmplt/Preamble_Code";
+$HeaderTemplate = "$HLVM_root"."/utils/tmplt/Tokenizer_Template.h";
+$HeaderFile = "$Schema"."Tokenizer.h";
+$SourceTemplate = "$HLVM_root"."/utils/tmplt/Tokenizer_Template.cpp";
+$SourceFile = "$Schema"."Tokenizer.cpp";
+$TokenHashClass = "$Schema" . "TokenHash";
+$TokenHashFile = "$TokenHashClass" . ".i";
+
+# Check the validity of the files we use/create
+die "Invalid schema file name" if ! -e "$SchemaFile";
+die "ERROR: '$PreambleFile' doesn't exist" if ! -e "$PreambleFile";
+if (!$Force) {
+ die "ERROR: '$HeaderFile' exists" if -e "$HeaderFile";
+ die "ERROR: '$SourceFile' exists" if -e "$SourceFile";
+ die "ERROR: '$TokenHashFile' exists" if -e "$TokenHashFile";
+}
+
+# Get the plain old schema name from the file name
+
+use FindBin;
+use lib $FindBin::Bin;
+use mkFuncs;
+use File::Copy;
+
+# Return the arguments with duplicates removed, in sorted (string) order.
+sub sortUnique
+{
+  my %seen;
+  # Keep only the first occurrence of each value
+  my @unique = grep { !$seen{$_}++ } @_;
+  return sort(@unique);
+}
+
+# Collect the identifier names used in a RelaxNG schema file.
+#   $fname - path of the schema file to scan
+# Scans the file line by line for <element name="X">, <attribute name="X">
+# and <value>X constructs and returns the sorted, duplicate-free list of the
+# X values. Dies if the file cannot be opened.
+sub getTokens
+{
+  my $fname = shift(@_);
+  my @tokens;
+
+  open( SCHEMA, "< $fname") || die "Couldn't open $fname for reading.\n";
+
+  while ( defined(my $line = <SCHEMA>) )
+  {
+    push @tokens, $1 while ($line =~ /<element[^>]*name="([^"]*)"/g);
+    push @tokens, $1 while ($line =~ /<attribute[^>]*name="([^"]*)"/g);
+    push @tokens, $1 while ($line =~ /<value>\s*([^<\s]*)/g);
+  }
+  close SCHEMA;
+
+  # The patterns can capture an empty string (e.g. a <value> tag whose text
+  # starts on the next line); an empty name is not a usable token.
+  return sortUnique(grep { length($_) } @tokens);
+}
+
+# Extract the terminal tokens from the schema file
+my @tokens = getTokens($SchemaFile);
+die "ERROR: no tokens found in '$SchemaFile'\n" unless @tokens;
+
+# Set up a gperf invocation to convert the token list into a perfect hash
+# function
+open(GPERF,"| gperf -tcDCIoGl --fast 0 -L C++ -Z $TokenHashClass -s 2 -S 1 -k '*' > $TokenHashFile")
+  || die "ERROR: couldn't run gperf to create '$TokenHashFile'\n";
+
+# Run the input through GPERF to create the perfect hash function
+$hlvmdir = get_hlvm_dir();
+chomp($Module = `pwd`);
+$Module =~ s|$hlvmdir\/hlvm\/(.*)|$1|;
+$Module =~ s|\/|_|g;
+print GPERF "struct TokenMap {\n";
+print GPERF "const char *name; HLVM_$Module"."::".$Schema."Tokens token;\n" ;
+print GPERF "};\n%%\n" ;
+print GPERF "\"$_\", HLVM_".$Module."::TKN_".$_.",\n" foreach @tokens;
+print GPERF "%%\n";
+# Closing a pipe waits for the child, so a failing gperf is reported here
+close(GPERF) || die "ERROR: gperf failed while creating '$TokenHashFile'\n";
+
+# Generate the header file for the tokenizer, starting it with the preamble for
+# C++ source files
+$TOKEN_LIST = "TKN_" . join(",\n TKN_", @tokens) . ",";
+$SCHEMA_NAME = $Schema;
+process_file($PreambleFile,$HeaderTemplate,$HeaderFile);
+process_file($PreambleFile,$SourceTemplate,$SourceFile);
Propchange: hlvm/trunk/utils/bin/mkTokenizer
------------------------------------------------------------------------------
svn:executable = *
Added: hlvm/trunk/utils/tmplt/Preamble_Code
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/tmplt/Preamble_Code?rev=38020&view=auto
==============================================================================
--- hlvm/trunk/utils/tmplt/Preamble_Code (added)
+++ hlvm/trunk/utils/tmplt/Preamble_Code Sat Jul 7 18:59:01 2007
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// High Level Virtual Machine (HLVM)
+//
+// Copyright (C) 2006 Reid Spencer. All Rights Reserved.
+//
+// This software is free software; you can redistribute it and/or modify it
+// under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// This software is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+// more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library in the file named LICENSE.txt; if not, write to the
+// Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+// MA 02110-1301 USA
+//
Added: hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp?rev=38020&view=auto
==============================================================================
--- hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp (added)
+++ hlvm/trunk/utils/tmplt/Tokenizer_Template.cpp Sat Jul 7 18:59:01 2007
@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+/// @file hlvm/%MODULE_PATH%/%SCHEMA_NAME%Tokenizer.cpp
+/// @author %AUTHOR%
+/// @date %DATE%
+/// @brief Implements the functions of class %SCHEMA_NAME%Tokenizer.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <hlvm/%MODULE_PATH%/%SCHEMA_NAME%Tokenizer.h>
+#include <hlvm/%MODULE_PATH%/%SCHEMA_NAME%TokenHash.i>
+
+namespace HLVM_%MODULE% {
+
+int
+%SCHEMA_NAME%Tokenizer::recognize( const char * xml_str )
+{
+  // Ask the generated hash for the entry matching the string; a null result
+  // means the string is not one of the schema's tokens.
+  const struct TokenMap *entry =
+    %SCHEMA_NAME%TokenHash::in_word_set( xml_str, strlen(xml_str) );
+  return entry ? int(entry->token) : int(TKN_NONE);
+}
+
+const char *
+%SCHEMA_NAME%Tokenizer::lookup( int tkn )
+{
+  // Linear scan of the generated word list for the first entry whose token
+  // equals the requested value; an empty string means "not found".
+  const unsigned int count = sizeof(wordlist)/sizeof(wordlist[0]);
+  unsigned int idx = 0;
+  while (idx < count)
+  {
+    if (tkn == wordlist[idx].token)
+      return wordlist[idx].name;
+    idx++;
+  }
+  return "";
+}
+
+}
Added: hlvm/trunk/utils/tmplt/Tokenizer_Template.h
URL: http://llvm.org/viewvc/llvm-project/hlvm/trunk/utils/tmplt/Tokenizer_Template.h?rev=38020&view=auto
==============================================================================
--- hlvm/trunk/utils/tmplt/Tokenizer_Template.h (added)
+++ hlvm/trunk/utils/tmplt/Tokenizer_Template.h Sat Jul 7 18:59:01 2007
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+/// @file hlvm/%MODULE_PATH%/%SCHEMA_NAME%Tokenizer.h
+/// @author %AUTHOR%
+/// @date %DATE%
+/// @brief Declares the HLVM_%MODULE%::%SCHEMA_NAME%Tokenizer class.
+///
+/// This file is autogenerated by the mkTokenizer script.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef HLVM_%MODULE%_%SCHEMA_NAME%TOKENIZER_H
+#define HLVM_%MODULE%_%SCHEMA_NAME%TOKENIZER_H
+
+namespace HLVM_%MODULE%
+{
+ /// @brief The list of tokens for the %SCHEMA_NAME% schema.
+ enum %SCHEMA_NAME%Tokens
+ {
+ TKN_ERROR = -1,
+ TKN_NONE = 0, ///< Returned by recognize() for a string that is not a token
+ // The generated TKN_<name> enumerators are substituted on the next line.
+ %TOKEN_LIST%
+ TKN_COUNT ///< One more than the value of the last generated token
+ };
+ /// @brief Efficient token recognizer (perfect hash function) for the
+ /// %SCHEMA_NAME% schema
+ class %SCHEMA_NAME%Tokenizer
+ {
+ /// @name Methods
+ /// @{
+ public:
+ /// This function uses a fast perfect hash algorithm to convert the provided
+ /// string into a numeric integer token. The set of strings supported are
+ /// all the element, attribute and value names of the
+ /// %SCHEMA_NAME% Schema.
+ /// @param str The string to convert to a numeric token
+ /// @return Returns an enumerated token value.
+ /// @brief Convert a string token to an enumeration token, if possible.
+ static int recognize( const char * str );
+
+ /// @brief Lookup the name of a token by its value.
+ static const char * lookup( int tkn );
+
+ /// @}
+ };
+}
+
+#endif
More information about the llvm-commits
mailing list