You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and dots ('.'), can be up to 35 characters long. Letters must be lowercase.
		
		
		
		
		
			
		
			
				
					
					
						
							405 lines
						
					
					
						
							15 KiB
						
					
					
				
			
		
		
	
	
							405 lines
						
					
					
						
							15 KiB
						
					
					
//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|  | |
| #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) | |
| #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM | |
|  | |
| #if defined(_MSC_VER) | |
| #pragma once | |
| #endif | |
|  | |
| #include <boost/spirit/home/support/info.hpp> | |
| #include <boost/spirit/home/qi/skip_over.hpp> | |
| #include <boost/spirit/home/qi/parser.hpp> | |
| #include <boost/spirit/home/qi/detail/assign_to.hpp> | |
| #include <boost/spirit/home/lex/reference.hpp> | |
| #include <boost/spirit/home/lex/meta_compiler.hpp> | |
| #include <boost/spirit/home/lex/lexer_type.hpp> | |
| #include <boost/spirit/home/lex/lexer/token_def.hpp> | |
| #include <boost/assert.hpp> | |
| #include <boost/noncopyable.hpp> | |
| #include <boost/detail/iterator.hpp> | |
| #include <boost/fusion/include/vector.hpp> | |
| #include <boost/mpl/assert.hpp> | |
| #include <boost/range/iterator_range.hpp> | |
| #include <string> | |
|  | |
| namespace boost { namespace spirit { namespace lex | |
| { | |
|     /////////////////////////////////////////////////////////////////////////// | |
|     namespace detail | |
|     { | |
|         /////////////////////////////////////////////////////////////////////// | |
|         template <typename LexerDef> | |
|         struct lexer_def_ | |
|           : proto::extends< | |
|                 typename proto::terminal< | |
|                    lex::reference<lexer_def_<LexerDef> const>  | |
|                 >::type | |
|               , lexer_def_<LexerDef> > | |
|           , qi::parser<lexer_def_<LexerDef> > | |
|           , lex::lexer_type<lexer_def_<LexerDef> > | |
|         { | |
|         private: | |
|             // avoid warnings about using 'this' in constructor | |
|             lexer_def_& this_() { return *this; } | |
| 
 | |
|             typedef typename LexerDef::char_type char_type; | |
|             typedef typename LexerDef::string_type string_type; | |
|             typedef typename LexerDef::id_type id_type; | |
| 
 | |
|             typedef lex::reference<lexer_def_ const> reference_; | |
|             typedef typename proto::terminal<reference_>::type terminal_type; | |
|             typedef proto::extends<terminal_type, lexer_def_> proto_base_type; | |
| 
 | |
|             reference_ alias() const | |
|             { | |
|                 return reference_(*this); | |
|             } | |
| 
 | |
|         public: | |
|             // Qi interface: metafunction calculating parser attribute type | |
|             template <typename Context, typename Iterator> | |
|             struct attribute | |
|             { | |
|                 //  the return value of a token set contains the matched token  | |
|                 //  id, and the corresponding pair of iterators | |
|                 typedef typename Iterator::base_iterator_type iterator_type; | |
|                 typedef  | |
|                     fusion::vector2<id_type, iterator_range<iterator_type> >  | |
|                 type; | |
|             }; | |
| 
 | |
|             // Qi interface: parse functionality | |
|             template <typename Iterator, typename Context | |
|               , typename Skipper, typename Attribute> | |
|             bool parse(Iterator& first, Iterator const& last | |
|               , Context& /*context*/, Skipper const& skipper | |
|               , Attribute& attr) const | |
|             { | |
|                 qi::skip_over(first, last, skipper);   // always do a pre-skip | |
|  | |
|                 if (first != last) { | |
|                     typedef typename  | |
|                         boost::detail::iterator_traits<Iterator>::value_type  | |
|                     token_type; | |
| 
 | |
|                     token_type const& t = *first; | |
|                     if (token_is_valid(t) && t.state() == first.get_state()) { | |
|                     // any of the token definitions matched | |
|                         spirit::traits::assign_to(t, attr); | |
|                         ++first; | |
|                         return true; | |
|                     } | |
|                 } | |
|                 return false; | |
|             } | |
| 
 | |
|             // Qi interface: 'what' functionality | |
|             template <typename Context> | |
|             info what(Context& /*context*/) const | |
|             { | |
|                 return info("lexer"); | |
|             } | |
| 
 | |
|         private: | |
|             // allow to use the lexer.self.add("regex1", id1)("regex2", id2); | |
|             // syntax | |
|             struct adder | |
|             { | |
|                 adder(lexer_def_& def_)  | |
|                   : def(def_) {} | |
| 
 | |
|                 // Add a token definition based on a single character as given | |
|                 // by the first parameter, the second parameter allows to  | |
|                 // specify the token id to use for the new token. If no token | |
|                 // id is given the character code is used. | |
|                 adder const& operator()(char_type c | |
|                   , id_type token_id = id_type()) const | |
|                 { | |
|                     if (id_type() == token_id) | |
|                         token_id = static_cast<id_type>(c); | |
|                     def.def.add_token (def.state.c_str(), c, token_id | |
|                         , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
|                     return *this; | |
|                 } | |
| 
 | |
|                 // Add a token definition based on a character sequence as  | |
|                 // given by the first parameter, the second parameter allows to  | |
|                 // specify the token id to use for the new token. If no token | |
|                 // id is given this function will generate a unique id to be  | |
|                 // used as the token's id. | |
|                 adder const& operator()(string_type const& s | |
|                   , id_type token_id = id_type()) const | |
|                 { | |
|                     if (id_type() == token_id) | |
|                         token_id = def.def.get_next_id(); | |
|                     def.def.add_token (def.state.c_str(), s, token_id | |
|                         , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
|                     return *this; | |
|                 } | |
| 
 | |
|                 template <typename Attribute> | |
|                 adder const& operator()( | |
|                     token_def<Attribute, char_type, id_type>& tokdef | |
|                   , id_type token_id = id_type()) const | |
|                 { | |
|                     // make sure we have a token id | |
|                     if (id_type() == token_id) { | |
|                         if (id_type() == tokdef.id()) { | |
|                             token_id = def.def.get_next_id(); | |
|                             tokdef.id(token_id); | |
|                         } | |
|                         else { | |
|                             token_id = tokdef.id(); | |
|                         } | |
|                     } | |
|                     else {  | |
|                     // the following assertion makes sure that the token_def | |
|                     // instance has not been assigned a different id earlier | |
|                         BOOST_ASSERT(id_type() == tokdef.id()  | |
|                                   || token_id == tokdef.id()); | |
|                         tokdef.id(token_id); | |
|                     } | |
| 
 | |
|                     def.define(tokdef); | |
|                     return *this; | |
|                 } | |
| 
 | |
| //                 template <typename F> | |
| //                 adder const& operator()(char_type c, id_type token_id, F act) const | |
| //                 { | |
| //                     if (id_type() == token_id) | |
| //                         token_id = def.def.get_next_id(); | |
| //                     std::size_t unique_id =  | |
| //                         def.def.add_token (def.state.c_str(), s, token_id); | |
| //                     def.def.add_action(unique_id, def.state.c_str(), act); | |
| //                     return *this; | |
| //                 } | |
|  | |
|                 lexer_def_& def; | |
| 
 | |
|             private: | |
|                 // silence MSVC warning C4512: assignment operator could not be generated | |
|                 adder& operator= (adder const&); | |
|             }; | |
|             friend struct adder; | |
| 
 | |
|             // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); | |
|             // syntax | |
|             struct pattern_adder | |
|             { | |
|                 pattern_adder(lexer_def_& def_)  | |
|                   : def(def_) {} | |
| 
 | |
|                 pattern_adder const& operator()(string_type const& p | |
|                   , string_type const& s) const | |
|                 { | |
|                     def.def.add_pattern (def.state.c_str(), p, s); | |
|                     return *this; | |
|                 } | |
| 
 | |
|                 lexer_def_& def; | |
| 
 | |
|             private: | |
|                 // silence MSVC warning C4512: assignment operator could not be generated | |
|                 pattern_adder& operator= (pattern_adder const&); | |
|             }; | |
|             friend struct pattern_adder; | |
| 
 | |
|         private: | |
|             // Helper function to invoke the necessary 2 step compilation | |
|             // process on token definition expressions | |
|             template <typename TokenExpr> | |
|             void compile2pass(TokenExpr const& expr)  | |
|             { | |
|                 expr.collect(def, state, targetstate); | |
|                 expr.add_actions(def); | |
|             } | |
| 
 | |
|         public: | |
|             /////////////////////////////////////////////////////////////////// | |
|             template <typename Expr> | |
|             void define(Expr const& expr) | |
|             { | |
|                 compile2pass(compile<lex::domain>(expr)); | |
|             } | |
| 
 | |
|             lexer_def_(LexerDef& def_, string_type const& state_ | |
|                   , string_type const& targetstate_ = string_type()) | |
|               : proto_base_type(terminal_type::make(alias())) | |
|               , add(this_()), add_pattern(this_()), def(def_) | |
|               , state(state_), targetstate(targetstate_) | |
|             {} | |
| 
 | |
|             // allow to switch states | |
|             lexer_def_ operator()(char_type const* state) const | |
|             { | |
|                 return lexer_def_(def, state); | |
|             } | |
|             lexer_def_ operator()(char_type const* state | |
|               , char_type const* targetstate) const | |
|             { | |
|                 return lexer_def_(def, state, targetstate); | |
|             } | |
|             lexer_def_ operator()(string_type const& state | |
|               , string_type const& targetstate = string_type()) const | |
|             { | |
|                 return lexer_def_(def, state, targetstate); | |
|             } | |
| 
 | |
|             // allow to assign a token definition expression | |
|             template <typename Expr> | |
|             lexer_def_& operator= (Expr const& xpr) | |
|             { | |
|                 // Report invalid expression error as early as possible. | |
|                 // If you got an error_invalid_expression error message here, | |
|                 // then the expression (expr) is not a valid spirit lex  | |
|                 // expression. | |
|                 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
| 
 | |
|                 def.clear(state.c_str()); | |
|                 define(xpr); | |
|                 return *this; | |
|             } | |
| 
 | |
|             // explicitly tell the lexer that the given state will be defined | |
|             // (useful in conjunction with "*") | |
|             std::size_t add_state(char_type const* state = 0) | |
|             { | |
|                 return def.add_state(state ? state : def.initial_state().c_str()); | |
|             } | |
| 
 | |
|             adder add; | |
|             pattern_adder add_pattern; | |
| 
 | |
|         private: | |
|             LexerDef& def; | |
|             string_type state; | |
|             string_type targetstate; | |
| 
 | |
|         private: | |
|             // silence MSVC warning C4512: assignment operator could not be generated | |
|             lexer_def_& operator= (lexer_def_ const&); | |
|         }; | |
| 
 | |
| #if defined(BOOST_NO_RVALUE_REFERENCES) | |
|         // allow to assign a token definition expression | |
|         template <typename LexerDef, typename Expr> | |
|         inline lexer_def_<LexerDef>& | |
|         operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) | |
|         { | |
|             // Report invalid expression error as early as possible. | |
|             // If you got an error_invalid_expression error message here, | |
|             // then the expression (expr) is not a valid spirit lex  | |
|             // expression. | |
|             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
| 
 | |
|             lexdef.define(xpr); | |
|             return lexdef; | |
|         } | |
| #else | |
|         // allow to assign a token definition expression | |
|         template <typename LexerDef, typename Expr> | |
|         inline lexer_def_<LexerDef>& | |
|         operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) | |
|         { | |
|             // Report invalid expression error as early as possible. | |
|             // If you got an error_invalid_expression error message here, | |
|             // then the expression (expr) is not a valid spirit lex  | |
|             // expression. | |
|             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
| 
 | |
|             lexdef.define(xpr); | |
|             return lexdef; | |
|         } | |
| #endif | |
|  | |
|         template <typename LexerDef, typename Expr> | |
|         inline lexer_def_<LexerDef>&  | |
|         operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) | |
|         { | |
|             // Report invalid expression error as early as possible. | |
|             // If you got an error_invalid_expression error message here, | |
|             // then the expression (expr) is not a valid spirit lex  | |
|             // expression. | |
|             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
| 
 | |
|             lexdef.define(xpr); | |
|             return lexdef; | |
|         } | |
|     } | |
| 
 | |
|     /////////////////////////////////////////////////////////////////////////// | |
|     //  The match_flags flags are used to influence different matching  | |
|     //  modes of the lexer | |
|     struct match_flags | |
|     { | |
|         enum enum_type  | |
|         { | |
|             match_default = 0,          // no flags | |
|             match_not_dot_newline = 1,  // the regex '.' doesn't match newlines | |
|             match_icase = 2             // all matching operations are case insensitive | |
|         }; | |
|     }; | |
| 
 | |
    ///////////////////////////////////////////////////////////////////////////
    //  The lexer class template defined further below represents a lexer
    //  object
    ///////////////////////////////////////////////////////////////////////////
|  | |
|     /////////////////////////////////////////////////////////////////////////// | |
|     // This is the first token id automatically assigned by the library  | |
|     // if needed | |
|     enum tokenids  | |
|     { | |
|         min_token_id = 0x10000 | |
|     }; | |
| 
 | |
|     template <typename Lexer> | |
|     class lexer : public Lexer | |
|     { | |
|     private: | |
|         // avoid warnings about using 'this' in constructor | |
|         lexer& this_() { return *this; } | |
| 
 | |
|         std::size_t next_token_id;   // has to be an integral type | |
|  | |
|     public: | |
|         typedef Lexer lexer_type; | |
|         typedef typename Lexer::id_type id_type; | |
|         typedef typename Lexer::char_type char_type; | |
|         typedef typename Lexer::iterator_type iterator_type; | |
|         typedef lexer base_type; | |
| 
 | |
|         typedef detail::lexer_def_<lexer> lexer_def; | |
|         typedef std::basic_string<char_type> string_type; | |
| 
 | |
|         lexer(unsigned int flags = match_flags::match_default | |
|             , id_type first_id = id_type(min_token_id))  | |
|           : lexer_type(flags) | |
|           , next_token_id(first_id) | |
|           , self(this_(), lexer_type::initial_state())  | |
|         {} | |
| 
 | |
|         // access iterator interface | |
|         template <typename Iterator> | |
|         iterator_type begin(Iterator& first, Iterator const& last | |
|                 , char_type const* initial_state = 0) const | |
|             { return this->lexer_type::begin(first, last, initial_state); } | |
|         iterator_type end() const  | |
|             { return this->lexer_type::end(); } | |
| 
 | |
|         std::size_t map_state(char_type const* state) | |
|             { return this->lexer_type::add_state(state); } | |
| 
 | |
|         //  create a unique token id | |
|         id_type get_next_id() { return id_type(next_token_id++); } | |
| 
 | |
|         lexer_def self;  // allow for easy token definition | |
|     }; | |
| 
 | |
| }}} | |
| 
 | |
| #endif
 | |
| 
 |