You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and dots ('.'), can be up to 35 characters long. Letters must be lowercase.
405 lines
15 KiB
405 lines
15 KiB
//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) |
|
#define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM |
|
|
|
#if defined(_MSC_VER) |
|
#pragma once |
|
#endif |
|
|
|
#include <boost/spirit/home/support/info.hpp> |
|
#include <boost/spirit/home/qi/skip_over.hpp> |
|
#include <boost/spirit/home/qi/parser.hpp> |
|
#include <boost/spirit/home/qi/detail/assign_to.hpp> |
|
#include <boost/spirit/home/lex/reference.hpp> |
|
#include <boost/spirit/home/lex/meta_compiler.hpp> |
|
#include <boost/spirit/home/lex/lexer_type.hpp> |
|
#include <boost/spirit/home/lex/lexer/token_def.hpp> |
|
#include <boost/assert.hpp> |
|
#include <boost/noncopyable.hpp> |
|
#include <boost/detail/iterator.hpp> |
|
#include <boost/fusion/include/vector.hpp> |
|
#include <boost/mpl/assert.hpp> |
|
#include <boost/range/iterator_range.hpp> |
|
#include <string> |
|
|
|
namespace boost { namespace spirit { namespace lex |
|
{ |
|
/////////////////////////////////////////////////////////////////////////// |
|
namespace detail |
|
{ |
|
/////////////////////////////////////////////////////////////////////// |
|
template <typename LexerDef> |
|
struct lexer_def_ |
|
: proto::extends< |
|
typename proto::terminal< |
|
lex::reference<lexer_def_<LexerDef> const> |
|
>::type |
|
, lexer_def_<LexerDef> > |
|
, qi::parser<lexer_def_<LexerDef> > |
|
, lex::lexer_type<lexer_def_<LexerDef> > |
|
{ |
|
private: |
|
// avoid warnings about using 'this' in constructor |
|
lexer_def_& this_() { return *this; } |
|
|
|
typedef typename LexerDef::char_type char_type; |
|
typedef typename LexerDef::string_type string_type; |
|
typedef typename LexerDef::id_type id_type; |
|
|
|
typedef lex::reference<lexer_def_ const> reference_; |
|
typedef typename proto::terminal<reference_>::type terminal_type; |
|
typedef proto::extends<terminal_type, lexer_def_> proto_base_type; |
|
|
|
reference_ alias() const |
|
{ |
|
return reference_(*this); |
|
} |
|
|
|
public: |
|
// Qi interface: metafunction calculating parser attribute type |
|
template <typename Context, typename Iterator> |
|
struct attribute |
|
{ |
|
// the return value of a token set contains the matched token |
|
// id, and the corresponding pair of iterators |
|
typedef typename Iterator::base_iterator_type iterator_type; |
|
typedef |
|
fusion::vector2<id_type, iterator_range<iterator_type> > |
|
type; |
|
}; |
|
|
|
// Qi interface: parse functionality |
|
template <typename Iterator, typename Context |
|
, typename Skipper, typename Attribute> |
|
bool parse(Iterator& first, Iterator const& last |
|
, Context& /*context*/, Skipper const& skipper |
|
, Attribute& attr) const |
|
{ |
|
qi::skip_over(first, last, skipper); // always do a pre-skip |
|
|
|
if (first != last) { |
|
typedef typename |
|
boost::detail::iterator_traits<Iterator>::value_type |
|
token_type; |
|
|
|
token_type const& t = *first; |
|
if (token_is_valid(t) && t.state() == first.get_state()) { |
|
// any of the token definitions matched |
|
spirit::traits::assign_to(t, attr); |
|
++first; |
|
return true; |
|
} |
|
} |
|
return false; |
|
} |
|
|
|
// Qi interface: 'what' functionality |
|
template <typename Context> |
|
info what(Context& /*context*/) const |
|
{ |
|
return info("lexer"); |
|
} |
|
|
|
private: |
|
// allow to use the lexer.self.add("regex1", id1)("regex2", id2); |
|
// syntax |
|
struct adder |
|
{ |
|
adder(lexer_def_& def_) |
|
: def(def_) {} |
|
|
|
// Add a token definition based on a single character as given |
|
// by the first parameter, the second parameter allows to |
|
// specify the token id to use for the new token. If no token |
|
// id is given the character code is used. |
|
adder const& operator()(char_type c |
|
, id_type token_id = id_type()) const |
|
{ |
|
if (id_type() == token_id) |
|
token_id = static_cast<id_type>(c); |
|
def.def.add_token (def.state.c_str(), c, token_id |
|
, def.targetstate.empty() ? 0 : def.targetstate.c_str()); |
|
return *this; |
|
} |
|
|
|
// Add a token definition based on a character sequence as |
|
// given by the first parameter, the second parameter allows to |
|
// specify the token id to use for the new token. If no token |
|
// id is given this function will generate a unique id to be |
|
// used as the token's id. |
|
adder const& operator()(string_type const& s |
|
, id_type token_id = id_type()) const |
|
{ |
|
if (id_type() == token_id) |
|
token_id = def.def.get_next_id(); |
|
def.def.add_token (def.state.c_str(), s, token_id |
|
, def.targetstate.empty() ? 0 : def.targetstate.c_str()); |
|
return *this; |
|
} |
|
|
|
template <typename Attribute> |
|
adder const& operator()( |
|
token_def<Attribute, char_type, id_type>& tokdef |
|
, id_type token_id = id_type()) const |
|
{ |
|
// make sure we have a token id |
|
if (id_type() == token_id) { |
|
if (id_type() == tokdef.id()) { |
|
token_id = def.def.get_next_id(); |
|
tokdef.id(token_id); |
|
} |
|
else { |
|
token_id = tokdef.id(); |
|
} |
|
} |
|
else { |
|
// the following assertion makes sure that the token_def |
|
// instance has not been assigned a different id earlier |
|
BOOST_ASSERT(id_type() == tokdef.id() |
|
|| token_id == tokdef.id()); |
|
tokdef.id(token_id); |
|
} |
|
|
|
def.define(tokdef); |
|
return *this; |
|
} |
|
|
|
// template <typename F> |
|
// adder const& operator()(char_type c, id_type token_id, F act) const |
|
// { |
|
// if (id_type() == token_id) |
|
// token_id = def.def.get_next_id(); |
|
// std::size_t unique_id = |
|
// def.def.add_token (def.state.c_str(), s, token_id); |
|
// def.def.add_action(unique_id, def.state.c_str(), act); |
|
// return *this; |
|
// } |
|
|
|
lexer_def_& def; |
|
|
|
private: |
|
// silence MSVC warning C4512: assignment operator could not be generated |
|
adder& operator= (adder const&); |
|
}; |
|
friend struct adder; |
|
|
|
// allow to use lexer.self.add_pattern("pattern1", "regex1")(...); |
|
// syntax |
|
struct pattern_adder |
|
{ |
|
pattern_adder(lexer_def_& def_) |
|
: def(def_) {} |
|
|
|
pattern_adder const& operator()(string_type const& p |
|
, string_type const& s) const |
|
{ |
|
def.def.add_pattern (def.state.c_str(), p, s); |
|
return *this; |
|
} |
|
|
|
lexer_def_& def; |
|
|
|
private: |
|
// silence MSVC warning C4512: assignment operator could not be generated |
|
pattern_adder& operator= (pattern_adder const&); |
|
}; |
|
friend struct pattern_adder; |
|
|
|
private: |
|
// Helper function to invoke the necessary 2 step compilation |
|
// process on token definition expressions |
|
template <typename TokenExpr> |
|
void compile2pass(TokenExpr const& expr) |
|
{ |
|
expr.collect(def, state, targetstate); |
|
expr.add_actions(def); |
|
} |
|
|
|
public: |
|
/////////////////////////////////////////////////////////////////// |
|
template <typename Expr> |
|
void define(Expr const& expr) |
|
{ |
|
compile2pass(compile<lex::domain>(expr)); |
|
} |
|
|
|
lexer_def_(LexerDef& def_, string_type const& state_ |
|
, string_type const& targetstate_ = string_type()) |
|
: proto_base_type(terminal_type::make(alias())) |
|
, add(this_()), add_pattern(this_()), def(def_) |
|
, state(state_), targetstate(targetstate_) |
|
{} |
|
|
|
// allow to switch states |
|
lexer_def_ operator()(char_type const* state) const |
|
{ |
|
return lexer_def_(def, state); |
|
} |
|
lexer_def_ operator()(char_type const* state |
|
, char_type const* targetstate) const |
|
{ |
|
return lexer_def_(def, state, targetstate); |
|
} |
|
lexer_def_ operator()(string_type const& state |
|
, string_type const& targetstate = string_type()) const |
|
{ |
|
return lexer_def_(def, state, targetstate); |
|
} |
|
|
|
// allow to assign a token definition expression |
|
template <typename Expr> |
|
lexer_def_& operator= (Expr const& xpr) |
|
{ |
|
// Report invalid expression error as early as possible. |
|
// If you got an error_invalid_expression error message here, |
|
// then the expression (expr) is not a valid spirit lex |
|
// expression. |
|
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); |
|
|
|
def.clear(state.c_str()); |
|
define(xpr); |
|
return *this; |
|
} |
|
|
|
// explicitly tell the lexer that the given state will be defined |
|
// (useful in conjunction with "*") |
|
std::size_t add_state(char_type const* state = 0) |
|
{ |
|
return def.add_state(state ? state : def.initial_state().c_str()); |
|
} |
|
|
|
adder add; |
|
pattern_adder add_pattern; |
|
|
|
private: |
|
LexerDef& def; |
|
string_type state; |
|
string_type targetstate; |
|
|
|
private: |
|
// silence MSVC warning C4512: assignment operator could not be generated |
|
lexer_def_& operator= (lexer_def_ const&); |
|
}; |
|
|
|
#if defined(BOOST_NO_RVALUE_REFERENCES) |
|
// allow to assign a token definition expression |
|
template <typename LexerDef, typename Expr> |
|
inline lexer_def_<LexerDef>& |
|
operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) |
|
{ |
|
// Report invalid expression error as early as possible. |
|
// If you got an error_invalid_expression error message here, |
|
// then the expression (expr) is not a valid spirit lex |
|
// expression. |
|
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); |
|
|
|
lexdef.define(xpr); |
|
return lexdef; |
|
} |
|
#else |
|
// allow to assign a token definition expression |
|
template <typename LexerDef, typename Expr> |
|
inline lexer_def_<LexerDef>& |
|
operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) |
|
{ |
|
// Report invalid expression error as early as possible. |
|
// If you got an error_invalid_expression error message here, |
|
// then the expression (expr) is not a valid spirit lex |
|
// expression. |
|
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); |
|
|
|
lexdef.define(xpr); |
|
return lexdef; |
|
} |
|
#endif |
|
|
|
template <typename LexerDef, typename Expr> |
|
inline lexer_def_<LexerDef>& |
|
operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) |
|
{ |
|
// Report invalid expression error as early as possible. |
|
// If you got an error_invalid_expression error message here, |
|
// then the expression (expr) is not a valid spirit lex |
|
// expression. |
|
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); |
|
|
|
lexdef.define(xpr); |
|
return lexdef; |
|
} |
|
} |
|
|
|
///////////////////////////////////////////////////////////////////////////
//  The match_flags flags are used to influence different matching
//  modes of the lexer.
//
//  The enumerators are wrapped in a struct so that they have to be
//  qualified (match_flags::match_icase). The non-zero values are distinct
//  bits; presumably they are meant to be combined with '|' into the
//  unsigned flags argument of the lexer constructor.
struct match_flags
{
    enum enum_type
    {
        match_default = 0,          // no flags
        match_not_dot_newline = 1,  // the regex '.' doesn't match newlines
        match_icase = 2             // all matching operations are case insensitive
    };
};
|
|
|
///////////////////////////////////////////////////////////////////////////
//  The lexer class template defined below represents a lexer object
///////////////////////////////////////////////////////////////////////////
|
|
|
///////////////////////////////////////////////////////////////////////////
//  This is the first token id automatically assigned by the library
//  if needed
enum tokenids
{
    min_token_id = 0x10000
};
|
|
|
template <typename Lexer> |
|
class lexer : public Lexer |
|
{ |
|
private: |
|
// avoid warnings about using 'this' in constructor |
|
lexer& this_() { return *this; } |
|
|
|
std::size_t next_token_id; // has to be an integral type |
|
|
|
public: |
|
typedef Lexer lexer_type; |
|
typedef typename Lexer::id_type id_type; |
|
typedef typename Lexer::char_type char_type; |
|
typedef typename Lexer::iterator_type iterator_type; |
|
typedef lexer base_type; |
|
|
|
typedef detail::lexer_def_<lexer> lexer_def; |
|
typedef std::basic_string<char_type> string_type; |
|
|
|
lexer(unsigned int flags = match_flags::match_default |
|
, id_type first_id = id_type(min_token_id)) |
|
: lexer_type(flags) |
|
, next_token_id(first_id) |
|
, self(this_(), lexer_type::initial_state()) |
|
{} |
|
|
|
// access iterator interface |
|
template <typename Iterator> |
|
iterator_type begin(Iterator& first, Iterator const& last |
|
, char_type const* initial_state = 0) const |
|
{ return this->lexer_type::begin(first, last, initial_state); } |
|
iterator_type end() const |
|
{ return this->lexer_type::end(); } |
|
|
|
std::size_t map_state(char_type const* state) |
|
{ return this->lexer_type::add_state(state); } |
|
|
|
// create a unique token id |
|
id_type get_next_id() { return id_type(next_token_id++); } |
|
|
|
lexer_def self; // allow for easy token definition |
|
}; |
|
|
|
}}} |
|
|
|
#endif
|
|
|