You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and dots ('.'), can be up to 35 characters long. Letters must be lowercase.
281 lines
8.1 KiB
281 lines
8.1 KiB
/*============================================================================= |
|
Copyright (c) 2001-2003 Joel de Guzman |
|
http://spirit.sourceforge.net/ |
|
|
|
Use, modification and distribution is subject to the Boost Software |
|
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at |
|
http://www.boost.org/LICENSE_1_0.txt) |
|
=============================================================================*/ |
|
#ifndef BOOST_SPIRIT_TST_IPP |
|
#define BOOST_SPIRIT_TST_IPP |
|
|
|
/////////////////////////////////////////////////////////////////////////////// |
|
#include <memory> // for std::auto_ptr |
|
#include <boost/spirit/home/classic/core/assert.hpp> |
|
|
|
/////////////////////////////////////////////////////////////////////////////// |
|
namespace boost { namespace spirit { |
|
|
|
BOOST_SPIRIT_CLASSIC_NAMESPACE_BEGIN |
|
|
|
namespace impl |
|
{ |
|
|
|
/////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// tst class |
|
// |
|
// Ternary Search Tree implementation. The data structure is faster than |
|
// hashing for many typical search problems especially when the search |
|
// interface is iterator based. Searching for a string of length k in a |
|
// ternary search tree with n strings will require at most O(log n+k) |
|
// character comparisons. TSTs are many times faster than hash tables |
|
// for unsuccessful searches since mismatches are discovered earlier |
|
// after examining only a few characters. Hash tables always examine an |
|
// entire key when searching. |
|
// |
|
// For details see http://www.cs.princeton.edu/~rs/strings/. |
|
// |
|
// *** This is a low level class and is |
|
// not meant for public consumption *** |
|
// |
|
/////////////////////////////////////////////////////////////////////////////// |
|
template <typename T, typename CharT> |
|
struct tst_node |
|
{ |
|
tst_node(CharT value_) |
|
: value(value_) |
|
, left(0) |
|
, right(0) |
|
{ middle.link = 0; } |
|
|
|
~tst_node() |
|
{ |
|
delete left; |
|
delete right; |
|
if (value) |
|
delete middle.link; |
|
else |
|
delete middle.data; |
|
} |
|
|
|
tst_node* |
|
clone() const |
|
{ |
|
std::auto_ptr<tst_node> copy(new tst_node(value)); |
|
|
|
if (left) |
|
copy->left = left->clone(); |
|
if (right) |
|
copy->right = right->clone(); |
|
|
|
if (value && middle.link) |
|
{ |
|
copy->middle.link = middle.link->clone(); |
|
} |
|
else |
|
{ |
|
std::auto_ptr<T> mid_data(new T(*middle.data)); |
|
copy->middle.data = mid_data.release(); |
|
} |
|
|
|
return copy.release(); |
|
} |
|
|
|
union center { |
|
|
|
tst_node* link; |
|
T* data; |
|
}; |
|
|
|
CharT value; |
|
tst_node* left; |
|
center middle; |
|
tst_node* right; |
|
}; |
|
|
|
/////////////////////////////////////////////////////////////////////////// |
|
template <typename T, typename CharT> |
|
class tst |
|
{ |
|
public: |
|
|
|
struct search_info |
|
{ |
|
T* data; |
|
std::size_t length; |
|
}; |
|
|
|
tst() |
|
: root(0) {} |
|
|
|
tst(tst const& other) |
|
: root(other.root ? other.root->clone() : 0) {} |
|
|
|
~tst() |
|
{ delete root; } |
|
|
|
tst& |
|
operator=(tst const& other) |
|
{ |
|
if (this != &other) |
|
{ |
|
node_t* new_root = other.root ? other.root->clone() : 0; |
|
delete root; |
|
root = new_root; |
|
} |
|
return *this; |
|
} |
|
|
|
template <typename IteratorT> |
|
T* add(IteratorT first, IteratorT const& last, T const& data) |
|
{ |
|
if (first == last) |
|
return 0; |
|
|
|
node_t** np = &root; |
|
CharT ch = *first; |
|
|
|
BOOST_SPIRIT_ASSERT((first == last || ch != 0) |
|
&& "Won't add string containing null character"); |
|
|
|
for (;;) |
|
{ |
|
if (*np == 0 || ch == 0) |
|
{ |
|
node_t* right = 0; |
|
if (np != 0) |
|
right = *np; |
|
*np = new node_t(ch); |
|
if (right) |
|
(**np).right = right; |
|
} |
|
|
|
if (ch < (**np).value) |
|
{ |
|
np = &(**np).left; |
|
} |
|
else |
|
{ |
|
if (ch == (**np).value) |
|
{ |
|
if (ch == 0) |
|
{ |
|
if ((**np).middle.data == 0) |
|
{ |
|
(**np).middle.data = new T(data); |
|
return (**np).middle.data; |
|
} |
|
else |
|
{ |
|
// re-addition is disallowed |
|
return 0; |
|
} |
|
} |
|
++first; |
|
ch = (first == last) ? CharT(0) : *first; |
|
BOOST_SPIRIT_ASSERT((first == last || ch != 0) |
|
&& "Won't add string containing null character"); |
|
np = &(**np).middle.link; |
|
} |
|
else |
|
{ |
|
np = &(**np).right; |
|
} |
|
} |
|
} |
|
} |
|
|
|
template <typename ScannerT> |
|
search_info find(ScannerT const& scan) const |
|
{ |
|
search_info result = { 0, 0 }; |
|
if (scan.at_end()) { |
|
return result; |
|
} |
|
|
|
typedef typename ScannerT::iterator_t iterator_t; |
|
node_t* np = root; |
|
CharT ch = *scan; |
|
iterator_t save = scan.first; |
|
iterator_t latest = scan.first; |
|
std::size_t latest_len = 0; |
|
|
|
while (np) |
|
{ |
|
|
|
if (ch < np->value) // => go left! |
|
{ |
|
if (np->value == 0) |
|
{ |
|
result.data = np->middle.data; |
|
if (result.data) |
|
{ |
|
latest = scan.first; |
|
latest_len = result.length; |
|
} |
|
} |
|
|
|
np = np->left; |
|
} |
|
else if (ch == np->value) // => go middle! |
|
{ |
|
// Matching the null character is not allowed. |
|
if (np->value == 0) |
|
{ |
|
result.data = np->middle.data; |
|
if (result.data) |
|
{ |
|
latest = scan.first; |
|
latest_len = result.length; |
|
} |
|
break; |
|
} |
|
|
|
++scan; |
|
ch = scan.at_end() ? CharT(0) : *scan; |
|
np = np->middle.link; |
|
++result.length; |
|
} |
|
else // (ch > np->value) => go right! |
|
{ |
|
if (np->value == 0) |
|
{ |
|
result.data = np->middle.data; |
|
if (result.data) |
|
{ |
|
latest = scan.first; |
|
latest_len = result.length; |
|
} |
|
} |
|
|
|
np = np->right; |
|
} |
|
} |
|
|
|
if (result.data == 0) |
|
{ |
|
scan.first = save; |
|
} |
|
else |
|
{ |
|
scan.first = latest; |
|
result.length = latest_len; |
|
} |
|
return result; |
|
} |
|
|
|
private: |
|
|
|
typedef tst_node<T, CharT> node_t; |
|
node_t* root; |
|
}; |
|
|
|
/////////////////////////////////////////////////////////////////////////////// |
|
} // namespace impl |
|
|
|
BOOST_SPIRIT_CLASSIC_NAMESPACE_END |
|
|
|
}} // namespace boost::spirit |
|
|
|
#endif
|
|
|