// (C) Copyright Jeremy Siek 2004
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
|
|
|
#ifndef BOOST_STRINGTOK_HPP |
|
#define BOOST_STRINGTOK_HPP |
|
|
|
/*
 * stringtok.hpp -- Breaks a string into tokens.  This is an example for lib3.
 *
 * Template function looks like this:
 *
 *    template <typename Container>
 *    void stringtok (Container &l,
 *                    string const &s,
 *                    char const * const ws = " \t\n");
 *
 * A nondestructive version of strtok() that handles its own memory and can
 * be broken up by any character(s).  Does all the work at once rather than
 * in an invocation loop like strtok() requires.
 *
 * Container is any type that supports push_back(a_string), although
 * list<string> and deque<string> are recommended due to their O(1) push_back.
 * (I prefer deque<> because op[]/at() is available as well.)  The first
 * parameter references an existing Container.
 *
 * s is the string to be tokenized.  From the parameter declaration, it can
 * be seen that s is not affected.  Since references-to-const may refer to
 * temporaries, you could use stringtok(some_container, readline("")) when
 * using the GNU readline library.
 *
 * The final parameter is an array of characters that serve as whitespace.
 * Whitespace characters default to one or more of tab, space, and newline,
 * in any combination.
 *
 * 'l' need not be empty on entry.  On return, 'l' will have the token
 * strings appended.
 *
 *
 * [Example:
 *       list<string> ls;
 *       stringtok (ls, " this \t is\t\n a test ");
 *       for (list<string>::const_iterator i = ls.begin();
 *            i != ls.end(); ++i)
 *       {
 *            cerr << ':' << (*i) << ":\n";
 *       }
 *
 *  would print
 *       :this:
 *       :is:
 *       :a:
 *       :test:
 * -end example]
 *
 * pedwards@jaj.com  May 1999
 */
|
|
|
|
|
#include <string> |
|
#include <cstring> // for strchr |
|
|
|
|
|
/*****************************************************************
 * isws -- tests whether a character belongs to a set of "whitespace"
 * (delimiter) characters.
 *
 *   c     the character to classify.
 *   wstr  NUL-terminated array holding the characters that count as
 *         whitespace; must not be a null pointer.
 *
 * Returns true when c is one of the characters listed in wstr.
 *
 * The terminating NUL of wstr is not a member of the set.  strchr()
 * treats the terminator as part of the string, so strchr(wstr, '\0')
 * always succeeds; '\0' is therefore rejected before the search.
 */
inline bool
isws (char c, char const * const wstr)
{
  // Guard first: without it a NUL character would match the terminator
  // of wstr and be reported as whitespace.
  return (c != '\0') && (std::strchr(wstr, c) != NULL);
}
|
|
|
|
|
namespace boost {

/*****************************************************************
 * stringtok -- splits s into tokens and appends them to l.
 *
 *   l   any container that supports push_back(std::string).  Elements
 *       already in l are left alone; the tokens are appended after them.
 *   s   the string to tokenize; it is not modified.
 *   ws  NUL-terminated array of the characters that separate tokens
 *       (default: space, tab and newline).  Must not be a null pointer.
 *
 * A run of one or more separator characters ends a token, so empty
 * tokens are never produced, and separators at the start or end of s
 * are ignored.  Only the characters listed in ws separate tokens; a NUL
 * character embedded in s is ordinary token content.
 *
 * "Whitespace" is not a character trait, so the caller supplies it; this
 * also makes it easy to split on something other than whitespace.  The
 * function works on std::string only, not on basic_string in general.
 */
template <typename Container>
void
stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
{
  typedef std::string::size_type size_type;
  const size_type npos = std::string::npos;

  // Start of the first token, skipping any leading separators.
  size_type first = s.find_first_not_of(ws);

  while (first != npos) {
    // One past the end of the current token (npos: runs to end of s).
    const size_type last = s.find_first_of(ws, first);

    if (last == npos) {
      l.push_back(s.substr(first));
      return;
    }
    l.push_back(s.substr(first, last - first));

    // Skip the separator run that follows; npos means nothing is left.
    first = s.find_first_not_of(ws, last);
  }
}

} // namespace boost
|
|
|
#endif // BOOST_STRINGTOK_HPP
|
|
|