aboutsummaryrefslogtreecommitdiff
path: root/0.4.0/paludis/util/tokeniser.hh
diff options
context:
space:
mode:
Diffstat (limited to '0.4.0/paludis/util/tokeniser.hh')
-rw-r--r--0.4.0/paludis/util/tokeniser.hh243
1 files changed, 243 insertions, 0 deletions
diff --git a/0.4.0/paludis/util/tokeniser.hh b/0.4.0/paludis/util/tokeniser.hh
new file mode 100644
index 000000000..f35082660
--- /dev/null
+++ b/0.4.0/paludis/util/tokeniser.hh
@@ -0,0 +1,243 @@
+/* vim: set sw=4 sts=4 et foldmethod=syntax : */
+
+/*
+ * Copyright (c) 2006 Ciaran McCreesh <ciaran.mccreesh@blueyonder.co.uk>
+ *
+ * This file is part of the Paludis package manager. Paludis is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU General
+ * Public License version 2, as published by the Free Software Foundation.
+ *
+ * Paludis is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef PALUDIS_GUARD_PALUDIS_TOKENISER_HH
+#define PALUDIS_GUARD_PALUDIS_TOKENISER_HH 1
+
+#include <iterator>
+#include <paludis/util/instantiation_policy.hh>
+#include <string>
+
+/** \file
+ * Declarations for Tokeniser and related utilities.
+ *
+ * \ingroup grptokenise
+ */
+
+namespace paludis
+{
+ /**
+ * Tag types selecting how Tokeniser recognises delimiters (its DelimKind_ template parameter).
+ *
+ * \ingroup grptokenise
+ */
+ namespace delim_kind
+ {
+ /**
+ * Any one character of the delimiter string splits; whether delimiters are kept or dropped is chosen by delim_mode.
+ *
+ * \ingroup grptokenise
+ */
+ struct AnyOfTag
+ {
+ };
+ }
+
+ /**
+ * Tag types selecting what Tokeniser does with the delimiters it finds (its DelimMode_ template parameter).
+ *
+ * \ingroup grptokenise
+ */
+ namespace delim_mode
+ {
+ /**
+ * Discard the delimiters: only the tokens between them are written out.
+ *
+ * \ingroup grptokenise
+ */
+ struct DelimiterTag
+ {
+ };
+
+ /**
+ * Keep the delimiters: each run of delimiter characters is written out as an item of its own.
+ *
+ * \ingroup grptokenise
+ */
+ struct BoundaryTag
+ {
+ };
+ }
+
+ /**
+ * Implementation details of Tokeniser; for Tokeniser internal use only.
+ *
+ * \ingroup grptokenise
+ */
+ namespace tokeniser_internals
+ {
+ /**
+ * A Writer decides what Tokeniser writes to its output iterator; this primary template is only declared, never defined.
+ *
+ * \ingroup grptokenise
+ */
+ template <typename DelimMode_, typename Char_, typename Iter_>
+ struct Writer;
+
+ /**
+ * Writer specialisation for delim_mode::DelimiterTag: tokens are written
+ * to the output iterator, delimiters are dropped.
+ *
+ * \ingroup grptokenise
+ */
+ template <typename Char_, typename Iter_>
+ struct Writer<delim_mode::DelimiterTag, Char_, Iter_>
+ {
+ /**
+ * Handle a token: assign s through the iterator i, then advance i.
+ */
+ static void handle_token(const std::basic_string<Char_> & s, Iter_ & i)
+ {
+ *i++ = s;
+ }
+
+ /**
+ * Handle a delimiter: does nothing, so the delimiter is discarded and the iterator is left untouched.
+ */
+ static void handle_delim(const std::basic_string<Char_> &, const Iter_ &)
+ {
+ }
+ };
+
+ /**
+ * Writer specialisation for delim_mode::BoundaryTag: tokens and
+ * delimiters are both written to the output iterator.
+ *
+ * \ingroup grptokenise
+ */
+ template <typename Char_, typename Iter_>
+ struct Writer<delim_mode::BoundaryTag, Char_, Iter_>
+ {
+ /**
+ * Handle a token: assign s through the iterator i, then advance i.
+ */
+ static void handle_token(const std::basic_string<Char_> & s, Iter_ & i)
+ {
+ *i++ = s;
+ }
+
+ /**
+ * Handle a delimiter: written exactly like a token (assign s through i, then advance i).
+ */
+ static void handle_delim(const std::basic_string<Char_> & s, Iter_ & i)
+ {
+ *i++ = s;
+ }
+ };
+
+ }
+
+ /**
+ * Tokeniser splits up strings into smaller strings; DelimKind_ and DelimMode_ are tag types, Char_ defaults to std::string's character type.
+ *
+ * \ingroup grptokenise
+ */
+ template <typename DelimKind_, typename DelimMode_, typename Char_ = std::string::value_type>
+ struct Tokeniser;
+
+ /**
+ * Tokeniser specialisation for delim_kind::AnyOfTag: splits on any character contained in a delimiter string.
+ *
+ * \ingroup grptokenise
+ */
+ template <typename DelimMode_, typename Char_>
+ class Tokeniser<delim_kind::AnyOfTag, DelimMode_, Char_> :
+ private InstantiationPolicy<Tokeniser<delim_kind::AnyOfTag, DelimMode_, Char_>,
+ instantiation_method::NonCopyableTag>
+ {
+ private:
+ const std::basic_string<Char_> _delims; ///< Each character of this string counts as a delimiter.
+
+ public:
+ /**
+ * Constructor: delims holds the characters to split on; a copy of it is stored.
+ */
+ Tokeniser(const std::basic_string<Char_> & delims) :
+ _delims(delims)
+ {
+ }
+
+ /**
+ * Do the tokenisation: split s and write the resulting pieces, in order of appearance, through iter.
+ */
+ template <typename Iter_>
+ void tokenise(const std::basic_string<Char_> & s, Iter_ iter) const;
+ };
+
+ template <typename DelimMode_, typename Char_>
+ template <typename Iter_>
+ void
+ Tokeniser<delim_kind::AnyOfTag, DelimMode_, Char_>::tokenise(
+ const std::basic_string<Char_> & s, Iter_ iter) const
+ { // Cuts s into alternating runs of delimiter and non-delimiter characters and hands each run to the Writer for DelimMode_.
+ typename std::basic_string<Char_>::size_type p(0), old_p(0); // p: scan position; old_p: start of the current run
+ bool in_delim((! s.empty()) && std::basic_string<Char_>::npos != _delims.find(s[0])); // true while the current run is made of delimiter characters
+
+ for ( ; p < s.length() ; ++p) // single left-to-right pass over s
+ {
+ if (in_delim)
+ {
+ if (std::basic_string<Char_>::npos == _delims.find(s[p])) // first non-delimiter: the delimiter run [old_p, p) ends here
+ {
+ tokeniser_internals::Writer<DelimMode_, Char_, Iter_>::handle_delim(
+ s.substr(old_p, p - old_p), iter);
+ in_delim = false;
+ old_p = p;
+ }
+ }
+ else
+ {
+ if (std::basic_string<Char_>::npos != _delims.find(s[p])) // first delimiter: the token [old_p, p) ends here
+ {
+ tokeniser_internals::Writer<DelimMode_, Char_, Iter_>::handle_token(
+ s.substr(old_p, p - old_p), iter);
+ in_delim = true;
+ old_p = p;
+ }
+ }
+ }
+
+ if (old_p != p) // flush the final run, which the loop never emits; false only when s is empty
+ {
+ if (in_delim)
+ tokeniser_internals::Writer<DelimMode_, Char_, Iter_>::handle_delim(
+ s.substr(old_p, p - old_p), iter);
+ else
+ tokeniser_internals::Writer<DelimMode_, Char_, Iter_>::handle_token(
+ s.substr(old_p, p - old_p), iter);
+ }
+ }
+
+ /**
+ * Convenience singleton class for tokenising on whitespace; uses delim_mode::DelimiterTag, so the delimiters are discarded.
+ *
+ * \ingroup grptokenise
+ */
+ class WhitespaceTokeniser :
+ public InstantiationPolicy<WhitespaceTokeniser, instantiation_method::SingletonAtStartupTag>,
+ public Tokeniser<delim_kind::AnyOfTag, delim_mode::DelimiterTag>
+ {
+ friend class InstantiationPolicy<WhitespaceTokeniser, instantiation_method::SingletonAtStartupTag>;
+
+ private:
+ WhitespaceTokeniser(); ///< Private and defined elsewhere; presumably only the befriended InstantiationPolicy creates the instance.
+ };
+}
+
+#endif