aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAvatar David Leverton <levertond@googlemail.com> 2008-04-07 20:40:44 +0000
committerAvatar David Leverton <levertond@googlemail.com> 2008-04-07 20:40:44 +0000
commit6e13322e31538908585e1a75cbb08c1366322cc6 (patch)
treebc70f0837e8df2b3676054eb0e340a9d82617cea
parentded4c4ec8f20c95fac4b3baa943fc6420f275d2a (diff)
downloadpaludis-6e13322e31538908585e1a75cbb08c1366322cc6.tar.gz
paludis-6e13322e31538908585e1a75cbb08c1366322cc6.tar.xz
Improve speed and memory usage for suggestions, particularly --install.
-rw-r--r--paludis/fuzzy_finder.cc85
-rw-r--r--paludis/util/damerau_levenshtein.cc35
-rw-r--r--paludis/util/damerau_levenshtein.hh2
3 files changed, 84 insertions, 38 deletions
diff --git a/paludis/fuzzy_finder.cc b/paludis/fuzzy_finder.cc
index 769c198..1b7ccf7 100644
--- a/paludis/fuzzy_finder.cc
+++ b/paludis/fuzzy_finder.cc
@@ -28,6 +28,7 @@
#include <paludis/package_id.hh>
#include <paludis/name.hh>
#include <paludis/query.hh>
+#include <paludis/query_delegate.hh>
#include <paludis/user_dep_spec.hh>
#include <paludis/util/set.hh>
#include <paludis/util/sequence.hh>
@@ -57,6 +58,71 @@ namespace
std::transform(res.begin(), e, res2.begin(), ::tolower);
return res2;
}
+
+ class FuzzyPackageNameDelegate :
+ public QueryDelegate
+ {
+ private:
+ std::string _package;
+ DamerauLevenshtein _distance_calculator;
+ unsigned _threshold;
+ char _first_char;
+
+ public:
+ FuzzyPackageNameDelegate(const std::string & package) :
+ _package(package),
+ _distance_calculator(tolower_0_cost(package)),
+ _threshold(package.length() <= 4 ? 1 : 2),
+ _first_char(tolower(package[0]))
+ {
+ }
+
+ virtual tr1::shared_ptr<QualifiedPackageNameSet> packages(const Environment &,
+ tr1::shared_ptr<const RepositoryNameSequence>,
+ tr1::shared_ptr<const CategoryNamePartSet>) const;
+
+ std::string as_human_readable_string() const
+ {
+ return "package name fuzzy-matches '" + _package + "'";
+ }
+ };
+
+ tr1::shared_ptr<QualifiedPackageNameSet>
+ FuzzyPackageNameDelegate::packages(const Environment & e,
+ tr1::shared_ptr<const RepositoryNameSequence> repos,
+ tr1::shared_ptr<const CategoryNamePartSet> cats) const
+ {
+ tr1::shared_ptr<QualifiedPackageNameSet> result(new QualifiedPackageNameSet);
+
+ for (RepositoryNameSequence::ConstIterator r(repos->begin()),
+ r_end(repos->end()); r_end != r; ++r)
+ {
+ tr1::shared_ptr<const Repository> repo(e.package_database()->fetch_repository(*r));
+
+ for (CategoryNamePartSet::ConstIterator c(cats->begin()),
+ c_end(cats->end()); c_end != c; ++c)
+ {
+ tr1::shared_ptr<const QualifiedPackageNameSet> pkgs(repo->package_names(*c));
+ for (QualifiedPackageNameSet::ConstIterator p(pkgs->begin()),
+ p_end(pkgs->end()); p_end != p; ++p)
+ if (tolower(p->package.data()[0]) == _first_char &&
+ _distance_calculator.distance_with(tolower_0_cost(p->package.data())) <= _threshold)
+ result->insert(*p);
+ }
+ }
+
+ return result;
+ }
+
+ class FuzzyPackageName :
+ public Query
+ {
+ public:
+ FuzzyPackageName(const std::string & p) :
+ Query(tr1::shared_ptr<QueryDelegate>(new FuzzyPackageNameDelegate(p)))
+ {
+ }
+ };
}
namespace paludis
@@ -88,26 +154,11 @@ FuzzyCandidatesFinder::FuzzyCandidatesFinder(const Environment & e, const std::s
real_generator = real_generator & query::Repository(*pds.repository_ptr());
}
- DamerauLevenshtein distance_calculator(tolower_0_cost(package));
-
- unsigned threshold(package.length() <= 4 ? 1 : 2);
-
- QualifiedPackageNameSet potential_candidates;
-
- tr1::shared_ptr<const PackageIDSequence> ids(e.package_database()->query(real_generator, qo_whatever));
+ tr1::shared_ptr<const PackageIDSequence> ids(e.package_database()->query(real_generator & FuzzyPackageName(package), qo_best_version_only));
for (PackageIDSequence::ConstIterator i(ids->begin()), i_end(ids->end())
; i != i_end ; ++i)
- {
- if (tolower(stringify((*i)->name().package)[0]) != tolower(package[0]))
- continue;
- potential_candidates.insert((*i)->name());
- }
-
- for (QualifiedPackageNameSet::ConstIterator p(potential_candidates.begin()), p_end(potential_candidates.end()) ;
- p != p_end ; ++p)
- if (distance_calculator.distance_with(tolower_0_cost(stringify(p->package))) <= threshold)
- _imp->candidates.push_back(*p);
+ _imp->candidates.push_back((*i)->name());
}
FuzzyCandidatesFinder::~FuzzyCandidatesFinder()
diff --git a/paludis/util/damerau_levenshtein.cc b/paludis/util/damerau_levenshtein.cc
index 965e14e..6c1e6b3 100644
--- a/paludis/util/damerau_levenshtein.cc
+++ b/paludis/util/damerau_levenshtein.cc
@@ -31,13 +31,9 @@ namespace paludis
{
std::string name;
unsigned n;
- tr1::shared_ptr<std::vector<unsigned> > prevprev;
- tr1::shared_ptr<std::vector<unsigned> > prev;
- tr1::shared_ptr<std::vector<unsigned> > current;
Implementation(const std::string & myname) :
- name(myname), n(name.length() + 1), prevprev(new std::vector<unsigned>(n)),
- prev(new std::vector<unsigned>(n)), current(new std::vector<unsigned>(n))
+ name(myname), n(name.length() + 1)
{
}
};
@@ -53,35 +49,34 @@ DamerauLevenshtein::~DamerauLevenshtein()
}
unsigned
-DamerauLevenshtein::distance_with(const std::string & candidate)
+DamerauLevenshtein::distance_with(const std::string & candidate) const
{
+ std::vector<unsigned> prevprev(_imp->n, 0);
+ std::vector<unsigned> prev(_imp->n);
+ std::vector<unsigned> current(_imp->n, 0);
+
for (unsigned i(0) ; i < _imp->n ; ++i)
- (*_imp->prev)[i] = i;
- _imp->prevprev->assign(0, _imp->n);
- _imp->current->assign(0, _imp->n);
+ prev[i] = i;
size_t m(candidate.length() + 1);
for (unsigned i(1) ; i < m ; ++i)
{
- (*_imp->current)[0] = i;
+ current[0] = i;
for (unsigned j(1) ; j < _imp->n ; ++j)
{
unsigned cost(candidate[i - 1] == _imp->name[j - 1] ? 0 : 1);
- (*_imp->current)[j] = std::min(
- std::min((*_imp->prev)[j] + 1, (*_imp->current)[j - 1] + 1),
- (*_imp->prev)[j - 1] + cost);
+ current[j] = std::min(
+ std::min(prev[j] + 1, current[j - 1] + 1),
+ prev[j - 1] + cost);
if (i > 1 && j > 1
&& candidate[i - 1] == _imp->name[j - 2]
&& candidate[i - 2] == _imp->name[j - 1])
- (*_imp->current)[j] = std::min((*_imp->current)[j], (*_imp->prevprev)[j - 2] + cost);
+ current[j] = std::min(current[j], prevprev[j - 2] + cost);
}
- tr1::shared_ptr<std::vector<unsigned> > aux(_imp->current);
- tr1::shared_ptr<std::vector<unsigned> > aux2(_imp->prev);
- _imp->current = _imp->prevprev;
- _imp->prev = aux;
- _imp->prevprev = aux2;
+ prevprev.swap(current);
+ prevprev.swap(prev);
}
- return (*_imp->prev)[_imp->n - 1];
+ return prev[_imp->n - 1];
}
diff --git a/paludis/util/damerau_levenshtein.hh b/paludis/util/damerau_levenshtein.hh
index e3b64c8..d689995 100644
--- a/paludis/util/damerau_levenshtein.hh
+++ b/paludis/util/damerau_levenshtein.hh
@@ -49,7 +49,7 @@ namespace paludis
/**
* Compute the Damerau-Levenshtein to this candidate.
*/
- unsigned distance_with(const std::string & candidate);
+ unsigned distance_with(const std::string & candidate) const;
///\}
};