C++ で Suffix Array
真の STL 使いは std::string とか書かずに全部 iterator でなんとかする、気がする。いや、そんなことはないか。
自分なら、比較用の functor はこう書く。
template<typename CharIterator, typename Index> struct suffix_less : public std::binary_function<Index, Index, bool> { public: suffix_less(CharIterator first, CharIterator last) : first_(first), last_(last) { } bool operator()(Index x, Index y) const { return std::lexicographical_compare(first_ + x, last_, first_ + y, last_); } private: CharIterator first_, last_; };
std::lexicographical_compare って案外マイナーかもしれない。
Boost には transform_iterator というのがあるので、それを使うという手もあり。
で、まぁ、すべてが iterator、な感じでアルゴリズムを実装するとこうなるという例。
#include <iostream> #include <functional> #include <algorithm> #include <iterator> #include <vector> #include <cstdlib> #include <boost/iterator/transform_iterator.hpp> template<typename Iterator, typename T> void iota(Iterator first, Iterator last, T value) { while (first != last) { *first++ = value++; } } template<typename CharIterator, typename Index> struct suffix_less : public std::binary_function<Index, Index, bool> { public: suffix_less(CharIterator first, CharIterator last) : first_(first), last_(last) { } bool operator()(Index x, Index y) const { return std::lexicographical_compare(first_ + x, last_, first_ + y, last_); } private: CharIterator first_, last_; }; template<typename CharIterator, typename Index> struct index_to_substring : public std::unary_function<Index, std::pair<CharIterator, CharIterator> > { index_to_substring() { } index_to_substring(CharIterator first, CharIterator last, Index length) : first_(first), last_(last), length_(length) { } std::pair<CharIterator, CharIterator> operator()(Index i) const { if (first_ + i + length_ <= last_) { return std::make_pair(first_ + i, first_ + i + length_); } else { return std::make_pair(first_ + i, last_); } } private: CharIterator first_, last_; Index length_; }; template<typename CharIterator, typename SAIterator> void suffix_sort(CharIterator textFirst, CharIterator textLast, SAIterator saFirst, SAIterator saLast) { typedef typename std::iterator_traits<SAIterator>::value_type Index; suffix_less<CharIterator, Index> cmp(textFirst, textLast); std::sort(saFirst, saLast, cmp); } template<typename Iterator> struct pair_lexicographical_compare : public std::binary_function<std::pair<Iterator, Iterator>, std::pair<Iterator, Iterator>, bool> { bool operator()(const std::pair<Iterator, Iterator> a, const std::pair<Iterator, Iterator> b) const { return std::lexicographical_compare(a.first, a.second, b.first, b.second); } }; template<typename Iterator, typename T, typename Comparator> std::pair<typename std::iterator_traits<Iterator>::difference_type, typename std::iterator_traits<Iterator>::difference_type> equal_range_index(Iterator first, Iterator last, T val, Comparator cmp) { std::pair<Iterator, Iterator> range = std::equal_range(first, last, val, cmp); return std::make_pair(std::distance(first, range.first), std::distance(first, range.second)); } template<typename CharIterator, typename IndexIterator> std::pair<typename std::iterator_traits<IndexIterator>::value_type, typename std::iterator_traits<IndexIterator>::value_type > suffix_search(CharIterator textFirst, CharIterator textLast, IndexIterator saFirst, IndexIterator saLast, CharIterator wordFirst, CharIterator wordLast) { typedef typename std::iterator_traits<IndexIterator>::value_type Index; typedef typename std::iterator_traits<CharIterator>::difference_type size_type; size_type n = std::distance(wordFirst, wordLast); index_to_substring<CharIterator, Index> func(textFirst, textLast, n); return equal_range_index(boost::make_transform_iterator(saFirst, func), boost::make_transform_iterator(saLast, func), std::make_pair(wordFirst, wordLast), pair_lexicographical_compare<CharIterator>()); } int main(int argc, char** argv) { std::string text(std::istreambuf_iterator<char>(std::cin), std::istreambuf_iterator<char>()); std::vector<size_t> sa(text.length()); iota(sa.begin(), sa.end(), 0); suffix_sort(text.begin(), text.end(), sa.begin(), sa.end()); for (size_t i = 0; i < sa.size(); i++) { std::cout << i << '\t' << sa[i] << '\t' << text.substr(sa[i], std::string::npos) << std::endl; } for (int i = 1; i < argc; i++) { std::string search_word(argv[i]); std::pair<size_t, size_t> range = suffix_search(text.begin(), text.end(), sa.begin(), sa.end(), search_word.begin(), search_word.end()); std::cout << search_word << ": [" << range.first << ", " << range.second << "]" << std::endl; } return 0; }
なんか、半分近くが template か型か typedef の宣言になっている気がする。