diff options
Diffstat (limited to 'libs/algorithm/string/example/rle_example.cpp')
-rw-r--r-- | libs/algorithm/string/example/rle_example.cpp | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/libs/algorithm/string/example/rle_example.cpp b/libs/algorithm/string/example/rle_example.cpp new file mode 100644 index 000000000..9e52b96b3 --- /dev/null +++ b/libs/algorithm/string/example/rle_example.cpp @@ -0,0 +1,248 @@ +// Boost string_algo library example file ---------------------------------// + +// Copyright Pavol Droba 2002-2003. Use, modification and +// distribution is subject to the Boost Software License, Version +// 1.0. (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +// See http://www.boost.org for updates, documentation, and revision history. + +/* + RLE compression using replace framework. Goal is to compress a sequence of + repeating characters into 3 bytes ( repeat mark, character and repetition count ). + For simplification, it works only on numeric-value sequences. +*/ + +#include <string> +#include <iostream> +#include <limits> +#include <boost/detail/iterator.hpp> +#include <boost/algorithm/string/find_format.hpp> +#include <boost/algorithm/string/finder.hpp> + +using namespace std; +using namespace boost; + +// replace mark specification, specialize for a specific element type +template< typename T > T repeat_mark() { return (std::numeric_limits<T>::max)(); }; + +// Compression ----------------------------------------------------------------------- + + +// compress finder -rle +/* + Find a sequence which can be compressed. It has to be at least 3-character long + sequence of repetitive characters +*/ +struct find_compressF +{ + // Construction + find_compressF() {} + + // Operation + template<typename ForwardIteratorT> + iterator_range<ForwardIteratorT> operator()( + ForwardIteratorT Begin, + ForwardIteratorT End ) const + { + typedef ForwardIteratorT input_iterator_type; + typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type; + typedef iterator_range<input_iterator_type> result_type; + + // begin of the matching segment + input_iterator_type MStart=End; + // Repetition counter + value_type Cnt=0; + + // Search for a sequence of repetitive characters + for(input_iterator_type It=Begin; It!=End;) + { + input_iterator_type It2=It++; + + if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() ) + { + return result_type( MStart, It ); + } + + if ( *It==*It2 ) + { + if ( MStart==End ) + { + // Mark the start + MStart=It2; + } + + // Increate repetition counter + Cnt++; + } + else + { + if ( MStart!=End ) + { + if ( Cnt>2 ) + return result_type( MStart, It ); + else + { + MStart=End; + Cnt=0; + } + } + } + } + + return result_type( End, End ); + } +}; + +// rle compress format +/* + Transform a sequence into repeat mark, character and count +*/ +template<typename SeqT> +struct format_compressF +{ +private: + typedef SeqT result_type; + typedef typename SeqT::value_type value_type; + +public: + // Construction + format_compressF() {}; + + // Operation + template< typename ReplaceT > + result_type operator()( const ReplaceT& Replace ) const + { + SeqT r; + if(!Replace.empty()) + { + r.push_back( repeat_mark<value_type>() ); + r.push_back( *(Replace.begin()) ); + r.push_back( value_type( Replace.size() ) ); + } + + return r; + } +}; + +// Decompression ----------------------------------------------------------------------- + + +// find decompress-rle functor +/* + find a repetition block +*/ +struct find_decompressF +{ + // Construction + find_decompressF() {} + + // Operation + template<typename ForwardIteratorT> + iterator_range<ForwardIteratorT> operator()( + ForwardIteratorT Begin, + ForwardIteratorT End ) const + { + typedef ForwardIteratorT input_iterator_type; + typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type; + typedef iterator_range<input_iterator_type> result_type; + + for(input_iterator_type It=Begin; It!=End; It++) + { + if( *It==repeat_mark<value_type>() ) + { + // Repeat mark found, extract body + input_iterator_type It2=It++; + + if ( It==End ) break; + It++; + if ( It==End ) break; + It++; + + return result_type( It2, It ); + } + } + + return result_type( End, End ); + } +}; + +// rle decompress format +/* + transform a repetition block into a sequence of characters +*/ +template< typename SeqT > +struct format_decompressF +{ +private: + typedef SeqT result_type; + typedef typename SeqT::value_type value_type; + +public: + // Construction + format_decompressF() {}; + + // Operation + template< typename ReplaceT > + result_type operator()( const ReplaceT& Replace ) const + { + SeqT r; + + if(!Replace.empty()) + { + // extract info + typename ReplaceT::const_iterator It=Replace.begin(); + + value_type Value=*(++It); + value_type Repeat=*(++It); + + for( value_type Index=0; Index<Repeat; Index++ ) r.push_back( Value ); + } + + return r; + } +}; + + +int main() +{ + cout << "* RLE Compression Example *" << endl << endl; + + string original("123_AA_*ZZZZZZZZZZZZZZ*34"); + + // copy compression + string compress=find_format_all_copy( + original, + find_compressF(), + format_compressF<string>() ); + + cout << "Compressed string: " << compress << endl; + + // Copy decompression + string decompress=find_format_all_copy( + compress, + find_decompressF(), + format_decompressF<string>() ); + + cout << "Decompressed string: " << decompress << endl; + + // in-place compression + find_format_all( + original, + find_compressF(), + format_compressF<string>() ); + + cout << "Compressed string: " << original << endl; + + // in-place decompression + find_format_all( + original, + find_decompressF(), + format_decompressF<string>() ); + + cout << "Decompressed string: " << original << endl; + + cout << endl; + + return 0; +} |