diff options
-rw-r--r-- | src/osd/ErasureCodeInterface.h | 210 | ||||
-rw-r--r-- | src/osd/Makefile.am | 1 |
2 files changed, 211 insertions, 0 deletions
diff --git a/src/osd/ErasureCodeInterface.h b/src/osd/ErasureCodeInterface.h new file mode 100644 index 00000000000..5ce2842d562 --- /dev/null +++ b/src/osd/ErasureCodeInterface.h @@ -0,0 +1,210 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_ERASURE_CODE_INTERFACE_H +#define CEPH_ERASURE_CODE_INTERFACE_H + +/*! @file ErasureCodeInterface.h + @brief Interface provided by erasure code plugins + + The erasure coded pools rely on plugins implementing + **ErasureCodeInterface** to encode and decode content. All codes + are systematic (i.e. the data is not mangled and can be + reconstructed by concatenating chunks ). + + All methods returns **0** on success and a negative value on + error. If the value returned on error is not explained in + **ErasureCodeInterface**, the sources or the documentation of the + interface implementer must be read to figure out what it means. It + is recommended that each error code matches an *errno* value that + relates to the cause of the error. + + Assuming the interface implementer provides three data chunks ( K + = 3 ) and two coding chunks ( M = 2 ), a buffer can be encoded as + follows: + + ~~~~~~~~~~~~~~~~{.c} + set<int> want_to_encode(0, 1, 2, // data chunks + 3, 4 // coding chunks + ); + bufferlist in = "ABCDEF"; + map<int, bufferlist> encoded + encode(want_to_encode, in, &encoded); + encoded[0] == "AB" // data chunk 0 + encoded[1] == "CD" // data chunk 1 + encoded[2] == "EF" // data chunk 2 + encoded[3] // coding chunk 0 + encoded[4] // coding chunk 1 + ~~~~~~~~~~~~~~~~ + + If encoded[2] ( which contains **EF** ) is missing and accessing + encoded[3] ( the first coding chunk ) is more expensive than + accessing encoded[4] ( the second coding chunk ), the + **minimum_to_decode_with_cost** method can be called as follows: + + ~~~~~~~~~~~~~~~~{.c} + set<int> want_to_read(2); // want the chunk containing "EF" + map<int,int> available( + 0 => 1, // data chunk 0 : available and costs 1 + 1 => 1, // data chunk 1 : available and costs 1 + 3 => 9, // coding chunk 1 : available and costs 9 + 4 => 1, // coding chunk 2 : available and costs 1 + ); + set<int> minimum; + minimum_to_decode_with_cost(want_to_read, + available, + &minimum); + minimum == set<int>(0, 1, 4); + ~~~~~~~~~~~~~~~~ + + It sets **minimum** with three chunks to reconstruct the desired + data chunk and will pick the second coding chunk ( 4 ) because it + is less expensive ( 1 < 9 ) to retrieve than the first coding + chunk ( 3 ). The caller is responsible for retrieving the chunks + and call **decode** to reconstruct the second data chunk content. + + ~~~~~~~~~~~~~~~~{.c} + map<int,bufferlist> chunks; + for i in minimum.keys(): + chunks[i] = fetch_chunk(i); // get chunk from storage + map<int, bufferlist> decoded; + decode(want_to_read, chunks, &decoded); + decoded[2] == "EF" + ~~~~~~~~~~~~~~~~ + + */ + +#include <map> +#include <set> +#include <tr1/memory> +#include "include/buffer.h" + +using namespace std; + +namespace ceph { + + class ErasureCodeInterface { + public: + virtual ~ErasureCodeInterface() {} + + /** + * Compute the smallest subset of **available** chunks that needs + * to be retrieved in order to successfully decode + * **want_to_read** chunks. + * + * It is strictly equivalent to calling + * **minimum_to_decode_with_cost** where each **available** chunk + * has the same cost. + * + * @see minimum_to_decode_with_cost + * + * @param [in] want_to_read chunk indexes to be decoded + * @param [in] available chunk indexes containing valid data + * @param [out] minimum chunk indexes to retrieve for decode + * @return **0** on success or a negative errno on error. + */ + virtual int minimum_to_decode(const set<int> &want_to_read, + const set<int> &available, + set<int> *minimum) = 0; + + /** + * Compute the smallest subset of **available** chunks that needs + * to be retrieved in order to successfully decode + * **want_to_read** chunks. If there are more than one possible + * subset, select the subset that contains the chunks with the + * lowest cost. + * + * The **available** parameter maps chunk indexes to their + * retrieval cost. The higher the cost value, the more costly it + * is to retrieve the chunk content. + * + * Returns -EIO if there are not enough chunk indexes in + * **available** to decode **want_to_read**. + * + * Returns 0 on success. + * + * The **minimum** argument must be a pointer to an empty set. + * + * @param [in] want_to_read chunk indexes to be decoded + * @param [in] available map chunk indexes containing valid data + * to their retrieval cost + * @param [out] minimum chunk indexes to retrieve for decode + * @return **0** on success or a negative errno on error. + */ + virtual int minimum_to_decode_with_cost(const set<int> &want_to_read, + const map<int, int> &available, + set<int> *minimum) = 0; + + /** + * Encode the content of **in** and store the result in + * **encoded**. The **encoded** map contains at least all + * chunk indexes found in the **want_to_encode** set. + * + * The **encoded** map is expected to be a pointer to an empty + * map. + * + * The **encoded** map may contain more chunks than required by + * **want_to_encode** and the caller is expected to permanently + * store all of them, not just the chunks from **want_to_encode**. + * + * Returns 0 on success. + * + * @param [in] want_to_encode chunk indexes to be encoded + * @param [in] in data to be encoded + * @param [out] encoded map chunk indexes to chunk data + * @return **0** on success or a negative errno on error. + */ + virtual int encode(const set<int> &want_to_encode, + const bufferlist &in, + map<int, bufferlist> *encoded) = 0; + + /** + * Decode the **chunks** and store at least **want_to_read** chunks + * in **decoded**. + * + * There must be enough **chunks** ( as returned by + * **minimum_to_decode** or **minimum_to_decode_with_cost** ) to + * perform a successfull decoding of all chunks found in + * **want_to_read**. + * + * The **decoded** map is expected to be a pointer to an empty + * map. + * + * The **decoded** map may contain more chunks than required by + * **want_to_read** and they can safely be used by the caller. + * + * If a chunk is listed in **want_to_read** and there is + * corresponding **bufferlist** in **chunks**, it will be copied + * verbatim into **decoded**. If not it will be reconstructed from + * the existing chunks. + * + * Returns 0 on success. + * + * @param [in] want_to_read chunk indexes to be decoded + * @param [in] chunks map chunk indexes to chunk data + * @param [out] decoded map chunk indexes to chunk data + * @return **0** on success or a negative errno on error. + */ + virtual int decode(const set<int> &want_to_read, + const map<int, bufferlist> &chunks, + map<int, bufferlist> *decoded) = 0; + }; + + typedef std::tr1::shared_ptr<ErasureCodeInterface> ErasureCodeInterfaceRef; + +} + +#endif diff --git a/src/osd/Makefile.am b/src/osd/Makefile.am index ca82d7632a8..6aaf8466670 100644 --- a/src/osd/Makefile.am +++ b/src/osd/Makefile.am @@ -17,6 +17,7 @@ noinst_LTLIBRARIES += libosd.la noinst_HEADERS += \ osd/Ager.h \ osd/ClassHandler.h \ + osd/ErasureCodeInterface.h \ osd/OSD.h \ osd/OSDCap.h \ osd/OSDMap.h \ |