diff options
author | Dana Powers <dana.powers@rd.io> | 2016-01-07 18:51:14 -0800 |
---|---|---|
committer | Dana Powers <dana.powers@rd.io> | 2016-01-07 18:51:14 -0800 |
commit | 828377377da43749af0d27ee256ef31bf714cf17 (patch) | |
tree | fbad4d4381fc4d1ea2be7ce2009214d18fbeb674 /kafka/codec.py | |
parent | 71e7568fcb8132899f366b37c32645fd5a40dc4b (diff) | |
parent | 9a8af1499ca425366d934487469d9977fae7fe5f (diff) | |
download | kafka-python-828377377da43749af0d27ee256ef31bf714cf17.tar.gz |
Merge branch '0.9'
Conflicts:
kafka/codec.py
kafka/version.py
test/test_producer.py
test/test_producer_integration.py
Diffstat (limited to 'kafka/codec.py')
-rw-r--r-- | kafka/codec.py | 32 |
1 files changed, 19 insertions, 13 deletions
diff --git a/kafka/codec.py b/kafka/codec.py index c01fe20..c27d89b 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -55,24 +55,30 @@ def gzip_decode(payload): return result -def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024): - """Encodes the given data with snappy if xerial_compatible is set then the - stream is encoded in a fashion compatible with the xerial snappy library +def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32*1024): + """Encodes the given data with snappy compression. + + If xerial_compatible is set then the stream is encoded in a fashion + compatible with the xerial snappy library. + + The block size (xerial_blocksize) controls how frequent the blocking occurs + 32k is the default in the xerial library. + + The format winds up being: - The block size (xerial_blocksize) controls how frequent the blocking - occurs 32k is the default in the xerial library. - The format winds up being +-------------+------------+--------------+------------+--------------+ | Header | Block1 len | Block1 data | Blockn len | Blockn data | - |-------------+------------+--------------+------------+--------------| + +-------------+------------+--------------+------------+--------------+ | 16 bytes | BE int32 | snappy bytes | BE int32 | snappy bytes | +-------------+------------+--------------+------------+--------------+ - It is important to note that the blocksize is the amount of uncompressed - data presented to snappy at each block, whereas the blocklen is the - number of bytes that will be present in the stream, that is the - length will always be <= blocksize. + + It is important to note that the blocksize is the amount of uncompressed + data presented to snappy at each block, whereas the blocklen is the number + of bytes that will be present in the stream; so the length will always be + <= blocksize. + """ if not has_snappy(): @@ -109,9 +115,9 @@ def _detect_xerial_stream(payload): This mode writes a magic header of the format: +--------+--------------+------------+---------+--------+ | Marker | Magic String | Null / Pad | Version | Compat | - |--------+--------------+------------+---------+--------| + +--------+--------------+------------+---------+--------+ | byte | c-string | byte | int32 | int32 | - |--------+--------------+------------+---------+--------| + +--------+--------------+------------+---------+--------+ | -126 | 'SNAPPY' | \0 | | | +--------+--------------+------------+---------+--------+ |