summaryrefslogtreecommitdiff
path: root/kafka/codec.py
diff options
context:
space:
mode:
author: Dana Powers <dana.powers@rd.io> 2016-01-07 18:51:14 -0800
committer: Dana Powers <dana.powers@rd.io> 2016-01-07 18:51:14 -0800
commit: 828377377da43749af0d27ee256ef31bf714cf17 (patch)
tree: fbad4d4381fc4d1ea2be7ce2009214d18fbeb674 /kafka/codec.py
parent: 71e7568fcb8132899f366b37c32645fd5a40dc4b (diff)
parent: 9a8af1499ca425366d934487469d9977fae7fe5f (diff)
download: kafka-python-828377377da43749af0d27ee256ef31bf714cf17.tar.gz
Merge branch '0.9'
Conflicts: kafka/codec.py kafka/version.py test/test_producer.py test/test_producer_integration.py
Diffstat (limited to 'kafka/codec.py')
-rw-r--r-- kafka/codec.py 32
1 files changed, 19 insertions, 13 deletions
diff --git a/kafka/codec.py b/kafka/codec.py
index c01fe20..c27d89b 100644
--- a/kafka/codec.py
+++ b/kafka/codec.py
@@ -55,24 +55,30 @@ def gzip_decode(payload):
return result
-def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
- """Encodes the given data with snappy if xerial_compatible is set then the
- stream is encoded in a fashion compatible with the xerial snappy library
+def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32*1024):
+ """Encodes the given data with snappy compression.
+
+ If xerial_compatible is set then the stream is encoded in a fashion
+ compatible with the xerial snappy library.
+
+ The block size (xerial_blocksize) controls how frequently the blocking
+ occurs; 32k is the default in the xerial library.
+
+ The format winds up being:
- The block size (xerial_blocksize) controls how frequent the blocking
- occurs 32k is the default in the xerial library.
- The format winds up being
+-------------+------------+--------------+------------+--------------+
| Header | Block1 len | Block1 data | Blockn len | Blockn data |
- |-------------+------------+--------------+------------+--------------|
+ +-------------+------------+--------------+------------+--------------+
| 16 bytes | BE int32 | snappy bytes | BE int32 | snappy bytes |
+-------------+------------+--------------+------------+--------------+
- It is important to note that the blocksize is the amount of uncompressed
- data presented to snappy at each block, whereas the blocklen is the
- number of bytes that will be present in the stream, that is the
- length will always be <= blocksize.
+
+ It is important to note that the blocksize is the amount of uncompressed
+ data presented to snappy at each block, whereas the blocklen is the number
+ of bytes that will be present in the stream; so the length will always be
+ <= blocksize.
+
"""
if not has_snappy():
@@ -109,9 +115,9 @@ def _detect_xerial_stream(payload):
This mode writes a magic header of the format:
+--------+--------------+------------+---------+--------+
| Marker | Magic String | Null / Pad | Version | Compat |
- |--------+--------------+------------+---------+--------|
+ +--------+--------------+------------+---------+--------+
| byte | c-string | byte | int32 | int32 |
- |--------+--------------+------------+---------+--------|
+ +--------+--------------+------------+---------+--------+
| -126 | 'SNAPPY' | \0 | | |
+--------+--------------+------------+---------+--------+