summary | refs | log | tree | commit | diff
path: root/gitdb/utils
diff options
context:
space:
mode:
author: Kevin Brown <kevin@kevinbrown.in> 2014-07-16 20:15:31 -0400
committer: Kevin Brown <kevin@kevinbrown.in> 2014-07-16 20:15:31 -0400
commit: 0465cf327d232101b2de69d714a468b7e1a66a74 (patch)
tree: 91cc5e78c8c065321ccf23e048deb7403ec54c59 /gitdb/utils
parent: 0cf09d3310cba7f33b9ebc9badf61ab721d12857 (diff)
download: gitdb-0465cf327d232101b2de69d714a468b7e1a66a74.tar.gz
Start up compat and encoding files
There were a few things which were being reused consistently for compatibility purposes, such as the `buffer`/`memoryview` functions as well as the `izip` function, which needed to be aliased for Python 3. The `buffer` function was taken from `smmap` [1] and reworked slightly to handle the optional third parameter.

This also adds a compatibility file dedicated entirely to encoding issues, which seem to be the biggest problem. The main functions were taken in part from the Django project [2] and rewritten slightly because our needs are a bit more narrow.

A constants file has been added to consistently handle the constants which are required for the gitdb project in the core and the tests. This is part of a greater plan to reorganize the `util.py` file included in this project.

This points the async extension back at the original repository and points it to the latest commit.

[1]: https://github.com/Byron/smmap/blob/1af4b42a2354acbb53c7956d647655922658fd80/smmap/util.py#L20-L26
[2]: https://github.com/django/django/blob/b8d255071ead897cf68120cd2fae7c91326ca2cc/django/utils/encoding.py
Diffstat (limited to 'gitdb/utils')
-rw-r--r--  gitdb/utils/__init__.py   0
-rw-r--r--  gitdb/utils/compat.py    39
-rw-r--r--  gitdb/utils/encoding.py  35
3 files changed, 74 insertions, 0 deletions
diff --git a/gitdb/utils/__init__.py b/gitdb/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gitdb/utils/__init__.py
diff --git a/gitdb/utils/compat.py b/gitdb/utils/compat.py
new file mode 100644
index 0000000..b9da683
--- /dev/null
+++ b/gitdb/utils/compat.py
@@ -0,0 +1,39 @@
import sys

# True on Python 3 and later.  Compared with ``>=`` rather than ``==`` so
# that a later major version is not mistaken for Python 2.
PY3 = sys.version_info[0] >= 3

try:
    # Python 2: the lazy, iterator-returning zip lives in itertools.
    from itertools import izip
except ImportError:
    # Python 3: ``itertools.izip`` is gone; alias the builtin ``zip``.
    izip = zip
+
try:
    # Python 2: re-export the builtin ``buffer`` and alias ``memoryview``
    # to it, so both names refer to the same type on this interpreter.
    # NOTE(review): this shadows any builtin ``memoryview`` within this
    # module -- presumably intentional so isinstance checks accept buffer
    # objects; confirm against callers.
    buffer = buffer
    memoryview = buffer
except NameError:
    # Python 3 has no `buffer`; only `memoryview`
    def buffer(obj, offset=0, size=None):
        """Return a zero-copy view onto part of *obj*.

        Stand-in for the Python 2 ``buffer(object[, offset[, size]])``
        builtin, implemented on top of ``memoryview``.

        :param obj: object supporting the buffer protocol (``bytes``,
            ``bytearray``, ...).
        :param offset: index of the first item exposed by the view.
        :param size: number of items to expose; ``None`` means everything
            from *offset* to the end.
        :return: a ``memoryview`` over the requested range of *obj*.
        """
        # Wrap first, slice second: slicing the memoryview never copies,
        # whereas slicing ``obj`` itself would duplicate the data.
        view = memoryview(obj)
        if size is None:
            return view[offset:]
        return view[offset:offset + size]

    memoryview = memoryview
+
# MAXSIZE: the largest value a container length can take on this
# interpreter.
if PY3:
    MAXSIZE = sys.maxsize
else:
    # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
    # Probe by asking ``len()`` for a value of 1 << 31: if that overflows,
    # the 32-bit limit applies, otherwise the 64-bit one.
    class X(object):
        def __len__(self):
            return 1 << 31

    try:
        len(X())
    except OverflowError:
        # 32-bit
        MAXSIZE = int((1 << 31) - 1)
    else:
        # 64-bit
        MAXSIZE = int((1 << 63) - 1)
    # The probe class is only needed for the check above; do not leave it
    # behind in the module namespace.
    del X
diff --git a/gitdb/utils/encoding.py b/gitdb/utils/encoding.py
new file mode 100644
index 0000000..12164e7
--- /dev/null
+++ b/gitdb/utils/encoding.py
@@ -0,0 +1,35 @@
from gitdb.utils import compat

# ``string_types`` is a tuple suitable for isinstance() checks;
# ``text_type`` is the type used to build text objects.
if compat.PY3:
    string_types = (str,)
    text_type = str
else:
    # These names only exist on Python 2, where this branch runs.
    string_types = (basestring,)  # noqa: F821
    text_type = unicode  # noqa: F821
+
def force_bytes(data, encoding="utf-8"):
    """Return *data* as ``bytes`` where a conversion is defined.

    :param data: value to convert.
    :param encoding: codec used when *data* is a string that has to be
        encoded.
    :return: *data* itself when it already is ``bytes``; ``bytes(data)``
        for a ``compat.memoryview``; the encoded form for an instance of
        ``string_types``.  Any other object is returned unchanged.
    """
    if isinstance(data, bytes):
        return data

    if isinstance(data, compat.memoryview):
        return bytes(data)

    if isinstance(data, string_types):
        return data.encode(encoding)

    # No conversion is defined for this type; hand it back as-is.
    return data
+
def force_text(data, encoding="utf-8"):
    """Return *data* as a ``text_type`` object.

    :param data: value to convert.
    :param encoding: codec used whenever bytes have to be decoded.
    :return: *data* itself when it already is ``text_type``; otherwise the
        decoded text.
    """
    if isinstance(data, text_type):
        return data

    # Only reachable when ``string_types`` contains a type other than
    # ``text_type`` (the Python 2 definition); such strings are decoded.
    if isinstance(data, string_types):
        return data.decode(encoding)

    if not isinstance(data, bytes):
        data = force_bytes(data, encoding)

    # Whenever bytes are in hand, decode them with the requested encoding
    # on every interpreter rather than falling back to the default codec.
    if compat.PY3 or isinstance(data, bytes):
        return text_type(data, encoding)

    # Python 2, and ``force_bytes`` could not convert the object: use its
    # plain text representation.
    return text_type(data)