summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEli Collins <elic@assurancetechnologies.com>2016-06-22 18:30:44 -0400
committerEli Collins <elic@assurancetechnologies.com>2016-06-22 18:30:44 -0400
commitfebffda5c48ce918258cd89e200c314e92ee5bb7 (patch)
treedc808043bf1ea1b6bcafb7ac736db6f096b590aa
parentd94b6212236e433b515b009581f1c165ecf69059 (diff)
downloadpasslib-febffda5c48ce918258cd89e200c314e92ee5bb7.tar.gz
utils: pinned down unicode/bytes handling for ab64/b64s helpers
-rw-r--r--docs/modular_crypt_format.rst6
-rw-r--r--passlib/tests/test_utils.py93
-rw-r--r--passlib/tests/utils.py5
-rw-r--r--passlib/utils/__init__.py31
4 files changed, 115 insertions, 20 deletions
diff --git a/docs/modular_crypt_format.rst b/docs/modular_crypt_format.rst
index 7080897..86988a6 100644
--- a/docs/modular_crypt_format.rst
+++ b/docs/modular_crypt_format.rst
@@ -73,7 +73,7 @@ by the modular crypt format hashes found in Passlib:
they may use the ``$`` character as an internal field separator.
This is the least adhered-to of any modular crypt format convention.
- Other characters (such as ``=,-``) are sometimes
+ Other characters (such as ``+=,-``) are sometimes
used by various formats, though sparingly.
The only hard and fast stricture
@@ -81,11 +81,13 @@ by the modular crypt format hashes found in Passlib:
since this would interfere with parsing of the Unix shadow password file,
where these hashes are typically stored.
- Pretty much all modular-crypt-format hashes
+ Pretty much all older modular-crypt-format hashes
use ascii letters, numbers, ``.``, and ``/``
to provide base64 encoding of their raw data,
though the exact character value assignments vary between hashes
(see :data:`passlib.utils.h64`).
+ Many newer hashes use ``+`` instead of ``.``,
+ to adhere closer to the base64 standard.
4. Hash schemes should put their "digest" portion
at the end of the hash, preferably separated
diff --git a/passlib/tests/test_utils.py b/passlib/tests/test_utils.py
index e6183fe..19761ea 100644
--- a/passlib/tests/test_utils.py
+++ b/passlib/tests/test_utils.py
@@ -11,7 +11,7 @@ import warnings
# module
from passlib.utils import is_ascii_safe
from passlib.utils.compat import irange, PY3, u, unicode, join_bytes, PYPY
-from passlib.tests.utils import TestCase
+from passlib.tests.utils import TestCase, hb
#=============================================================================
# byte funcs
@@ -550,13 +550,94 @@ class Base64EngineTest(TestCase):
# dup charmap letter
self.assertRaises(ValueError, Base64Engine, AB64_CHARS[:-1] + "A")
- def test_ab64(self):
+ def test_ab64_decode(self):
+ """ab64_decode()"""
from passlib.utils import ab64_decode
- # TODO: make ab64_decode (and a b64 variant) *much* stricter about
- # padding chars, etc.
- # 1 mod 4 not valid
- self.assertRaises(ValueError, ab64_decode, "abcde")
+ # accept bytes or unicode
+ self.assertEqual(ab64_decode(b"abc"), hb("69b7"))
+ self.assertEqual(ab64_decode(u("abc")), hb("69b7"))
+
+ # reject non-ascii unicode
+ self.assertRaises(ValueError, ab64_decode, u("ab\xff"))
+
+ # underlying a2b_base64 treats non-base64 chars as "Incorrect padding"
+ self.assertRaises(TypeError, ab64_decode, b"ab\xff")
+ self.assertRaises(TypeError, ab64_decode, b"ab!")
+ self.assertRaises(TypeError, ab64_decode, u("ab!"))
+
+ # insert correct padding, handle dirty padding bits
+ self.assertEqual(ab64_decode(b"abcd"), hb("69b71d")) # 0 mod 4
+ self.assertRaises(ValueError, ab64_decode, b"abcde") # 1 mod 4
+ self.assertEqual(ab64_decode(b"abcdef"), hb("69b71d79")) # 2 mod 4, dirty padding bits
+ self.assertEqual(ab64_decode(b"abcdeQ"), hb("69b71d79")) # 2 mod 4, clean padding bits
+ self.assertEqual(ab64_decode(b"abcdefg"), hb("69b71d79f8")) # 3 mod 4, clean padding bits
+
+ # support "./" or "+/" altchars
+ # (lets us transition to "+/" representation, merge w/ b64s_decode)
+ self.assertEqual(ab64_decode(b"ab+/"), hb("69bfbf"))
+ self.assertEqual(ab64_decode(b"ab./"), hb("69bfbf"))
+
+ def test_ab64_encode(self):
+ """ab64_encode()"""
+ from passlib.utils import ab64_encode
+
+ # accept bytes
+ self.assertEqual(ab64_encode(hb("69b7")), b"abc")
+
+ # reject unicode
+ self.assertRaises(TypeError if PY3 else UnicodeEncodeError,
+ ab64_encode, hb("69b7").decode("latin-1"))
+
+ # strip trailing padding after encoding
+ self.assertEqual(ab64_encode(hb("69b71d")), b"abcd") # 0 mod 4
+ self.assertEqual(ab64_encode(hb("69b71d79")), b"abcdeQ") # 2 mod 4
+ self.assertEqual(ab64_encode(hb("69b71d79f8")), b"abcdefg") # 3 mod 4
+
+ # output "./" altchars
+ self.assertEqual(ab64_encode(hb("69bfbf")), b"ab./")
+
+ def test_b64s_decode(self):
+ """b64s_decode()"""
+ from passlib.utils import b64s_decode
+
+ # accept bytes or unicode
+ self.assertEqual(b64s_decode(b"abc"), hb("69b7"))
+ self.assertEqual(b64s_decode(u("abc")), hb("69b7"))
+
+ # reject non-ascii unicode
+ self.assertRaises(ValueError, b64s_decode, u("ab\xff"))
+
+ # underlying a2b_base64 treats non-base64 chars as "Incorrect padding"
+ self.assertRaises(TypeError, b64s_decode, b"ab\xff")
+ self.assertRaises(TypeError, b64s_decode, b"ab!")
+ self.assertRaises(TypeError, b64s_decode, u("ab!"))
+
+ # insert correct padding, handle dirty padding bits
+ self.assertEqual(b64s_decode(b"abcd"), hb("69b71d")) # 0 mod 4
+ self.assertRaises(ValueError, b64s_decode, b"abcde") # 1 mod 4
+ self.assertEqual(b64s_decode(b"abcdef"), hb("69b71d79")) # 2 mod 4, dirty padding bits
+ self.assertEqual(b64s_decode(b"abcdeQ"), hb("69b71d79")) # 2 mod 4, clean padding bits
+ self.assertEqual(b64s_decode(b"abcdefg"), hb("69b71d79f8")) # 3 mod 4, clean padding bits
+
+ def test_b64s_encode(self):
+ """b64s_encode()"""
+ from passlib.utils import b64s_encode
+
+ # accept bytes
+ self.assertEqual(b64s_encode(hb("69b7")), b"abc")
+
+ # reject unicode
+ self.assertRaises(TypeError if PY3 else UnicodeEncodeError,
+ b64s_encode, hb("69b7").decode("latin-1"))
+
+ # strip trailing padding after encoding
+ self.assertEqual(b64s_encode(hb("69b71d")), b"abcd") # 0 mod 4
+ self.assertEqual(b64s_encode(hb("69b71d79")), b"abcdeQ") # 2 mod 4
+ self.assertEqual(b64s_encode(hb("69b71d79f8")), b"abcdefg") # 3 mod 4
+
+ # output "+/" altchars
+ self.assertEqual(b64s_encode(hb("69bfbf")), b"ab+/")
class _Base64Test(TestCase):
"""common tests for all Base64Engine instances"""
diff --git a/passlib/tests/utils.py b/passlib/tests/utils.py
index 4b23082..917cae4 100644
--- a/passlib/tests/utils.py
+++ b/passlib/tests/utils.py
@@ -232,10 +232,7 @@ def hb(source):
usage: ``hb("deadbeef23")``
"""
- source = re.sub("\s", "", source)
- if PY3:
- source = source.encode("ascii")
- return unhexlify(source)
+ return unhexlify(re.sub("\s", "", source))
def limit(value, lower, upper):
if value < lower:
diff --git a/passlib/utils/__init__.py b/passlib/utils/__init__.py
index e9ddea1..e8be28c 100644
--- a/passlib/utils/__init__.py
+++ b/passlib/utils/__init__.py
@@ -1342,10 +1342,13 @@ _BASE64_STRIP = b"=\n"
_BASE64_PAD1 = b"="
_BASE64_PAD2 = b"=="
+# XXX: Passlib 1.8/1.9 -- deprecate everything that's using ab64_encode(),
+# have it start outputting b64s_encode() instead? can use ab64_decode() to retain backwards compat.
+
def ab64_encode(data):
"""
- base64 encoder which omits trailing padding & whitespace.
- uses ``.`` instead of ``+``, but otherwise the same as normal base64.
+ encode using shortened base64 format which omits padding & whitespace.
+ uses custom ``./`` altchars.
it is primarily used by Passlib's custom pbkdf2 hashes.
"""
@@ -1353,25 +1356,37 @@ def ab64_encode(data):
def ab64_decode(data):
"""
- base64 decoder which omits trailing padding & whitespace.
- uses ``.`` instead of ``+``, but otherwise the same as normal base64.
+ decode from shortened base64 format which omits padding & whitespace.
+ uses custom ``./`` altchars, but supports decoding normal ``+/`` altchars as well.
it is primarily used by Passlib's custom pbkdf2 hashes.
"""
+ if isinstance(data, unicode):
+ # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
+ try:
+ data = data.encode("ascii")
+ except UnicodeEncodeError:
+ raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
return b64s_decode(data.replace(b".", b"+"))
def b64s_encode(data):
"""
- base64 encoder which omits trailing padding & whitespace.
- otherwise uses default ``+/`` altchars.
+ encode using shortened base64 format which omits padding & whitespace.
+ uses default ``+/`` altchars.
"""
return b2a_base64(data).rstrip(_BASE64_STRIP)
def b64s_decode(data):
"""
- base64 decoder which omits trailing padding & whitespace.
- otherwise uses default ``+/`` altchars.
+ decode from shortened base64 format which omits padding & whitespace.
+ uses default ``+/`` altchars.
"""
+ if isinstance(data, unicode):
+ # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
+ try:
+ data = data.encode("ascii")
+ except UnicodeEncodeError:
+ raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
off = len(data) & 3
if off == 0:
pass