diff options
| author | Eli Collins <elic@assurancetechnologies.com> | 2016-06-22 18:30:44 -0400 |
|---|---|---|
| committer | Eli Collins <elic@assurancetechnologies.com> | 2016-06-22 18:30:44 -0400 |
| commit | febffda5c48ce918258cd89e200c314e92ee5bb7 (patch) | |
| tree | dc808043bf1ea1b6bcafb7ac736db6f096b590aa | |
| parent | d94b6212236e433b515b009581f1c165ecf69059 (diff) | |
| download | passlib-febffda5c48ce918258cd89e200c314e92ee5bb7.tar.gz | |
utils: pinned down unicode/bytes handling for ab64/b64s helpers

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | docs/modular_crypt_format.rst | 6 |
| -rw-r--r-- | passlib/tests/test_utils.py | 93 |
| -rw-r--r-- | passlib/tests/utils.py | 5 |
| -rw-r--r-- | passlib/utils/__init__.py | 31 |
4 files changed, 115 insertions, 20 deletions
diff --git a/docs/modular_crypt_format.rst b/docs/modular_crypt_format.rst index 7080897..86988a6 100644 --- a/docs/modular_crypt_format.rst +++ b/docs/modular_crypt_format.rst @@ -73,7 +73,7 @@ by the modular crypt format hashes found in Passlib: they may use the ``$`` character as an internal field separator. This is the least adhered-to of any modular crypt format convention. - Other characters (such as ``=,-``) are sometimes + Other characters (such as ``+=,-``) are sometimes used by various formats, though sparingly. The only hard and fast stricture @@ -81,11 +81,13 @@ by the modular crypt format hashes found in Passlib: since this would interfere with parsing of the Unix shadow password file, where these hashes are typically stored. - Pretty much all modular-crypt-format hashes + Pretty much all older modular-crypt-format hashes use ascii letters, numbers, ``.``, and ``/`` to provide base64 encoding of their raw data, though the exact character value assignments vary between hashes (see :data:`passlib.utils.h64`). + Many newer hashes use ``+`` instead of ``.``, + to adhere closer to the base64 standard. 4. 
Hash schemes should put their "digest" portion at the end of the hash, preferably separated diff --git a/passlib/tests/test_utils.py b/passlib/tests/test_utils.py index e6183fe..19761ea 100644 --- a/passlib/tests/test_utils.py +++ b/passlib/tests/test_utils.py @@ -11,7 +11,7 @@ import warnings # module from passlib.utils import is_ascii_safe from passlib.utils.compat import irange, PY3, u, unicode, join_bytes, PYPY -from passlib.tests.utils import TestCase +from passlib.tests.utils import TestCase, hb #============================================================================= # byte funcs @@ -550,13 +550,94 @@ class Base64EngineTest(TestCase): # dup charmap letter self.assertRaises(ValueError, Base64Engine, AB64_CHARS[:-1] + "A") - def test_ab64(self): + def test_ab64_decode(self): + """ab64_decode()""" from passlib.utils import ab64_decode - # TODO: make ab64_decode (and a b64 variant) *much* stricter about - # padding chars, etc. - # 1 mod 4 not valid - self.assertRaises(ValueError, ab64_decode, "abcde") + # accept bytes or unicode + self.assertEqual(ab64_decode(b"abc"), hb("69b7")) + self.assertEqual(ab64_decode(u("abc")), hb("69b7")) + + # reject non-ascii unicode + self.assertRaises(ValueError, ab64_decode, u("ab\xff")) + + # underlying a2b_ascii treats non-base64 chars as "Incorrect padding" + self.assertRaises(TypeError, ab64_decode, b"ab\xff") + self.assertRaises(TypeError, ab64_decode, b"ab!") + self.assertRaises(TypeError, ab64_decode, u("ab!")) + + # insert correct padding, handle dirty padding bits + self.assertEqual(ab64_decode(b"abcd"), hb("69b71d")) # 0 mod 4 + self.assertRaises(ValueError, ab64_decode, b"abcde") # 1 mod 4 + self.assertEqual(ab64_decode(b"abcdef"), hb("69b71d79")) # 2 mod 4, dirty padding bits + self.assertEqual(ab64_decode(b"abcdeQ"), hb("69b71d79")) # 2 mod 4, clean padding bits + self.assertEqual(ab64_decode(b"abcdefg"), hb("69b71d79f8")) # 3 mod 4, clean padding bits + + # support "./" or "+/" altchars + # (lets us transition 
to "+/" representation, merge w/ b64s_decode) + self.assertEqual(ab64_decode(b"ab+/"), hb("69bfbf")) + self.assertEqual(ab64_decode(b"ab./"), hb("69bfbf")) + + def test_ab64_encode(self): + """ab64_encode()""" + from passlib.utils import ab64_encode + + # accept bytes + self.assertEqual(ab64_encode(hb("69b7")), b"abc") + + # reject unicode + self.assertRaises(TypeError if PY3 else UnicodeEncodeError, + ab64_encode, hb("69b7").decode("latin-1")) + + # insert correct padding before decoding + self.assertEqual(ab64_encode(hb("69b71d")), b"abcd") # 0 mod 4 + self.assertEqual(ab64_encode(hb("69b71d79")), b"abcdeQ") # 2 mod 4 + self.assertEqual(ab64_encode(hb("69b71d79f8")), b"abcdefg") # 3 mod 4 + + # output "./" altchars + self.assertEqual(ab64_encode(hb("69bfbf")), b"ab./") + + def test_b64s_decode(self): + """b64s_decode()""" + from passlib.utils import b64s_decode + + # accept bytes or unicode + self.assertEqual(b64s_decode(b"abc"), hb("69b7")) + self.assertEqual(b64s_decode(u("abc")), hb("69b7")) + + # reject non-ascii unicode + self.assertRaises(ValueError, b64s_decode, u("ab\xff")) + + # underlying a2b_ascii treats non-base64 chars as "Incorrect padding" + self.assertRaises(TypeError, b64s_decode, b"ab\xff") + self.assertRaises(TypeError, b64s_decode, b"ab!") + self.assertRaises(TypeError, b64s_decode, u("ab!")) + + # insert correct padding, handle dirty padding bits + self.assertEqual(b64s_decode(b"abcd"), hb("69b71d")) # 0 mod 4 + self.assertRaises(ValueError, b64s_decode, b"abcde") # 1 mod 4 + self.assertEqual(b64s_decode(b"abcdef"), hb("69b71d79")) # 2 mod 4, dirty padding bits + self.assertEqual(b64s_decode(b"abcdeQ"), hb("69b71d79")) # 2 mod 4, clean padding bits + self.assertEqual(b64s_decode(b"abcdefg"), hb("69b71d79f8")) # 3 mod 4, clean padding bits + + def test_b64s_encode(self): + """b64s_encode()""" + from passlib.utils import b64s_encode + + # accept bytes + self.assertEqual(b64s_encode(hb("69b7")), b"abc") + + # reject unicode + 
self.assertRaises(TypeError if PY3 else UnicodeEncodeError, + b64s_encode, hb("69b7").decode("latin-1")) + + # insert correct padding before decoding + self.assertEqual(b64s_encode(hb("69b71d")), b"abcd") # 0 mod 4 + self.assertEqual(b64s_encode(hb("69b71d79")), b"abcdeQ") # 2 mod 4 + self.assertEqual(b64s_encode(hb("69b71d79f8")), b"abcdefg") # 3 mod 4 + + # output "+/" altchars + self.assertEqual(b64s_encode(hb("69bfbf")), b"ab+/") class _Base64Test(TestCase): """common tests for all Base64Engine instances""" diff --git a/passlib/tests/utils.py b/passlib/tests/utils.py index 4b23082..917cae4 100644 --- a/passlib/tests/utils.py +++ b/passlib/tests/utils.py @@ -232,10 +232,7 @@ def hb(source): usage: ``hb("deadbeef23")`` """ - source = re.sub("\s", "", source) - if PY3: - source = source.encode("ascii") - return unhexlify(source) + return unhexlify(re.sub("\s", "", source)) def limit(value, lower, upper): if value < lower: diff --git a/passlib/utils/__init__.py b/passlib/utils/__init__.py index e9ddea1..e8be28c 100644 --- a/passlib/utils/__init__.py +++ b/passlib/utils/__init__.py @@ -1342,10 +1342,13 @@ _BASE64_STRIP = b"=\n" _BASE64_PAD1 = b"=" _BASE64_PAD2 = b"==" +# XXX: Passlib 1.8/1.9 -- deprecate everything that's using ab64_encode(), +# have it start outputing b64s_encode() instead? can use a64_decode() to retain backwards compat. + def ab64_encode(data): """ - base64 encoder which omits trailing padding & whitespace. - uses ``.`` instead of ``+``, but otherwise the same as normal base64. + encode using shortened base64 format which omits padding & whitespace. + uses custom ``./`` altchars. it is primarily used by Passlib's custom pbkdf2 hashes. """ @@ -1353,25 +1356,37 @@ def ab64_encode(data): def ab64_decode(data): """ - base64 decoder which omits trailing padding & whitespace. - uses ``.`` instead of ``+``, but otherwise the same as normal base64. + decode from shortened base64 format which omits padding & whitespace. 
+ uses custom ``./`` altchars, but supports decoding normal ``+/`` altchars as well. it is primarily used by Passlib's custom pbkdf2 hashes. """ + if isinstance(data, unicode): + # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64() + try: + data = data.encode("ascii") + except UnicodeEncodeError: + raise suppress_cause(ValueError("string argument should contain only ASCII characters")) return b64s_decode(data.replace(b".", b"+")) def b64s_encode(data): """ - base64 encoder which omits trailing padding & whitespace. - otherwise uses default ``+/`` altchars. + encode using shortened base64 format which omits padding & whitespace. + uses default ``+/`` altchars. """ return b2a_base64(data).rstrip(_BASE64_STRIP) def b64s_decode(data): """ - base64 decoder which omits trailing padding & whitespace. - otherwise uses default ``+/`` altchars. + decode from shortened base64 format which omits padding & whitespace. + uses default ``+/`` altchars. """ + if isinstance(data, unicode): + # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64() + try: + data = data.encode("ascii") + except UnicodeEncodeError: + raise suppress_cause(ValueError("string argument should contain only ASCII characters")) off = len(data) & 3 if off == 0: pass |
