# path: root/tests/test_stringio.py
# blob: 89e784f7688e698ae2d82485032e8e1a29fd0e58 (plain)
from io import StringIO

# Codec used to build the bytes fixture below and to decode it again in the
# tests.
ENCODING = 'utf-8'

# Text (str) fixture; deliberately contains non-ASCII characters (German
# umlauts and Chinese) in addition to plain ASCII.
S0 = 'hello world, Umlauts: äöüßÄÖÜ, Chinese: 四是四,十是十,十四是十四,四十是四十,四十四隻不識字之石獅子是死的'
# Bytes fixture: an ASCII-only phrase encoded with ENCODING.
S0_BYTES = 'fe fi foe fam'.encode(ENCODING)

#print("###", StringIO, "###")


def test_001_text():
    """Text printed into a StringIO buffer is read back as identical text."""
    # The fixture itself has to be text (str) to begin with.
    assert isinstance(S0, str)

    stream = StringIO()
    # end="" keeps print() from appending a newline to the buffer.
    print(S0, end="", file=stream)
    written = stream.getvalue()

    # What comes out of the buffer is text as well ...
    assert isinstance(written, str)
    # ... the fixture is an instance of the result's type ...
    assert isinstance(S0, type(written))
    # ... and the content round-trips unchanged.
    assert written == S0


def test_002_bytes():
    """Printing bytes into a StringIO stores their str() form, not their text.

    In Python 3, StringIO.getvalue() always returns str.  When a bytes
    object is print()ed into the buffer it is coerced with str(), so the
    buffer ends up holding the literal notation "b'...'" rather than the
    decoded characters.
    """
    # The input really is bytes.
    assert isinstance(S0_BYTES, bytes)

    stream = StringIO()
    # end="" keeps print() from appending a newline to the buffer.
    print(S0_BYTES, end="", file=stream)
    written = stream.getvalue()

    # The output is str, not bytes ...
    assert isinstance(written, str)
    assert not isinstance(written, bytes)
    assert not isinstance(S0_BYTES, type(written))
    # ... so input and output cannot compare equal.
    assert written != S0_BYTES

    # The bytes were coerced, not decoded:  b'xyz' ---> "b'xyz'"
    assert written == str(S0_BYTES)

    # Two equivalent ways of actually decoding the bytes.
    via_str = str(S0_BYTES, encoding=ENCODING)
    via_decode = S0_BYTES.decode(ENCODING)
    assert via_decode == via_str

    # The bytes-literal markers are literally part of the buffered text.
    assert written == "b'" + via_str + "'"
    # Hence neither decoded form matches what the buffer holds.
    assert written != via_decode
    assert written != via_str

    # Historical note from the original author:
    # According to the Python 3.0.1 io documentation
    # (http://docs.python.org/release/3.0.1/library/io.html#io.TextIOWrapper),
    # a StringIO buffer created without an explicit encoding used the
    # locale's preferred encoding, i.e. locale.getpreferredencoding().
    # On the author's system that was the same encoding as this source
    # file, namely UTF-8.  If the two encodings differ on your system, you
    # may see results other than the above.
    #
    # In Python 3.2 the signature of StringIO() changed; it is no longer
    # possible to specify an encoding there.