1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
from io import StringIO
# Codec used to build the bytes fixture and to decode it again in the tests.
ENCODING = 'utf-8'

# Text fixture: a str that mixes ASCII, German umlauts and Chinese characters.
S0 = 'hello world, Umlauts: äöüßÄÖÜ, Chinese: 四是四,十是十,十四是十四,四十是四十,四十四隻不識字之石獅子是死的'

# Bytes fixture: an ASCII-only phrase encoded with ENCODING.
S0_BYTES = bytes('fe fi foe fam', ENCODING)
#print("###", StringIO, "###")
def test_001_text():
    """Check that str printed into a StringIO buffer comes back unchanged.

    The text fixture S0 is printed into an in-memory buffer and read back
    with getvalue(); the result must be a str equal to S0.
    """
    # The fixture itself must be text (str).
    assert isinstance(S0, str)

    sink = StringIO()
    # end="" keeps print() from appending a newline, so the buffer holds
    # exactly the printed text.
    print(S0, end="", file=sink)
    echoed = sink.getvalue()

    # Same type going in and coming out ...
    assert isinstance(S0, type(echoed))
    # ... same contents ...
    assert S0 == echoed
    # ... and that type is str.
    assert isinstance(echoed, str)
def test_002_bytes():
    """Check what comes out of a StringIO buffer when bytes are printed in.

    getvalue() returns str even though a bytes object was printed.
    The resulting text is the str() form of the bytes object
    (b'xyz' ---> "b'xyz'"), which is different from decoding the bytes
    with ENCODING.
    """
    sink = StringIO()
    # print() turns its argument into text via str() before writing it,
    # and end="" suppresses the trailing newline.
    print(S0_BYTES, end="", file=sink)
    text = sink.getvalue()

    # Two spellings of "decode the bytes with ENCODING".
    via_method = S0_BYTES.decode(ENCODING)
    via_constructor = str(S0_BYTES, encoding=ENCODING)

    # The input is bytes ...
    assert isinstance(S0_BYTES, bytes)
    # ... but the value read back is not bytes ...
    assert not isinstance(S0_BYTES, type(text))
    assert not isinstance(text, bytes)
    # ... it is str.
    assert isinstance(text, str)

    # Consequently input and output do not compare equal.
    assert S0_BYTES != text

    # The output is the str() form of the bytes object:
    #   b'xyz' ---> "b'xyz'"
    assert str(S0_BYTES) == text
    # The b'...' type marker is literally part of the output text.
    assert "b'" + via_constructor + "'" == text

    # This coercion is NOT decoding: the decoded text differs from the output.
    assert via_method != text
    assert via_constructor != text

    # Both decoding spellings yield the same text.
    assert via_method == via_constructor
# Additional note:
# If we do not specify an encoding when we create a StringIO
# buffer, Python 3 automatically uses the locale's preferred
# encoding: locale.getpreferredencoding()
# Cf. http://docs.python.org/release/3.0.1/library/io.html#io.TextIOWrapper
# In my case this is the same encoding as the encoding of this source file,
# namely UTF-8. If the two encodings differ on your system, you may
# see results that differ from the ones above.
#
# In Python 3.2 the signature of StringIO() changed: it is no longer
# possible to specify an encoding there.
|