diff options
| author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2022-11-07 11:00:43 +0000 |
|---|---|---|
| committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2022-11-07 11:00:43 +0000 |
| commit | fc59cfc7455fd34ab1e293b4e5ac54cffbc7502f (patch) | |
| tree | 9a2ac7a897a11911b5f974f3e30a1e1ead5d597b /docutils/test | |
| parent | 3a1df7d472649aa3241e57e1b3341d7ee58d087d (diff) | |
| download | docutils-fc59cfc7455fd34ab1e293b4e5ac54cffbc7502f.tar.gz | |
Simplify and expand tests for handling the encoding of included files.
Use a simpler sample file for UTF-16 input with a signature (BOM).
Test encoding auto-detection for included files (via BOM or encoding declaration)
to ensure it is in sync with encoding handling for the main document.
git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9220 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/test')
3 files changed, 114 insertions, 26 deletions
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_include.py b/docutils/test/test_parsers/test_rst/test_directives/test_include.py index 2e1c41b86..32f3f88cd 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_include.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_include.py @@ -55,8 +55,8 @@ include15 = mydir('includes/include15.txt') include16 = mydir('includes/include16.txt') include_literal = mydir('include_literal.txt') include_md = mydir('include.md') -utf_16_file = mydir('utf-16.csv') -utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xfe " +utf_16_file = 'data/utf-16-le-sig.txt' +utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xff " "in position 0: ordinal not in range(128)") nonexistent = os.path.join(os.path.dirname(parsers.rst.states.__file__), 'include', 'nonexistent') @@ -498,17 +498,39 @@ Encoding: .. include:: %s :encoding: utf-16 """ % reldir(utf_16_file), -b"""\ +"""\ <document source="test data"> <paragraph> Encoding: <paragraph> - "Treat", "Quantity", "Description" - "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!" - "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be - crunchy, now would it?" - "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?" -""".decode('raw_unicode_escape')], + Grüße +"""], +["""\ +Default encoding: auto-determine (here via BOM). + +.. include:: %s +""" % reldir(utf_16_file), +"""\ +<document source="test data"> + <paragraph> + Default encoding: auto-determine (here via BOM). + <paragraph> + Grüße +"""], +["""\ +Default encoding: auto-determine (via encoding declaration). + +.. include:: data/latin2.txt +""", +"""\ +<document source="test data"> + <paragraph> + Default encoding: auto-determine (via encoding declaration). + <comment xml:space="preserve"> + -*- encoding: latin2 -*- + <paragraph> + škoda +"""], ["""\ Include file is UTF-16-encoded, and is not valid ASCII. 
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_raw.py b/docutils/test/test_parsers/test_rst/test_directives/test_raw.py index a37224c53..bb1c87e79 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_raw.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_raw.py @@ -23,9 +23,8 @@ def suite(): mydir = 'test_parsers/test_rst/test_directives/' raw1 = os.path.join(mydir, 'raw1.txt') -utf_16_file = os.path.join(mydir, 'utf-16.csv') -utf_16_file_rel = utils.relative_path(None, utf_16_file) -utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xfe " +utf_16_file = 'data/utf-16-le-sig.txt' +utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xff " "in position 0: ordinal not in range(128)") totest = {} @@ -94,25 +93,34 @@ totest['raw'] = [ """], ["""\ .. raw:: html - :file: %s + :file: data/utf-16-le-sig.txt :encoding: utf-16 -""" % utf_16_file_rel, -b"""\ +""", +"""\ <document source="test data"> - <raw format="html" source="%s" xml:space="preserve"> - "Treat", "Quantity", "Description" - "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!" - "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be - crunchy, now would it?" - "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?" -""".decode('raw_unicode_escape') % utf_16_file_rel], + <raw format="html" source="data/utf-16-le-sig.txt" xml:space="preserve"> + Grüße +"""], +["""\ +Default encoding: auto-determine (here via BOM). + +.. raw:: html + :file: data/utf-16-le-sig.txt +""", +"""\ +<document source="test data"> + <paragraph> + Default encoding: auto-determine (here via BOM). + <raw format="html" source="data/utf-16-le-sig.txt" xml:space="preserve"> + Grüße +"""], ["""\ Raw input file is UTF-16-encoded, and is not valid ASCII. .. 
raw:: html - :file: %s + :file: data/utf-16-le-sig.txt :encoding: ascii -""" % utf_16_file_rel, +""", """\ <document source="test data"> <paragraph> @@ -123,9 +131,9 @@ Raw input file is UTF-16-encoded, and is not valid ASCII. %s <literal_block xml:space="preserve"> .. raw:: html - :file: %s + :file: data/utf-16-le-sig.txt :encoding: ascii -""" % (utf_16_error_str, utf_16_file_rel)], +""" % utf_16_error_str], ["""\ .. raw:: html :encoding: utf-8 diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_tables.py b/docutils/test/test_parsers/test_rst/test_directives/test_tables.py index a8a73ced2..dc6b065fe 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_tables.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_tables.py @@ -1165,6 +1165,64 @@ bad_encoding_result \u00bfOn a \u03c3\u03c4\u03b9\u03ba? """], ["""\ +.. csv-table:: auto encoding + :file: %s + :header-rows: 1 +""" % utf_16_csv, +"""\ +<document source="test data"> + <table> + <title> + auto encoding + <tgroup cols="3"> + <colspec colwidth="33"> + <colspec colwidth="33"> + <colspec colwidth="33"> + <thead> + <row> + <entry> + <paragraph> + Treat + <entry> + <paragraph> + Quantity + <entry> + <paragraph> + Description + <tbody> + <row> + <entry> + <paragraph> + Albatr\u00b0\u00df + <entry> + <paragraph> + 2.99 + <entry> + <paragraph> + \u00a1On a \u03c3\u03c4\u03b9\u03ba! + <row> + <entry> + <paragraph> + Crunchy Frog + <entry> + <paragraph> + 1.49 + <entry> + <paragraph> + If we took the b\u00f6nes out, it wouldn\u2019t be + crunchy, now would it? + <row> + <entry> + <paragraph> + Gannet Ripple + <entry> + <paragraph> + 1.99 + <entry> + <paragraph> + \u00bfOn a \u03c3\u03c4\u03b9\u03ba? +"""], +["""\ .. csv-table:: no CSV data :file: %s """ % empty_txt, |
