summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Takayuki Shimizukawa <shimizukawa@gmail.com> 2012-05-01 15:13:06 +0900
committer Takayuki Shimizukawa <shimizukawa@gmail.com> 2012-05-01 15:13:06 +0900
commit 1bb4923da1aa1878af44c4608d63025e4fae9d3b (patch)
tree 555db29968bd2f149c3d04ddd90aba0fc7705fdb
parent 9af2094ca44265c892d59792156db4950475b18b (diff)
download sphinx-git-1bb4923da1aa1878af44c4608d63025e4fae9d3b.tar.gz
support multibyte filename handling.
https://bitbucket.org/birkenfeld/sphinx/issue/703
-rw-r--r-- sphinx/builders/epub.py 9
-rw-r--r-- sphinx/cmdline.py 16
-rw-r--r-- sphinx/config.py 7
-rw-r--r-- sphinx/environment.py 6
-rw-r--r-- sphinx/util/osutil.py 3
-rw-r--r-- tests/path.py 12
-rw-r--r-- tests/test_build.py 20
7 files changed, 55 insertions, 18 deletions
diff --git a/sphinx/builders/epub.py b/sphinx/builders/epub.py
index b4c3b2776..c30fe735d 100644
--- a/sphinx/builders/epub.py
+++ b/sphinx/builders/epub.py
@@ -662,7 +662,12 @@ class EpubBuilder(StandaloneHTMLBuilder):
zipfile.ZIP_STORED)
for file in projectfiles:
fp = path.join(outdir, file)
- if isinstance(fp, unicode):
- fp = fp.encode(sys.getfilesystemencoding())
+ if sys.version_info < (2, 6):
+ # When zipfile.ZipFile.write is called with a unicode filename, ZipFile
+ # encodes the filename to 'utf-8' itself (only in Python 2.6 and later).
+ if isinstance(file, unicode):
+ # The OEBPS Container Format (OCF) 2.0.1 specification requires that
+ # "File Names MUST be UTF-8 encoded".
+ file = file.encode('utf-8')
epub.write(fp, file, zipfile.ZIP_DEFLATED)
epub.close()
diff --git a/sphinx/cmdline.py b/sphinx/cmdline.py
index 9fc213716..e9d707656 100644
--- a/sphinx/cmdline.py
+++ b/sphinx/cmdline.py
@@ -22,9 +22,17 @@ from sphinx.errors import SphinxError
from sphinx.application import Sphinx
from sphinx.util import Tee, format_exception_cut_frames, save_traceback
from sphinx.util.console import red, nocolor, color_terminal
+from sphinx.util.osutil import fs_encoding
from sphinx.util.pycompat import terminal_safe, bytes
+def abspath(pathdir):
+ pathdir = path.abspath(pathdir)
+ if isinstance(pathdir, bytes):
+ pathdir = pathdir.decode(fs_encoding)
+ return pathdir
+
+
def usage(argv, msg=None):
if msg:
print >>sys.stderr, msg
@@ -65,7 +73,7 @@ def main(argv):
try:
opts, args = getopt.getopt(argv[1:], 'ab:t:d:c:CD:A:ng:NEqQWw:P')
allopts = set(opt[0] for opt in opts)
- srcdir = confdir = path.abspath(args[0])
+ srcdir = confdir = abspath(args[0])
if not path.isdir(srcdir):
print >>sys.stderr, 'Error: Cannot find source directory `%s\'.' % (
srcdir,)
@@ -75,7 +83,7 @@ def main(argv):
print >>sys.stderr, ('Error: Source directory doesn\'t '
'contain conf.py file.')
return 1
- outdir = path.abspath(args[1])
+ outdir = abspath(args[1])
if not path.isdir(outdir):
print >>sys.stderr, 'Making output directory...'
os.makedirs(outdir)
@@ -119,9 +127,9 @@ def main(argv):
elif opt == '-t':
tags.append(val)
elif opt == '-d':
- doctreedir = path.abspath(val)
+ doctreedir = abspath(val)
elif opt == '-c':
- confdir = path.abspath(val)
+ confdir = abspath(val)
if not path.isfile(path.join(confdir, 'conf.py')):
print >>sys.stderr, ('Error: Configuration directory '
'doesn\'t contain conf.py file.')
diff --git a/sphinx/config.py b/sphinx/config.py
index 2012634e1..c020992b5 100644
--- a/sphinx/config.py
+++ b/sphinx/config.py
@@ -16,7 +16,7 @@ from os import path
from sphinx.errors import ConfigError
from sphinx.locale import l_
-from sphinx.util.osutil import make_filename
+from sphinx.util.osutil import make_filename, fs_encoding
from sphinx.util.pycompat import bytes, b, convert_with_2to3
nonascii_re = re.compile(b(r'[\x80-\xff]'))
@@ -208,14 +208,15 @@ class Config(object):
f.close()
try:
# compile to a code object, handle syntax errors
+ config_file_enc = config_file.encode(fs_encoding)
try:
- code = compile(source, config_file, 'exec')
+ code = compile(source, config_file_enc, 'exec')
except SyntaxError:
if convert_with_2to3:
# maybe the file uses 2.x syntax; try to refactor to
# 3.x syntax using 2to3
source = convert_with_2to3(config_file)
- code = compile(source, config_file, 'exec')
+ code = compile(source, config_file_enc, 'exec')
else:
raise
exec code in config
diff --git a/sphinx/environment.py b/sphinx/environment.py
index 5661c468a..ec3023905 100644
--- a/sphinx/environment.py
+++ b/sphinx/environment.py
@@ -41,7 +41,8 @@ from sphinx.util import url_re, get_matching_docs, docname_join, split_into, \
FilenameUniqDict
from sphinx.util.nodes import clean_astext, make_refnode, extract_messages, \
WarningStream
-from sphinx.util.osutil import movefile, SEP, ustrftime, find_catalog
+from sphinx.util.osutil import movefile, SEP, ustrftime, find_catalog, \
+ fs_encoding
from sphinx.util.matching import compile_matchers
from sphinx.util.pycompat import all, class_types
from sphinx.util.websupport import is_commentable
@@ -49,7 +50,6 @@ from sphinx.errors import SphinxError, ExtensionError
from sphinx.locale import _, init as init_locale
from sphinx.versioning import add_uids, merge_doctrees
-fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
orig_role_function = roles.role
orig_directive_function = directives.directive
@@ -1321,7 +1321,7 @@ class BuildEnvironment:
def _entries_from_toctree(toctreenode, parents,
separate=False, subtree=False):
"""Return TOC entries for a toctree node."""
- refs = [(e[0], str(e[1])) for e in toctreenode['entries']]
+ refs = [(e[0], e[1]) for e in toctreenode['entries']]
entries = []
for (title, ref) in refs:
try:
diff --git a/sphinx/util/osutil.py b/sphinx/util/osutil.py
index 5becc37df..8dc3b9d36 100644
--- a/sphinx/util/osutil.py
+++ b/sphinx/util/osutil.py
@@ -148,3 +148,6 @@ def find_catalog(docname, compaction):
ret = docname
return ret
+
+fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+
diff --git a/tests/path.py b/tests/path.py
index 8e9afeaa8..bd0552d70 100644
--- a/tests/path.py
+++ b/tests/path.py
@@ -16,16 +16,16 @@ from codecs import open
FILESYSTEMENCODING = sys.getfilesystemencoding() or sys.getdefaultencoding()
-class path(str):
+class path(unicode):
"""
Represents a path which behaves like a string.
"""
if sys.version_info < (3, 0):
def __new__(cls, s, encoding=FILESYSTEMENCODING, errors='strict'):
- if isinstance(s, unicode):
- s = s.encode(encoding, errors=errors)
- return str.__new__(cls, s)
- return str.__new__(cls, s)
+ if isinstance(s, str):
+ s = s.decode(encoding, errors)
+ return unicode.__new__(cls, s)
+ return unicode.__new__(cls, s)
@property
def parent(self):
@@ -193,4 +193,4 @@ class path(str):
__div__ = __truediv__ = joinpath
def __repr__(self):
- return '%s(%s)' % (self.__class__.__name__, str.__repr__(self))
+ return '%s(%s)' % (self.__class__.__name__, unicode.__repr__(self))
diff --git a/tests/test_build.py b/tests/test_build.py
index 5f24f89ac..ae3f29211 100644
--- a/tests/test_build.py
+++ b/tests/test_build.py
@@ -10,6 +10,7 @@
"""
from util import *
+from textwrap import dedent
def teardown_module():
@@ -61,3 +62,22 @@ else:
@with_app(buildername='singlehtml', cleanenv=True)
def test_singlehtml(app):
app.builder.build_all()
+
+@with_app(buildername='html', srcdir='(temp)')
+def test_multibyte_path(app):
+ srcdir = path(app.srcdir)
+ mb_name = u'\u65e5\u672c\u8a9e'
+ (srcdir / mb_name).makedirs()
+ (srcdir / mb_name / (mb_name + '.txt')).write_text(dedent("""
+ multi byte file name page
+ ==========================
+ """))
+
+ master_doc = srcdir / 'contents.txt'
+ master_doc.write_bytes((master_doc.text() + dedent("""
+ .. toctree::
+
+ %(mb_name)s/%(mb_name)s
+ """ % locals())
+ ).encode('utf-8'))
+ app.builder.build_all()