diff options
Diffstat (limited to 'rdflib/parser.py')
| -rw-r--r-- | rdflib/parser.py | 104 |
1 files changed, 85 insertions, 19 deletions
diff --git a/rdflib/parser.py b/rdflib/parser.py index 9e501c03..73ce2ba7 100644 --- a/rdflib/parser.py +++ b/rdflib/parser.py @@ -9,15 +9,11 @@ can plugin to rdflib. If you are wanting to invoke a parser you likely want to do so through the Graph class parse method. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - +import codecs import os import sys -from io import BytesIO - +from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase from urllib.request import pathname2url from urllib.request import Request @@ -41,6 +37,8 @@ __all__ = [ class Parser(object): + __slots__ = set() + def __init__(self): pass @@ -48,6 +46,37 @@ class Parser(object): pass +class BytesIOWrapper(BufferedIOBase): + __slots__ = ("wrapped", "encoded", "encoding") + + def __init__(self, wrapped: str, encoding="utf-8"): + super(BytesIOWrapper, self).__init__() + self.wrapped = wrapped + self.encoding = encoding + self.encoded = None + + def read(self, *args, **kwargs): + if self.encoded is None: + b, blen = codecs.getencoder(self.encoding)(self.wrapped) + self.encoded = BytesIO(b) + return self.encoded.read(*args, **kwargs) + + def read1(self, *args, **kwargs): + if self.encoded is None: + b = codecs.getencoder(self.encoding)(self.wrapped) + self.encoded = BytesIO(b) + return self.encoded.read1(*args, **kwargs) + + def readinto(self, *args, **kwargs): + raise NotImplementedError() + + def readinto1(self, *args, **kwargs): + raise NotImplementedError() + + def write(self, *args, **kwargs): + raise NotImplementedError() + + class InputSource(xmlreader.InputSource, object): """ TODO: @@ -59,23 +88,39 @@ class InputSource(xmlreader.InputSource, object): self.auto_close = False # see Graph.parse(), true if opened by us def close(self): + c = self.getCharacterStream() + if c and hasattr(c, "close"): + try: + c.close() + except Exception: + pass f = self.getByteStream() if f and hasattr(f, "close"): - f.close() + try: + f.close() + except Exception: + pass class StringInputSource(InputSource): """ - TODO: + Constructs an RDFLib Parser InputSource from a Python String or Bytes """ - def __init__(self, value, system_id=None): + def __init__(self, value, encoding="utf-8", system_id=None): super(StringInputSource, self).__init__(system_id) - stream = BytesIO(value) - self.setByteStream(stream) - # TODO: - # encoding = value.encoding - # self.setEncoding(encoding) + if isinstance(value, str): + stream = StringIO(value) + self.setCharacterStream(stream) + self.setEncoding(encoding) + b_stream = BytesIOWrapper(value, encoding) + self.setByteStream(b_stream) + else: + stream = BytesIO(value) + self.setByteStream(stream) + c_stream = TextIOWrapper(stream, encoding) + self.setCharacterStream(c_stream) + self.setEncoding(c_stream.encoding) headers = { @@ -134,8 +179,18 @@ class FileInputSource(InputSource): system_id = URIRef(urljoin("file:", pathname2url(file.name)), base=base) super(FileInputSource, self).__init__(system_id) self.file = file - self.setByteStream(file) - # TODO: self.setEncoding(encoding) + if isinstance(file, TextIOBase): # Python3 unicode fp + self.setCharacterStream(file) + self.setEncoding(file.encoding) + try: + b = file.buffer + self.setByteStream(b) + except (AttributeError, LookupError): + self.setByteStream(file) + else: + self.setByteStream(file) + # We cannot set characterStream here because + # we do not know the Raw Bytes File encoding. def __repr__(self): return repr(self.file) @@ -171,10 +226,21 @@ def create_input_source( else: if isinstance(source, str): location = source + elif isinstance(source, bytes): + data = source elif hasattr(source, "read") and not isinstance(source, Namespace): f = source input_source = InputSource() - input_source.setByteStream(f) + if hasattr(source, "encoding"): + input_source.setCharacterStream(source) + input_source.setEncoding(source.encoding) + try: + b = file.buffer + input_source.setByteStream(b) + except (AttributeError, LookupError): + input_source.setByteStream(source) + else: + input_source.setByteStream(f) if f is sys.stdin: input_source.setSystemId("file:///dev/stdin") elif hasattr(f, "name"): @@ -206,8 +272,8 @@ def create_input_source( input_source = FileInputSource(file) if data is not None: - if isinstance(data, str): - data = data.encode("utf-8") + if not isinstance(data, (str, bytes, bytearray)): + raise RuntimeError("parse data can only str, or bytes.") input_source = StringInputSource(data) auto_close = True |
