diff options
| author | Iuri de Silvio <iurisilvio@gmail.com> | 2014-08-26 08:24:36 -0300 |
|---|---|---|
| committer | Iuri de Silvio <iurisilvio@gmail.com> | 2014-08-26 08:24:36 -0300 |
| commit | 48e576954d4570edc6fca07155d68d9d7a4a4c28 (patch) | |
| tree | 25c0a36283bdec214c5629459040a26f84924ba1 /tablib | |
| parent | 8479df725e1e02e4380acaba2cf93a74ca63b84f (diff) | |
| parent | a21f8187f8bdbe27434478da45fa58f3e2158268 (diff) | |
| download | tablib-48e576954d4570edc6fca07155d68d9d7a4a4c28.tar.gz | |
Merge pull request #153 from phargogh/dbf-support
Support for dBase (DBF) files
Diffstat (limited to 'tablib')
| -rw-r--r-- | tablib/compat.py | 2 | ||||
| -rw-r--r-- | tablib/core.py | 23 | ||||
| -rw-r--r-- | tablib/formats/__init__.py | 3 | ||||
| -rw-r--r-- | tablib/formats/_dbf.py | 93 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/__init__.py | 0 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/dbf.py | 292 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/dbfnew.py | 188 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/fields.py | 466 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/header.py | 275 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/record.py | 262 | ||||
| -rw-r--r-- | tablib/packages/dbfpy/utils.py | 170 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/__init__.py | 0 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/dbf.py | 293 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/dbfnew.py | 182 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/fields.py | 467 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/header.py | 273 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/record.py | 266 | ||||
| -rw-r--r-- | tablib/packages/dbfpy3/utils.py | 170 |
18 files changed, 3424 insertions, 1 deletions
diff --git a/tablib/compat.py b/tablib/compat.py index 919f464..d4582d5 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -28,6 +28,7 @@ if is_py3: from tablib.packages import markup3 as markup from tablib.packages import openpyxl3 as openpyxl from tablib.packages.odf3 import opendocument, style, text, table + import tablib.packages.dbfpy3 as dbfpy import csv from io import StringIO @@ -49,5 +50,6 @@ else: from tablib.packages.odf import opendocument, style, text, table from tablib.packages import unicodecsv as csv + import tablib.packages.dbfpy as dbfpy unicode = unicode diff --git a/tablib/core.py b/tablib/core.py index 9db46c9..02c9085 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -559,6 +559,29 @@ class Dataset(object): """ pass + @property + def dbf(): + """A dBASE representation of the :class:`Dataset` object. + + A dataset object can also be imported by setting the :class:`Dataset.dbf` attribute::: + + # To import data from an existing DBF file: + data = tablib.Dataset() + data.dbf = open('existing_table.dbf').read() + + # to import data from an ASCII-encoded bytestring: + data = tablib.Dataset() + data.dbf = <bytestring of tabular data> + + .. admonition:: Binary Warning + + :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode:: + + with open('output.dbf', 'wb') as f: + f.write(data.dbf) + """ + pass + # ---- # Rows diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 5fdf279..1eda107 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -11,5 +11,6 @@ from . import _tsv as tsv from . import _html as html from . import _xlsx as xlsx from . import _ods as ods +from . 
import _dbf as dbf -available = (json, xls, yaml, csv, tsv, html, xlsx, ods) +available = (json, xls, yaml, csv, dbf, tsv, html, xlsx, ods) diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py new file mode 100644 index 0000000..41c2ef4 --- /dev/null +++ b/tablib/formats/_dbf.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- + +""" Tablib - DBF Support. +""" +import tempfile +import struct +import os + +from tablib.compat import StringIO +from tablib.compat import dbfpy +from tablib.compat import is_py3 + +if is_py3: + from tablib.packages.dbfpy3 import dbf + from tablib.packages.dbfpy3 import dbfnew + from tablib.packages.dbfpy3 import record as dbfrecord + import io +else: + from tablib.packages.dbfpy import dbf + from tablib.packages.dbfpy import dbfnew + from tablib.packages.dbfpy import record as dbfrecord + + +title = 'dbf' +extensions = ('csv',) + +DEFAULT_ENCODING = 'utf-8' + +def export_set(dataset): + """Returns DBF representation of a Dataset""" + new_dbf = dbfnew.dbf_new() + temp_file, temp_uri = tempfile.mkstemp() + + # create the appropriate fields based on the contents of the first row + first_row = dataset[0] + for fieldname, field_value in zip(dataset.headers, first_row): + if type(field_value) in [int, float]: + new_dbf.add_field(fieldname, 'N', 10, 8) + else: + new_dbf.add_field(fieldname, 'C', 80) + + new_dbf.write(temp_uri) + + dbf_file = dbf.Dbf(temp_uri, readOnly=0) + for row in dataset: + record = dbfrecord.DbfRecord(dbf_file) + for fieldname, field_value in zip(dataset.headers, row): + record[fieldname] = field_value + record.store() + + dbf_file.close() + dbf_stream = open(temp_uri, 'rb') + if is_py3: + stream = io.BytesIO(dbf_stream.read()) + else: + stream = StringIO(dbf_stream.read()) + dbf_stream.close() + os.remove(temp_uri) + return stream.getvalue() + +def import_set(dset, in_stream, headers=True): + """Returns a dataset from a DBF stream.""" + + dset.wipe() + if is_py3: + _dbf = dbf.Dbf(io.BytesIO(in_stream)) + else: + _dbf = 
dbf.Dbf(StringIO(in_stream)) + dset.headers = _dbf.fieldNames + for record in range(_dbf.recordCount): + row = [_dbf[record][f] for f in _dbf.fieldNames] + dset.append(row) + +def detect(stream): + """Returns True if the given stream is valid DBF""" + #_dbf = dbf.Table(StringIO(stream)) + try: + if is_py3: + if type(stream) is not bytes: + stream = bytes(stream, 'utf-8') + _dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True) + else: + _dbf = dbf.Dbf(StringIO(stream), readOnly=True) + return True + except (ValueError, struct.error): + # When we try to open up a file that's not a DBF, dbfpy raises a + # ValueError. + # When unpacking a string argument with less than 8 chars, struct.error is + # raised. + return False + + + diff --git a/tablib/packages/dbfpy/__init__.py b/tablib/packages/dbfpy/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tablib/packages/dbfpy/__init__.py diff --git a/tablib/packages/dbfpy/dbf.py b/tablib/packages/dbfpy/dbf.py new file mode 100644 index 0000000..b3d2e21 --- /dev/null +++ b/tablib/packages/dbfpy/dbf.py @@ -0,0 +1,292 @@ +#! /usr/bin/env python +"""DBF accessing helpers. 
+ +FIXME: more documentation needed + +Examples: + + Create new table, setup structure, add records: + + dbf = Dbf(filename, new=True) + dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (n, s, i, b) in ( + ("John", "Miller", "YC", (1980, 10, 11)), + ("Andy", "Larkin", "", (1980, 4, 11)), + ): + rec = dbf.newRecord() + rec["NAME"] = n + rec["SURNAME"] = s + rec["INITIALS"] = i + rec["BIRTHDATE"] = b + rec.store() + dbf.close() + + Open existed dbf, read some data: + + dbf = Dbf(filename, True) + for rec in dbf: + for fldName in dbf.fieldNames: + print '%s:\t %s (%s)' % (fldName, rec[fldName], + type(rec[fldName])) + print + dbf.close() + +""" +"""History (most recent first): +11-feb-2007 [als] export INVALID_VALUE; + Dbf: added .ignoreErrors, .INVALID_VALUE +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] removed fromStream and newDbf methods: + use argument of __init__ call must be used instead; + added class fields pointing to the header and + record classes. +17-dec-2005 [yc] split to several modules; reimplemented +13-dec-2005 [yc] adapted to the changes of the `strutil` module. +13-sep-2002 [als] support FoxPro Timestamp datatype +15-nov-1999 [jjk] documentation updates, add demo +24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks +08-jun-1998 [jjk] fix problems, add more features +20-feb-1998 [jjk] fix problems, add more features +19-feb-1998 [jjk] add create/write capabilities +18-feb-1998 [jjk] from dbfload.py +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] +__author__ = "Jeff Kunce <kuncej@mail.conservation.state.mo.us>" + +__all__ = ["Dbf"] + +from . import header +from .import record +from utils import INVALID_VALUE + +class Dbf(object): + """DBF accessor. 
+ + FIXME: + docs and examples needed (dont' forget to tell + about problems adding new fields on the fly) + + Implementation notes: + ``_new`` field is used to indicate whether this is + a new data table. `addField` could be used only for + the new tables! If at least one record was appended + to the table it's structure couldn't be changed. + + """ + + __slots__ = ("name", "header", "stream", + "_changed", "_new", "_ignore_errors") + + HeaderClass = header.DbfHeader + RecordClass = record.DbfRecord + INVALID_VALUE = INVALID_VALUE + + ## initialization and creation helpers + + def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): + """Initialize instance. + + Arguments: + f: + Filename or file-like object. + new: + True if new data table must be created. Assume + data table exists if this argument is False. + readOnly: + if ``f`` argument is a string file will + be opend in read-only mode; in other cases + this argument is ignored. This argument is ignored + even if ``new`` argument is True. + headerObj: + `header.DbfHeader` instance or None. If this argument + is None, new empty header will be used with the + all fields set by default. + ignoreErrors: + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. 
+ + """ + if isinstance(f, basestring): + # a filename + self.name = f + if new: + # new table (table file must be + # created or opened and truncated) + self.stream = file(f, "w+b") + else: + # tabe file must exist + self.stream = file(f, ("r+b", "rb")[bool(readOnly)]) + else: + # a stream + self.name = getattr(f, "name", "") + self.stream = f + if new: + # if this is a new table, header will be empty + self.header = self.HeaderClass() + else: + # or instantiated using stream + self.header = self.HeaderClass.fromStream(self.stream) + self.ignoreErrors = ignoreErrors + self._new = bool(new) + self._changed = False + + ## properties + + closed = property(lambda self: self.stream.closed) + recordCount = property(lambda self: self.header.recordCount) + fieldNames = property( + lambda self: [_fld.name for _fld in self.header.fields]) + fieldDefs = property(lambda self: self.header.fields) + changed = property(lambda self: self._changed or self.header.changed) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on the header object and self""" + self.header.ignoreErrors = self._ignore_errors = bool(value) + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## protected methods + + def _fixIndex(self, index): + """Return fixed index. + + This method fails if index isn't a numeric object + (long or int). Or index isn't in a valid range + (less or equal to the number of records in the db). + + If ``index`` is a negative number, it will be + treated as a negative indexes for list objects. + + Return: + Return value is numeric object maning valid index. 
+ + """ + if not isinstance(index, (int, long)): + raise TypeError("Index must be a numeric object") + if index < 0: + # index from the right side + # fix it to the left-side index + index += len(self) + 1 + if index >= len(self): + raise IndexError("Record index out of range") + return index + + ## iterface methods + + def close(self): + self.flush() + self.stream.close() + + def flush(self): + """Flush data to the associated stream.""" + if self.changed: + self.header.setCurrentDate() + self.header.write(self.stream) + self.stream.flush() + self._changed = False + + def indexOfFieldName(self, name): + """Index of field named ``name``.""" + # FIXME: move this to header class + return self.header.fields.index(name) + + def newRecord(self): + """Return new record, which belong to this table.""" + return self.RecordClass(self) + + def append(self, record): + """Append ``record`` to the database.""" + record.index = self.header.recordCount + record._write() + self.header.recordCount += 1 + self._changed = True + self._new = False + + def addField(self, *defs): + """Add field definitions. + + For more information see `header.DbfHeader.addField`. 
+ + """ + if self._new: + self.header.addField(*defs) + else: + raise TypeError("At least one record was added, " + "structure can't be changed") + + ## 'magic' methods (representation and sequence interface) + + def __repr__(self): + return "Dbf stream '%s'\n" % self.stream + repr(self.header) + + def __len__(self): + """Return number of records.""" + return self.recordCount + + def __getitem__(self, index): + """Return `DbfRecord` instance.""" + return self.RecordClass.fromStream(self, self._fixIndex(index)) + + def __setitem__(self, index, record): + """Write `DbfRecord` instance to the stream.""" + record.index = self._fixIndex(index) + record._write() + self._changed = True + self._new = False + + #def __del__(self): + # """Flush stream upon deletion of the object.""" + # self.flush() + + +def demoRead(filename): + _dbf = Dbf(filename, True) + for _rec in _dbf: + print + print(repr(_rec)) + _dbf.close() + +def demoCreate(filename): + _dbf = Dbf(filename, new=True) + _dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (_n, _s, _i, _b) in ( + ("John", "Miller", "YC", (1981, 1, 2)), + ("Andy", "Larkin", "AL", (1982, 3, 4)), + ("Bill", "Clinth", "", (1983, 5, 6)), + ("Bobb", "McNail", "", (1984, 7, 8)), + ): + _rec = _dbf.newRecord() + _rec["NAME"] = _n + _rec["SURNAME"] = _s + _rec["INITIALS"] = _i + _rec["BIRTHDATE"] = _b + _rec.store() + print(repr(_dbf)) + _dbf.close() + +if (__name__=='__main__'): + import sys + _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" + demoCreate(_name) + demoRead(_name) + +# vim: set et sw=4 sts=4 : diff --git a/tablib/packages/dbfpy/dbfnew.py b/tablib/packages/dbfpy/dbfnew.py new file mode 100644 index 0000000..dea7e52 --- /dev/null +++ b/tablib/packages/dbfpy/dbfnew.py @@ -0,0 +1,188 @@ +#!/usr/bin/python +""".DBF creation helpers. + +Note: this is a legacy interface. 
New code should use Dbf class + for table creation (see examples in dbf.py) + +TODO: + - handle Memo fields. + - check length of the fields accoring to the + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + +""" +"""History (most recent first) +04-jul-2006 [als] added export declaration; + updated for dbfpy 2.0 +15-dec-2005 [yc] define dbf_new.__slots__ +14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; + dbf_new now is a new class (inherited from object) +??-jun-2000 [--] added by Hans Fiby +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] + +__all__ = ["dbf_new"] + +from dbf import * +from fields import * +from header import * +from record import * + +class _FieldDefinition(object): + """Field definition. + + This is a simple structure, which contains ``name``, ``type``, + ``len``, ``dec`` and ``cls`` fields. + + Objects also implement get/setitem magic functions, so fields + could be accessed via sequence iterface, where 'name' has + index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and + 'cls' could be located at index 4. + + """ + + __slots__ = "name", "type", "len", "dec", "cls" + + # WARNING: be attentive - dictionaries are mutable! + FLD_TYPES = { + # type: (cls, len) + "C": (DbfCharacterFieldDef, None), + "N": (DbfNumericFieldDef, None), + "L": (DbfLogicalFieldDef, 1), + # FIXME: support memos + # "M": (DbfMemoFieldDef), + "D": (DbfDateFieldDef, 8), + # FIXME: I'm not sure length should be 14 characters! 
+ # but temporary I use it, cuz date is 8 characters + # and time 6 (hhmmss) + "T": (DbfDateTimeFieldDef, 14), + } + + def __init__(self, name, type, len=None, dec=0): + _cls, _len = self.FLD_TYPES[type] + if _len is None: + if len is None: + raise ValueError("Field length must be defined") + _len = len + self.name = name + self.type = type + self.len = _len + self.dec = dec + self.cls = _cls + + def getDbfField(self): + "Return `DbfFieldDef` instance from the current definition." + return self.cls(self.name, self.len, self.dec) + + def appendToHeader(self, dbfh): + """Create a `DbfFieldDef` instance and append it to the dbf header. + + Arguments: + dbfh: `DbfHeader` instance. + + """ + _dbff = self.getDbfField() + dbfh.addField(_dbff) + + +class dbf_new(object): + """New .DBF creation helper. + + Example Usage: + + dbfn = dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + + Note: + This module cannot handle Memo-fields, + they are special. + + """ + + __slots__ = ("fields",) + + FieldDefinitionClass = _FieldDefinition + + def __init__(self): + self.fields = [] + + def add_field(self, name, typ, len, dec=0): + """Add field definition. + + Arguments: + name: + field name (str object). field name must not + contain ASCII NULs and it's length shouldn't + exceed 10 characters. + typ: + type of the field. this must be a single character + from the "CNLMDT" set meaning character, numeric, + logical, memo, date and date/time respectively. + len: + length of the field. this argument is used only for + the character and numeric fields. all other fields + have fixed length. + FIXME: use None as a default for this argument? + dec: + decimal precision. used only for the numric fields. 
+ + """ + self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) + + def write(self, filename): + """Create empty .DBF file using current structure.""" + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + _dbfStream = file(filename, "wb") + _dbfh.write(_dbfStream) + _dbfStream.close() + + def write_stream(self, stream): + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + _dbfh.write(stream) + + +if (__name__=='__main__'): + # create a new DBF-File + dbfn=dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + # test new dbf + print "*** created tst.dbf: ***" + dbft = Dbf('tst.dbf', readOnly=0) + print repr(dbft) + # add a record + rec=DbfRecord(dbft) + rec['name']='something' + rec['price']=10.5 + rec['date']=(2000,1,12) + rec.store() + # add another record + rec=DbfRecord(dbft) + rec['name']='foo and bar' + rec['price']=12234 + rec['date']=(1992,7,15) + rec.store() + + # show the records + print "*** inserted 2 records into tst.dbf: ***" + print repr(dbft) + for i1 in range(len(dbft)): + rec = dbft[i1] + for fldName in dbft.fieldNames: + print '%s:\t %s'%(fldName, rec[fldName]) + print + dbft.close() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/fields.py b/tablib/packages/dbfpy/fields.py new file mode 100644 index 0000000..69cd436 --- /dev/null +++ b/tablib/packages/dbfpy/fields.py @@ -0,0 +1,466 @@ +"""DBF fields definitions. 
+ +TODO: + - make memos work +""" +"""History (most recent first): +26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes +05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date +16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point + in the value to select float or integer return type +13-mar-2008 [als] check field name length in constructor +11-feb-2007 [als] handle value conversion errors +10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() +01-dec-2006 [als] Timestamp columns use None for empty values +31-oct-2006 [als] support field types 'F' (float), 'I' (integer) + and 'Y' (currency); + automate export and registration of field classes +04-jul-2006 [als] added export declaration +10-mar-2006 [als] decode empty values for Date and Logical fields; + show field name in errors +10-mar-2006 [als] fix Numeric value decoding: according to spec, + value always is string representation of the number; + ensure that encoded Numeric value fits into the field +20-dec-2005 [yc] use field names in upper case +15-dec-2005 [yc] field definitions moved from `dbf`. +""" + +__version__ = "$Revision: 1.14 $"[11:-2] +__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] + +__all__ = ["lookupFor",] # field classes added at the end of the module + +import datetime +import struct +import sys + +from . import utils + +## abstract definitions + +class DbfFieldDef(object): + """Abstract field definition. + + Child classes must override ``type`` class attribute to provide datatype + infromation of the field definition. For more info about types visit + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + + Also child classes must override ``defaultValue`` field to provide + default value for the field value. + + If child class has fixed length ``length`` class attribute must be + overriden and set to the valid value. None value means, that field + isn't of fixed length. 
+ + Note: ``name`` field must not be changed after instantiation. + + """ + + __slots__ = ("name", "length", "decimalCount", + "start", "end", "ignoreErrors") + + # length of the field, None in case of variable-length field, + # or a number if this field is a fixed-length field + length = None + + # field type. for more information about fields types visit + # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + # must be overriden in child classes + typeCode = None + + # default value for the field. this field must be + # overriden in child classes + defaultValue = None + + def __init__(self, name, length=None, decimalCount=None, + start=None, stop=None, ignoreErrors=False, + ): + """Initialize instance.""" + assert self.typeCode is not None, "Type code must be overriden" + assert self.defaultValue is not None, "Default value must be overriden" + ## fix arguments + if len(name) >10: + raise ValueError("Field name \"%s\" is too long" % name) + name = str(name).upper() + if self.__class__.length is None: + if length is None: + raise ValueError("[%s] Length isn't specified" % name) + length = int(length) + if length <= 0: + raise ValueError("[%s] Length must be a positive integer" + % name) + else: + length = self.length + if decimalCount is None: + decimalCount = 0 + ## set fields + self.name = name + # FIXME: validate length according to the specification at + # http://www.clicketyclick.dk/databases/xbase/format/data_types.html + self.length = length + self.decimalCount = decimalCount + self.ignoreErrors = ignoreErrors + self.start = start + self.end = stop + + def __cmp__(self, other): + return cmp(self.name, str(other).upper()) + + def __hash__(self): + return hash(self.name) + + def fromString(cls, string, start, ignoreErrors=False): + """Decode dbf field definition from the string data. + + Arguments: + string: + a string, dbf definition is decoded from. length of + the string must be 32 bytes. + start: + position in the database file. 
+ ignoreErrors: + initial error processing mode for the new field (boolean) + + """ + assert len(string) == 32 + _length = ord(string[16]) + return cls(utils.unzfill(string)[:11], _length, ord(string[17]), + start, start + _length, ignoreErrors=ignoreErrors) + fromString = classmethod(fromString) + + def toString(self): + """Return encoded field definition. + + Return: + Return value is a string object containing encoded + definition of this field. + + """ + if sys.version_info < (2, 4): + # earlier versions did not support padding character + _name = self.name[:11] + "\0" * (11 - len(self.name)) + else: + _name = self.name.ljust(11, '\0') + return ( + _name + + self.typeCode + + #data address + chr(0) * 4 + + chr(self.length) + + chr(self.decimalCount) + + chr(0) * 14 + ) + + def __repr__(self): + return "%-10s %1s %3d %3d" % self.fieldInfo() + + def fieldInfo(self): + """Return field information. + + Return: + Return value is a (name, type, length, decimals) tuple. + + """ + return (self.name, self.typeCode, self.length, self.decimalCount) + + def rawFromRecord(self, record): + """Return a "raw" field value from the record string.""" + return record[self.start:self.end] + + def decodeFromRecord(self, record): + """Return decoded field value from the record string.""" + try: + return self.decodeValue(self.rawFromRecord(record)) + except: + if self.ignoreErrors: + return utils.INVALID_VALUE + else: + raise + + def decodeValue(self, value): + """Return decoded value from string value. + + This method shouldn't be used publicly. It's called from the + `decodeFromRecord` method. + + This is an abstract method and it must be overridden in child classes. + """ + raise NotImplementedError + + def encodeValue(self, value): + """Return str object containing encoded field value. + + This is an abstract method and it must be overriden in child classes. 
+ """ + raise NotImplementedError + +## real classes + +class DbfCharacterFieldDef(DbfFieldDef): + """Definition of the character field.""" + + typeCode = "C" + defaultValue = "" + + def decodeValue(self, value): + """Return string object. + + Return value is a ``value`` argument with stripped right spaces. + + """ + return value.rstrip(" ") + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``.""" + return str(value)[:self.length].ljust(self.length) + + +class DbfNumericFieldDef(DbfFieldDef): + """Definition of the numeric field.""" + + typeCode = "N" + # XXX: now I'm not sure it was a good idea to make a class field + # `defaultValue` instead of a generic method as it was implemented + # previously -- it's ok with all types except number, cuz + # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. + defaultValue = 0 + + def decodeValue(self, value): + """Return a number decoded from ``value``. + + If decimals is zero, value will be decoded as an integer; + or as a float otherwise. + + Return: + Return value is a int (long) or float instance. + + """ + value = value.strip(" \0") + if "." 
in value: + # a float (has decimal separator) + return float(value) + elif value: + # must be an integer + return int(value) + else: + return 0 + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + _rv = ("%*.*f" % (self.length, self.decimalCount, value)) + if len(_rv) > self.length: + _ppos = _rv.find(".") + if 0 <= _ppos <= self.length: + _rv = _rv[:self.length] + else: + raise ValueError("[%s] Numeric overflow: %s (field width: %i)" + % (self.name, _rv, self.length)) + return _rv + +class DbfFloatFieldDef(DbfNumericFieldDef): + """Definition of the float field - same as numeric.""" + + typeCode = "F" + +class DbfIntegerFieldDef(DbfFieldDef): + """Definition of the integer field.""" + + typeCode = "I" + length = 4 + defaultValue = 0 + + def decodeValue(self, value): + """Return an integer number decoded from ``value``.""" + return struct.unpack("<i", value)[0] + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<i", int(value)) + +class DbfCurrencyFieldDef(DbfFieldDef): + """Definition of the currency field.""" + + typeCode = "Y" + length = 8 + defaultValue = 0.0 + + def decodeValue(self, value): + """Return float number decoded from ``value``.""" + return struct.unpack("<q", value)[0] / 10000. + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<q", round(value * 10000)) + +class DbfLogicalFieldDef(DbfFieldDef): + """Definition of the logical field.""" + + typeCode = "L" + defaultValue = -1 + length = 1 + + def decodeValue(self, value): + """Return True, False or -1 decoded from ``value``.""" + # Note: value always is 1-char string + if value == "?": + return -1 + if value in "NnFf ": + return False + if value in "YyTt": + return True + raise ValueError("[%s] Invalid logical value %r" % (self.name, value)) + + def encodeValue(self, value): + """Return a character from the "TF?" set. 
+ + Return: + Return value is "T" if ``value`` is True + "?" if value is -1 or False otherwise. + + """ + if value is True: + return "T" + if value == -1: + return "?" + return "F" + + +class DbfMemoFieldDef(DbfFieldDef): + """Definition of the memo field. + + Note: memos aren't currenly completely supported. + + """ + + typeCode = "M" + defaultValue = " " * 10 + length = 10 + + def decodeValue(self, value): + """Return int .dbt block number decoded from the string object.""" + #return int(value) + raise NotImplementedError + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``. + + Note: this is an internal method. + + """ + #return str(value)[:self.length].ljust(self.length) + raise NotImplementedError + + +class DbfDateFieldDef(DbfFieldDef): + """Definition of the date field.""" + + typeCode = "D" + defaultValue = utils.classproperty(lambda cls: datetime.date.today()) + # "yyyymmdd" gives us 8 characters + length = 8 + + def decodeValue(self, value): + """Return a ``datetime.date`` instance decoded from ``value``.""" + if value.strip(): + return utils.getDate(value) + else: + return None + + def encodeValue(self, value): + """Return a string-encoded value. + + ``value`` argument should be a value suitable for the + `utils.getDate` call. + + Return: + Return value is a string in format "yyyymmdd". + + """ + if value: + return utils.getDate(value).strftime("%Y%m%d") + else: + return " " * self.length + + +class DbfDateTimeFieldDef(DbfFieldDef): + """Definition of the timestamp field.""" + + # a difference between JDN (Julian Day Number) + # and GDN (Gregorian Day Number). note, that GDN < JDN + JDN_GDN_DIFF = 1721425 + typeCode = "T" + defaultValue = utils.classproperty(lambda cls: datetime.datetime.now()) + # two 32-bits integers representing JDN and amount of + # milliseconds respectively gives us 8 bytes. + # note, that values must be encoded in LE byteorder. 
+ length = 8 + + def decodeValue(self, value): + """Return a `datetime.datetime` instance.""" + assert len(value) == self.length + # LE byteorder + _jdn, _msecs = struct.unpack("<2I", value) + if _jdn >= 1: + _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) + _rv += datetime.timedelta(0, _msecs / 1000.0) + else: + # empty date + _rv = None + return _rv + + def encodeValue(self, value): + """Return a string-encoded ``value``.""" + if value: + value = utils.getDateTime(value) + # LE byteorder + _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, + (value.hour * 3600 + value.minute * 60 + value.second) * 1000) + else: + _rv = "\0" * self.length + assert len(_rv) == self.length + return _rv + + +_fieldsRegistry = {} + +def registerField(fieldCls): + """Register field definition class. + + ``fieldCls`` should be subclass of the `DbfFieldDef`. + + Use `lookupFor` to retrieve field definition class + by the type code. + + """ + assert fieldCls.typeCode is not None, "Type code isn't defined" + # XXX: use fieldCls.typeCode.upper()? in case of any decign + # don't forget to look to the same comment in ``lookupFor`` method + _fieldsRegistry[fieldCls.typeCode] = fieldCls + + +def lookupFor(typeCode): + """Return field definition class for the given type code. + + ``typeCode`` must be a single character. That type should be + previously registered. + + Use `registerField` to register new field class. + + Return: + Return value is a subclass of the `DbfFieldDef`. + + """ + # XXX: use typeCode.upper()? 
in case of any decign don't + # forget to look to the same comment in ``registerField`` + return _fieldsRegistry[typeCode] + +## register generic types + +for (_name, _val) in globals().items(): + if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ + and (_name != "DbfFieldDef"): + __all__.append(_name) + registerField(_val) +del _name, _val + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/header.py b/tablib/packages/dbfpy/header.py new file mode 100644 index 0000000..03a877c --- /dev/null +++ b/tablib/packages/dbfpy/header.py @@ -0,0 +1,275 @@ +"""DBF header definition. + +TODO: + - handle encoding of the character fields + (encoding information stored in the DBF header) + +""" +"""History (most recent first): +16-sep-2010 [als] fromStream: fix century of the last update field +11-feb-2007 [als] added .ignoreErrors +10-feb-2007 [als] added __getitem__: return field definitions + by field name or field number (zero-based) +04-jul-2006 [als] added export declaration +15-dec-2005 [yc] created +""" + +__version__ = "$Revision: 1.6 $"[11:-2] +__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] + +__all__ = ["DbfHeader"] + +try: + import cStringIO +except ImportError: + # when we're in python3, we cStringIO has been replaced by io.StringIO + import io as cStringIO +import datetime +import struct +import time + +from . import fields +from . import utils + + +class DbfHeader(object): + """Dbf header definition. 
+ + For more information about dbf header format visit + `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` + + Examples: + Create an empty dbf header and add some field definitions: + dbfh = DbfHeader() + dbfh.addField(("name", "C", 10)) + dbfh.addField(("date", "D")) + dbfh.addField(DbfNumericFieldDef("price", 5, 2)) + Create a dbf header with field definitions: + dbfh = DbfHeader([ + ("name", "C", 10), + ("date", "D"), + DbfNumericFieldDef("price", 5, 2), + ]) + + """ + + __slots__ = ("signature", "fields", "lastUpdate", "recordLength", + "recordCount", "headerLength", "changed", "_ignore_errors") + + ## instance construction and initialization methods + + def __init__(self, fields=None, headerLength=0, recordLength=0, + recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, + ): + """Initialize instance. + + Arguments: + fields: + a list of field definitions; + recordLength: + size of the records; + headerLength: + size of the header; + recordCount: + number of records stored in DBF; + signature: + version number (aka signature). using 0x03 as a default meaning + "File without DBT". for more information about this field visit + ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` + lastUpdate: + date of the DBF's update. this could be a string ('yymmdd' or + 'yyyymmdd'), timestamp (int or float), datetime/date value, + a sequence (assuming (yyyy, mm, dd, ...)) or an object having + callable ``ticks`` field. 
+ ignoreErrors: + error processing mode for DBF fields (boolean) + + """ + self.signature = signature + if fields is None: + self.fields = [] + else: + self.fields = list(fields) + self.lastUpdate = utils.getDate(lastUpdate) + self.recordLength = recordLength + self.headerLength = headerLength + self.recordCount = recordCount + self.ignoreErrors = ignoreErrors + # XXX: I'm not sure this is safe to + # initialize `self.changed` in this way + self.changed = bool(self.fields) + + # @classmethod + def fromString(cls, string): + """Return header instance from the string object.""" + return cls.fromStream(cStringIO.StringIO(str(string))) + fromString = classmethod(fromString) + + # @classmethod + def fromStream(cls, stream): + """Return header object from the stream.""" + stream.seek(0) + _data = stream.read(32) + (_cnt, _hdrLen, _recLen) = struct.unpack("<I2H", _data[4:12]) + #reserved = _data[12:32] + _year = ord(_data[1]) + if _year < 80: + # dBase II started at 1980. It is quite unlikely + # that actual last update date is before that year. + _year += 2000 + else: + _year += 1900 + ## create header object + _obj = cls(None, _hdrLen, _recLen, _cnt, ord(_data[0]), + (_year, ord(_data[2]), ord(_data[3]))) + ## append field definitions + # position 0 is for the deletion flag + _pos = 1 + _data = stream.read(1) + + # The field definitions are ended either by \x0D OR a newline + # character, so we need to handle both when reading from a stream. + # When writing, dbfpy appears to write newlines instead of \x0D. 
+ while _data[0] not in ["\x0D", "\n"]: + _data += stream.read(31) + _fld = fields.lookupFor(_data[11]).fromString(_data, _pos) + _obj._addField(_fld) + _pos = _fld.end + _data = stream.read(1) + return _obj + fromStream = classmethod(fromStream) + + ## properties + + year = property(lambda self: self.lastUpdate.year) + month = property(lambda self: self.lastUpdate.month) + day = property(lambda self: self.lastUpdate.day) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on self and all fields""" + self._ignore_errors = value = bool(value) + for _field in self.fields: + _field.ignoreErrors = value + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## object representation + + def __repr__(self): + _rv = """\ +Version (signature): 0x%02x + Last update: %s + Header length: %d + Record length: %d + Record count: %d + FieldName Type Len Dec +""" % (self.signature, self.lastUpdate, self.headerLength, + self.recordLength, self.recordCount) + _rv += "\n".join( + ["%10s %4s %3s %3s" % _fld.fieldInfo() for _fld in self.fields] + ) + return _rv + + ## internal methods + + def _addField(self, *defs): + """Internal variant of the `addField` method. + + This method doesn't set `self.changed` field to True. + + Return value is a length of the appended records. + Note: this method doesn't modify ``recordLength`` and + ``headerLength`` fields. Use `addField` instead of this + method if you don't exactly know what you're doing. 
def addField(self, *defs):
    """Append field definitions and refresh the header's size fields.

    Each item of ``defs`` is handed to `_addField` unchanged, so it may
    be either a field definition instance or a
    ``(name, type[, length[, decimals]])`` sequence.

    Examples:
        dbfh.addField(
            ("name", "C", 20),
            dbf.DbfCharacterFieldDef("surname", 20),
            dbf.DbfDateFieldDef("birthdate"),
            ("member", "L"),
        )
        dbfh.addField(("price", "N", 5, 2))
        dbfh.addField(dbf.DbfNumericFieldDef("origprice", 5, 2))

    """
    previous = self.recordLength
    total = previous + self._addField(*defs)
    if not previous:
        # a record that had no length so far gets one extra byte in
        # front of the field data
        total += 1
    self.recordLength = total
    # 32 bytes of fixed header, 32 bytes per field descriptor
    # and one terminating byte
    self.headerLength = 32 + (32 * len(self.fields)) + 1
    self.changed = True
__getitem__(self, item): + """Return a field definition by numeric index or name string""" + if isinstance(item, basestring): + _name = item.upper() + for _field in self.fields: + if _field.name == _name: + return _field + else: + raise KeyError(item) + else: + # item must be field index + return self.fields[item] + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/record.py b/tablib/packages/dbfpy/record.py new file mode 100644 index 0000000..97bbfb3 --- /dev/null +++ b/tablib/packages/dbfpy/record.py @@ -0,0 +1,262 @@ +"""DBF record definition. + +""" +"""History (most recent first): +11-feb-2007 [als] __repr__: added special case for invalid field values +10-feb-2007 [als] added .rawFromStream() +30-oct-2006 [als] fix record length in .fromStream() +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] DbfRecord.write() -> DbfRecord._write(); + added delete() method. +16-dec-2005 [yc] record definition moved from `dbf`. +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] + +__all__ = ["DbfRecord"] + +from itertools import izip + +import utils + +class DbfRecord(object): + """DBF record. + + Instances of this class shouldn't be created manualy, + use `dbf.Dbf.newRecord` instead. + + Class implements mapping/sequence interface, so + fields could be accessed via their names or indexes + (names is a preffered way to access fields). + + Hint: + Use `store` method to save modified record. + + Examples: + Add new record to the database: + db = Dbf(filename) + rec = db.newRecord() + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + Or the same, but modify existed + (second in this case) record: + db = Dbf(filename) + rec = db[2] + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + + """ + + __slots__ = "dbf", "index", "deleted", "fieldData" + + ## creation and initialization + + def __init__(self, dbf, index=None, deleted=False, data=None): + """Instance initialiation. 
+ + Arguments: + dbf: + A `Dbf.Dbf` instance this record belonogs to. + index: + An integer record index or None. If this value is + None, record will be appended to the DBF. + deleted: + Boolean flag indicating whether this record + is a deleted record. + data: + A sequence or None. This is a data of the fields. + If this argument is None, default values will be used. + + """ + self.dbf = dbf + # XXX: I'm not sure ``index`` is necessary + self.index = index + self.deleted = deleted + if data is None: + self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] + else: + self.fieldData = list(data) + + # XXX: validate self.index before calculating position? + position = property(lambda self: self.dbf.header.headerLength + \ + self.index * self.dbf.header.recordLength) + + def rawFromStream(cls, dbf, index): + """Return raw record contents read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance containing the record. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is a string containing record data in DBF format. + + """ + # XXX: may be write smth assuming, that current stream + # position is the required one? it could save some + # time required to calculate where to seek in the file + dbf.stream.seek(dbf.header.headerLength + + index * dbf.header.recordLength) + return dbf.stream.read(dbf.header.recordLength) + rawFromStream = classmethod(rawFromStream) + + def fromStream(cls, dbf, index): + """Return a record read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is an instance of the current class. + + """ + return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) + fromStream = classmethod(fromStream) + + def fromString(cls, dbf, string, index=None): + """Return record read from the string object. 
def _validateIndex(self, allowUndefined=True, checkRange=False):
    """Validate ``self.index`` and raise ValueError if it is unusable.

    Arguments:
        allowUndefined:
            when true, an index of None (record not stored yet)
            is accepted silently; otherwise it is an error.
        checkRange:
            when true, the index must also point at an existing
            record, i.e. be less than ``self.dbf.header.recordCount``.

    Raises:
        ValueError: the index is None and ``allowUndefined`` is false,
            the index is negative, or ``checkRange`` is set and the
            index is not below the record count.

    """
    if self.index is None:
        if not allowUndefined:
            raise ValueError("Index is undefined")
    elif self.index < 0:
        raise ValueError("Index can't be negative (%s)" % self.index)
    # valid record indexes are 0 .. recordCount - 1; the comparison
    # used to be ``<=``, which rejected exactly the valid indexes
    elif checkRange and self.index >= self.dbf.header.recordCount:
        raise ValueError("There are only %d records in the DBF" %
            self.dbf.header.recordCount)
def __setitem__(self, key, value):
    """Set a field value by integer field index or by field name.

    Arguments:
        key:
            integer position of the field in ``self.fieldData``, or a
            field name that is resolved through
            ``self.dbf.indexOfFieldName``.
        value:
            new value of the field.

    """
    if isinstance(key, (int, long)):
        # integer index of the field.  This branch used to *return*
        # the current value, so ``rec[0] = x`` silently did nothing.
        self.fieldData[key] = value
    else:
        # assuming string field name
        self.fieldData[self.dbf.indexOfFieldName(key)] = value
def getDateTime(value=None):
    """Convert ``value`` to a `datetime.datetime` instance.

    Accepted kinds of ``value``, checked in this order:
        None:
            the current date and time is used;
        datetime.datetime:
            returned as is;
        datetime.date:
            converted to midnight of that day;
        number (int, long or float):
            treated as a timestamp, as returned for example
            by the time.time() call;
        string:
            *** CURRENTLY NOT SUPPORTED *** (NotImplementedError);
        object with ``__getitem__``:
            treated as a (year, month, day, ...) sequence, of which
            at most the first six items are used.

    Any other object must provide a callable ``ticks`` attribute whose
    result is treated as a timestamp.

    """
    if value is None:
        # use current value
        result = datetime.datetime.today()
    elif isinstance(value, datetime.datetime):
        # must be tested before datetime.date: datetime is its subclass
        result = value
    elif isinstance(value, datetime.date):
        result = datetime.datetime.combine(value, datetime.time())
    elif isinstance(value, (int, long, float)):
        # value is a timestamp
        result = datetime.datetime.fromtimestamp(value)
    elif isinstance(value, basestring):
        raise NotImplementedError("Strings aren't currently implemented")
    elif hasattr(value, "__getitem__"):
        # a sequence (assuming date/time tuple)
        parts = tuple(value)[:6]
        result = datetime.datetime(*parts)
    else:
        result = datetime.datetime.fromtimestamp(value.ticks())
    return result
+ + """ + + def __eq__(self, other): + return not other + + def __ne__(self, other): + return not (other is self) + + def __nonzero__(self): + return False + + def __int__(self): + return 0 + __long__ = __int__ + + def __float__(self): + return 0.0 + + def __str__(self): + return "" + + def __unicode__(self): + return u"" + + def __repr__(self): + return "<INVALID>" + +# invalid value is a constant singleton +INVALID_VALUE = _InvalidValue() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/__init__.py b/tablib/packages/dbfpy3/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tablib/packages/dbfpy3/__init__.py diff --git a/tablib/packages/dbfpy3/dbf.py b/tablib/packages/dbfpy3/dbf.py new file mode 100644 index 0000000..42de8a4 --- /dev/null +++ b/tablib/packages/dbfpy3/dbf.py @@ -0,0 +1,293 @@ +#! /usr/bin/env python +"""DBF accessing helpers. + +FIXME: more documentation needed + +Examples: + + Create new table, setup structure, add records: + + dbf = Dbf(filename, new=True) + dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (n, s, i, b) in ( + ("John", "Miller", "YC", (1980, 10, 11)), + ("Andy", "Larkin", "", (1980, 4, 11)), + ): + rec = dbf.newRecord() + rec["NAME"] = n + rec["SURNAME"] = s + rec["INITIALS"] = i + rec["BIRTHDATE"] = b + rec.store() + dbf.close() + + Open existed dbf, read some data: + + dbf = Dbf(filename, True) + for rec in dbf: + for fldName in dbf.fieldNames: + print '%s:\t %s (%s)' % (fldName, rec[fldName], + type(rec[fldName])) + print + dbf.close() + +""" +"""History (most recent first): +11-feb-2007 [als] export INVALID_VALUE; + Dbf: added .ignoreErrors, .INVALID_VALUE +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] removed fromStream and newDbf methods: + use argument of __init__ call must be used instead; + added class fields pointing to the header and + record classes. 
+17-dec-2005 [yc] split to several modules; reimplemented +13-dec-2005 [yc] adapted to the changes of the `strutil` module. +13-sep-2002 [als] support FoxPro Timestamp datatype +15-nov-1999 [jjk] documentation updates, add demo +24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks +08-jun-1998 [jjk] fix problems, add more features +20-feb-1998 [jjk] fix problems, add more features +19-feb-1998 [jjk] add create/write capabilities +18-feb-1998 [jjk] from dbfload.py +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] +__author__ = "Jeff Kunce <kuncej@mail.conservation.state.mo.us>" + +__all__ = ["Dbf"] + +from . import header +from . import record +from .utils import INVALID_VALUE + +class Dbf(object): + """DBF accessor. + + FIXME: + docs and examples needed (dont' forget to tell + about problems adding new fields on the fly) + + Implementation notes: + ``_new`` field is used to indicate whether this is + a new data table. `addField` could be used only for + the new tables! If at least one record was appended + to the table it's structure couldn't be changed. + + """ + + __slots__ = ("name", "header", "stream", + "_changed", "_new", "_ignore_errors") + + HeaderClass = header.DbfHeader + RecordClass = record.DbfRecord + INVALID_VALUE = INVALID_VALUE + + ## initialization and creation helpers + + def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): + """Initialize instance. + + Arguments: + f: + Filename or file-like object. + new: + True if new data table must be created. Assume + data table exists if this argument is False. + readOnly: + if ``f`` argument is a string file will + be opend in read-only mode; in other cases + this argument is ignored. This argument is ignored + even if ``new`` argument is True. + headerObj: + `header.DbfHeader` instance or None. If this argument + is None, new empty header will be used with the + all fields set by default. 
def _fixIndex(self, index):
    """Return ``index`` normalized to a non-negative record index.

    A negative ``index`` counts from the end of the table, exactly as
    negative indexes of list objects do (``-1`` is the last record).

    Arguments:
        index:
            integer record index, possibly negative.

    Return:
        An integer in the range ``0 <= result < len(self)``.

    Raises:
        TypeError: ``index`` is not an integer.
        IndexError: ``index`` does not address an existing record.

    """
    if not isinstance(index, int):
        raise TypeError("Index must be a numeric object")
    count = len(self)
    if index < 0:
        # index from the right side, fix it to the left-side index.
        # The offset used to be ``len(self) + 1``, which made ``-1``
        # point one past the last record.
        index += count
    if not 0 <= index < count:
        raise IndexError("Record index out of range")
    return index
+ + """ + if self._new: + self.header.addField(*defs) + else: + raise TypeError("At least one record was added, " + "structure can't be changed") + + ## 'magic' methods (representation and sequence interface) + + def __repr__(self): + return "Dbf stream '%s'\n" % self.stream + repr(self.header) + + def __len__(self): + """Return number of records.""" + return self.recordCount + + def __getitem__(self, index): + """Return `DbfRecord` instance.""" + return self.RecordClass.fromStream(self, self._fixIndex(index)) + + def __setitem__(self, index, record): + """Write `DbfRecord` instance to the stream.""" + record.index = self._fixIndex(index) + record._write() + self._changed = True + self._new = False + + #def __del__(self): + # """Flush stream upon deletion of the object.""" + # self.flush() + + +def demoRead(filename): + _dbf = Dbf(filename, True) + for _rec in _dbf: + print() + print(repr(_rec)) + _dbf.close() + +def demoCreate(filename): + _dbf = Dbf(filename, new=True) + _dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (_n, _s, _i, _b) in ( + ("John", "Miller", "YC", (1981, 1, 2)), + ("Andy", "Larkin", "AL", (1982, 3, 4)), + ("Bill", "Clinth", "", (1983, 5, 6)), + ("Bobb", "McNail", "", (1984, 7, 8)), + ): + _rec = _dbf.newRecord() + _rec["NAME"] = _n + _rec["SURNAME"] = _s + _rec["INITIALS"] = _i + _rec["BIRTHDATE"] = _b + _rec.store() + print(repr(_dbf)) + _dbf.close() + +if (__name__=='__main__'): + import sys + _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" + demoCreate(_name) + demoRead(_name) + +# vim: set et sw=4 sts=4 : diff --git a/tablib/packages/dbfpy3/dbfnew.py b/tablib/packages/dbfpy3/dbfnew.py new file mode 100644 index 0000000..4051bc6 --- /dev/null +++ b/tablib/packages/dbfpy3/dbfnew.py @@ -0,0 +1,182 @@ +#!/usr/bin/python +""".DBF creation helpers. + +Note: this is a legacy interface. 
New code should use Dbf class + for table creation (see examples in dbf.py) + +TODO: + - handle Memo fields. + - check length of the fields accoring to the + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + +""" +"""History (most recent first) +04-jul-2006 [als] added export declaration; + updated for dbfpy 2.0 +15-dec-2005 [yc] define dbf_new.__slots__ +14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; + dbf_new now is a new class (inherited from object) +??-jun-2000 [--] added by Hans Fiby +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] + +__all__ = ["dbf_new"] + +from .dbf import * +from .fields import * +from .header import * +from .record import * + +class _FieldDefinition(object): + """Field definition. + + This is a simple structure, which contains ``name``, ``type``, + ``len``, ``dec`` and ``cls`` fields. + + Objects also implement get/setitem magic functions, so fields + could be accessed via sequence iterface, where 'name' has + index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and + 'cls' could be located at index 4. + + """ + + __slots__ = "name", "type", "len", "dec", "cls" + + # WARNING: be attentive - dictionaries are mutable! + FLD_TYPES = { + # type: (cls, len) + "C": (DbfCharacterFieldDef, None), + "N": (DbfNumericFieldDef, None), + "L": (DbfLogicalFieldDef, 1), + # FIXME: support memos + # "M": (DbfMemoFieldDef), + "D": (DbfDateFieldDef, 8), + # FIXME: I'm not sure length should be 14 characters! 
class dbf_new(object):
    """New .DBF creation helper.

    Collects field definitions and writes an empty table (header only)
    with that structure.

    Example Usage:

        dbfn = dbf_new()
        dbfn.add_field("name",'C',80)
        dbfn.add_field("price",'N',10,2)
        dbfn.add_field("date",'D',8)
        dbfn.write("tst.dbf")

    Note:
        This module cannot handle Memo-fields,
        they are special.

    """

    __slots__ = ("fields",)

    # class used to hold one collected field definition
    FieldDefinitionClass = _FieldDefinition

    def __init__(self):
        """Start with an empty list of field definitions."""
        self.fields = []

    def add_field(self, name, typ, len, dec=0):
        """Add field definition.

        Arguments:
            name:
                field name (str object). field name must not
                contain ASCII NULs and its length shouldn't
                exceed 10 characters.
            typ:
                type of the field. this must be a single character
                from the "CNLMDT" set meaning character, numeric,
                logical, memo, date and date/time respectively.
            len:
                length of the field. this argument is used only for
                the character and numeric fields. all other fields
                have fixed length.
                FIXME: use None as a default for this argument?
            dec:
                decimal precision. used only for the numeric fields.

        """
        self.fields.append(self.FieldDefinitionClass(name, typ, len, dec))

    def write(self, filename):
        """Create empty .DBF file using current structure.

        The header is built completely before ``filename`` is opened.
        The file is opened in binary mode and is closed even when
        writing the header fails.

        """
        _dbfh = DbfHeader()
        _dbfh.setCurrentDate()
        for _fldDef in self.fields:
            _fldDef.appendToHeader(_dbfh)

        # ``with`` closes the stream even if header serialization raises
        with open(filename, "wb") as _dbfStream:
            _dbfh.write(_dbfStream)
+ +TODO: + - make memos work +""" +"""History (most recent first): +26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes +05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date +16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point + in the value to select float or integer return type +13-mar-2008 [als] check field name length in constructor +11-feb-2007 [als] handle value conversion errors +10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() +01-dec-2006 [als] Timestamp columns use None for empty values +31-oct-2006 [als] support field types 'F' (float), 'I' (integer) + and 'Y' (currency); + automate export and registration of field classes +04-jul-2006 [als] added export declaration +10-mar-2006 [als] decode empty values for Date and Logical fields; + show field name in errors +10-mar-2006 [als] fix Numeric value decoding: according to spec, + value always is string representation of the number; + ensure that encoded Numeric value fits into the field +20-dec-2005 [yc] use field names in upper case +15-dec-2005 [yc] field definitions moved from `dbf`. +""" + +__version__ = "$Revision: 1.14 $"[11:-2] +__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] + +__all__ = ["lookupFor",] # field classes added at the end of the module + +import datetime +import struct +import sys + +from . import utils + +## abstract definitions + +class DbfFieldDef(object): + """Abstract field definition. + + Child classes must override ``type`` class attribute to provide datatype + infromation of the field definition. For more info about types visit + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + + Also child classes must override ``defaultValue`` field to provide + default value for the field value. + + If child class has fixed length ``length`` class attribute must be + overriden and set to the valid value. None value means, that field + isn't of fixed length. 
+ + Note: ``name`` field must not be changed after instantiation. + + """ + + + __slots__ = ("name", "decimalCount", + "start", "end", "ignoreErrors") + + # length of the field, None in case of variable-length field, + # or a number if this field is a fixed-length field + length = None + + # field type. for more information about fields types visit + # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + # must be overriden in child classes + typeCode = None + + # default value for the field. this field must be + # overriden in child classes + defaultValue = None + + def __init__(self, name, length=None, decimalCount=None, + start=None, stop=None, ignoreErrors=False, + ): + """Initialize instance.""" + assert self.typeCode is not None, "Type code must be overriden" + assert self.defaultValue is not None, "Default value must be overriden" + ## fix arguments + if len(name) >10: + raise ValueError("Field name \"%s\" is too long" % name) + name = str(name).upper() + if self.__class__.length is None: + if length is None: + raise ValueError("[%s] Length isn't specified" % name) + length = int(length) + if length <= 0: + raise ValueError("[%s] Length must be a positive integer" + % name) + else: + length = self.length + if decimalCount is None: + decimalCount = 0 + ## set fields + self.name = name + # FIXME: validate length according to the specification at + # http://www.clicketyclick.dk/databases/xbase/format/data_types.html + self.length = length + self.decimalCount = decimalCount + self.ignoreErrors = ignoreErrors + self.start = start + self.end = stop + + def __cmp__(self, other): + return cmp(self.name, str(other).upper()) + + def __hash__(self): + return hash(self.name) + + def fromString(cls, string, start, ignoreErrors=False): + """Decode dbf field definition from the string data. + + Arguments: + string: + a string, dbf definition is decoded from. length of + the string must be 32 bytes. + start: + position in the database file. 
+ ignoreErrors: + initial error processing mode for the new field (boolean) + + """ + assert len(string) == 32 + _length = string[16] + return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, + string[17], start, start + _length, ignoreErrors=ignoreErrors) + fromString = classmethod(fromString) + + def toString(self): + """Return encoded field definition. + + Return: + Return value is a string object containing encoded + definition of this field. + + """ + if sys.version_info < (2, 4): + # earlier versions did not support padding character + _name = self.name[:11] + "\0" * (11 - len(self.name)) + else: + _name = self.name.ljust(11, '\0') + return ( + _name + + self.typeCode + + #data address + chr(0) * 4 + + chr(self.length) + + chr(self.decimalCount) + + chr(0) * 14 + ) + + def __repr__(self): + return "%-10s %1s %3d %3d" % self.fieldInfo() + + def fieldInfo(self): + """Return field information. + + Return: + Return value is a (name, type, length, decimals) tuple. + + """ + return (self.name, self.typeCode, self.length, self.decimalCount) + + def rawFromRecord(self, record): + """Return a "raw" field value from the record string.""" + return record[self.start:self.end] + + def decodeFromRecord(self, record): + """Return decoded field value from the record string.""" + try: + return self.decodeValue(self.rawFromRecord(record)) + except: + if self.ignoreErrors: + return utils.INVALID_VALUE + else: + raise + + def decodeValue(self, value): + """Return decoded value from string value. + + This method shouldn't be used publicly. It's called from the + `decodeFromRecord` method. + + This is an abstract method and it must be overridden in child classes. + """ + raise NotImplementedError + + def encodeValue(self, value): + """Return str object containing encoded field value. + + This is an abstract method and it must be overriden in child classes. 
+ """ + raise NotImplementedError + +## real classes + +class DbfCharacterFieldDef(DbfFieldDef): + """Definition of the character field.""" + + typeCode = "C" + defaultValue = b'' + + def decodeValue(self, value): + """Return string object. + + Return value is a ``value`` argument with stripped right spaces. + + """ + return value.rstrip(b' ').decode('utf-8') + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``.""" + return str(value)[:self.length].ljust(self.length) + + +class DbfNumericFieldDef(DbfFieldDef): + """Definition of the numeric field.""" + + typeCode = "N" + # XXX: now I'm not sure it was a good idea to make a class field + # `defaultValue` instead of a generic method as it was implemented + # previously -- it's ok with all types except number, cuz + # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. + defaultValue = 0 + + def decodeValue(self, value): + """Return a number decoded from ``value``. + + If decimals is zero, value will be decoded as an integer; + or as a float otherwise. + + Return: + Return value is a int (long) or float instance. + + """ + value = value.strip(b' \0') + if b'.' 
in value: + # a float (has decimal separator) + return float(value) + elif value: + # must be an integer + return int(value) + else: + return 0 + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + _rv = ("%*.*f" % (self.length, self.decimalCount, value)) + if len(_rv) > self.length: + _ppos = _rv.find(".") + if 0 <= _ppos <= self.length: + _rv = _rv[:self.length] + else: + raise ValueError("[%s] Numeric overflow: %s (field width: %i)" + % (self.name, _rv, self.length)) + return _rv + +class DbfFloatFieldDef(DbfNumericFieldDef): + """Definition of the float field - same as numeric.""" + + typeCode = "F" + +class DbfIntegerFieldDef(DbfFieldDef): + """Definition of the integer field.""" + + typeCode = "I" + length = 4 + defaultValue = 0 + + def decodeValue(self, value): + """Return an integer number decoded from ``value``.""" + return struct.unpack("<i", value)[0] + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<i", int(value)) + +class DbfCurrencyFieldDef(DbfFieldDef): + """Definition of the currency field.""" + + typeCode = "Y" + length = 8 + defaultValue = 0.0 + + def decodeValue(self, value): + """Return float number decoded from ``value``.""" + return struct.unpack("<q", value)[0] / 10000. + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<q", round(value * 10000)) + +class DbfLogicalFieldDef(DbfFieldDef): + """Definition of the logical field.""" + + typeCode = "L" + defaultValue = -1 + length = 1 + + def decodeValue(self, value): + """Return True, False or -1 decoded from ``value``.""" + # Note: value always is 1-char string + if value == "?": + return -1 + if value in "NnFf ": + return False + if value in "YyTt": + return True + raise ValueError("[%s] Invalid logical value %r" % (self.name, value)) + + def encodeValue(self, value): + """Return a character from the "TF?" set. 
+ + Return: + Return value is "T" if ``value`` is True + "?" if value is -1 or False otherwise. + + """ + if value is True: + return "T" + if value == -1: + return "?" + return "F" + + +class DbfMemoFieldDef(DbfFieldDef): + """Definition of the memo field. + + Note: memos aren't currenly completely supported. + + """ + + typeCode = "M" + defaultValue = " " * 10 + length = 10 + + def decodeValue(self, value): + """Return int .dbt block number decoded from the string object.""" + #return int(value) + raise NotImplementedError + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``. + + Note: this is an internal method. + + """ + #return str(value)[:self.length].ljust(self.length) + raise NotImplementedError + + +class DbfDateFieldDef(DbfFieldDef): + """Definition of the date field.""" + + typeCode = "D" + defaultValue = utils.classproperty(lambda cls: datetime.date.today()) + # "yyyymmdd" gives us 8 characters + length = 8 + + def decodeValue(self, value): + """Return a ``datetime.date`` instance decoded from ``value``.""" + if value.strip(): + return utils.getDate(value) + else: + return None + + def encodeValue(self, value): + """Return a string-encoded value. + + ``value`` argument should be a value suitable for the + `utils.getDate` call. + + Return: + Return value is a string in format "yyyymmdd". + + """ + if value: + return utils.getDate(value).strftime("%Y%m%d") + else: + return " " * self.length + + +class DbfDateTimeFieldDef(DbfFieldDef): + """Definition of the timestamp field.""" + + # a difference between JDN (Julian Day Number) + # and GDN (Gregorian Day Number). note, that GDN < JDN + JDN_GDN_DIFF = 1721425 + typeCode = "T" + defaultValue = utils.classproperty(lambda cls: datetime.datetime.now()) + # two 32-bits integers representing JDN and amount of + # milliseconds respectively gives us 8 bytes. + # note, that values must be encoded in LE byteorder. 
+ length = 8 + + def decodeValue(self, value): + """Return a `datetime.datetime` instance.""" + assert len(value) == self.length + # LE byteorder + _jdn, _msecs = struct.unpack("<2I", value) + if _jdn >= 1: + _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) + _rv += datetime.timedelta(0, _msecs / 1000.0) + else: + # empty date + _rv = None + return _rv + + def encodeValue(self, value): + """Return a string-encoded ``value``.""" + if value: + value = utils.getDateTime(value) + # LE byteorder + _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, + (value.hour * 3600 + value.minute * 60 + value.second) * 1000) + else: + _rv = "\0" * self.length + assert len(_rv) == self.length + return _rv + + +_fieldsRegistry = {} + +def registerField(fieldCls): + """Register field definition class. + + ``fieldCls`` should be subclass of the `DbfFieldDef`. + + Use `lookupFor` to retrieve field definition class + by the type code. + + """ + assert fieldCls.typeCode is not None, "Type code isn't defined" + # XXX: use fieldCls.typeCode.upper()? in case of any decign + # don't forget to look to the same comment in ``lookupFor`` method + _fieldsRegistry[fieldCls.typeCode] = fieldCls + + +def lookupFor(typeCode): + """Return field definition class for the given type code. + + ``typeCode`` must be a single character. That type should be + previously registered. + + Use `registerField` to register new field class. + + Return: + Return value is a subclass of the `DbfFieldDef`. + + """ + # XXX: use typeCode.upper()? 
in case of any decign don't + # forget to look to the same comment in ``registerField`` + return _fieldsRegistry[chr(typeCode)] + +## register generic types + +for (_name, _val) in list(globals().items()): + if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ + and (_name != "DbfFieldDef"): + __all__.append(_name) + registerField(_val) +del _name, _val + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/header.py b/tablib/packages/dbfpy3/header.py new file mode 100644 index 0000000..6c0dc4f --- /dev/null +++ b/tablib/packages/dbfpy3/header.py @@ -0,0 +1,273 @@ +"""DBF header definition. + +TODO: + - handle encoding of the character fields + (encoding information stored in the DBF header) + +""" +"""History (most recent first): +16-sep-2010 [als] fromStream: fix century of the last update field +11-feb-2007 [als] added .ignoreErrors +10-feb-2007 [als] added __getitem__: return field definitions + by field name or field number (zero-based) +04-jul-2006 [als] added export declaration +15-dec-2005 [yc] created +""" + +__version__ = "$Revision: 1.6 $"[11:-2] +__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] + +__all__ = ["DbfHeader"] + +import io +import datetime +import struct +import time +import sys + +from . import fields +from .utils import getDate + + +class DbfHeader(object): + """Dbf header definition. 
+ + For more information about dbf header format visit + `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` + + Examples: + Create an empty dbf header and add some field definitions: + dbfh = DbfHeader() + dbfh.addField(("name", "C", 10)) + dbfh.addField(("date", "D")) + dbfh.addField(DbfNumericFieldDef("price", 5, 2)) + Create a dbf header with field definitions: + dbfh = DbfHeader([ + ("name", "C", 10), + ("date", "D"), + DbfNumericFieldDef("price", 5, 2), + ]) + + """ + + __slots__ = ("signature", "fields", "lastUpdate", "recordLength", + "recordCount", "headerLength", "changed", "_ignore_errors") + + ## instance construction and initialization methods + + def __init__(self, fields=None, headerLength=0, recordLength=0, + recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, + ): + """Initialize instance. + + Arguments: + fields: + a list of field definitions; + recordLength: + size of the records; + headerLength: + size of the header; + recordCount: + number of records stored in DBF; + signature: + version number (aka signature). using 0x03 as a default meaning + "File without DBT". for more information about this field visit + ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` + lastUpdate: + date of the DBF's update. this could be a string ('yymmdd' or + 'yyyymmdd'), timestamp (int or float), datetime/date value, + a sequence (assuming (yyyy, mm, dd, ...)) or an object having + callable ``ticks`` field. 
+ ignoreErrors: + error processing mode for DBF fields (boolean) + + """ + self.signature = signature + if fields is None: + self.fields = [] + else: + self.fields = list(fields) + self.lastUpdate = getDate(lastUpdate) + self.recordLength = recordLength + self.headerLength = headerLength + self.recordCount = recordCount + self.ignoreErrors = ignoreErrors + # XXX: I'm not sure this is safe to + # initialize `self.changed` in this way + self.changed = bool(self.fields) + + # @classmethod + def fromString(cls, string): + """Return header instance from the string object.""" + return cls.fromStream(io.StringIO(str(string))) + fromString = classmethod(fromString) + + # @classmethod + def fromStream(cls, stream): + """Return header object from the stream.""" + stream.seek(0) + first_32 = stream.read(32) + if type(first_32) != bytes: + _data = bytes(first_32, sys.getfilesystemencoding()) + _data = first_32 + (_cnt, _hdrLen, _recLen) = struct.unpack("<I2H", _data[4:12]) + #reserved = _data[12:32] + _year = _data[1] + if _year < 80: + # dBase II started at 1980. It is quite unlikely + # that actual last update date is before that year. 
+ _year += 2000 + else: + _year += 1900 + ## create header object + _obj = cls(None, _hdrLen, _recLen, _cnt, _data[0], + (_year, _data[2], _data[3])) + ## append field definitions + # position 0 is for the deletion flag + _pos = 1 + _data = stream.read(1) + while _data != b'\r': + _data += stream.read(31) + _fld = fields.lookupFor(_data[11]).fromString(_data, _pos) + _obj._addField(_fld) + _pos = _fld.end + _data = stream.read(1) + return _obj + fromStream = classmethod(fromStream) + + ## properties + + year = property(lambda self: self.lastUpdate.year) + month = property(lambda self: self.lastUpdate.month) + day = property(lambda self: self.lastUpdate.day) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on self and all fields""" + self._ignore_errors = value = bool(value) + for _field in self.fields: + _field.ignoreErrors = value + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## object representation + + def __repr__(self): + _rv = """\ +Version (signature): 0x%02x + Last update: %s + Header length: %d + Record length: %d + Record count: %d + FieldName Type Len Dec +""" % (self.signature, self.lastUpdate, self.headerLength, + self.recordLength, self.recordCount) + _rv += "\n".join( + ["%10s %4s %3s %3s" % _fld.fieldInfo() for _fld in self.fields] + ) + return _rv + + ## internal methods + + def _addField(self, *defs): + """Internal variant of the `addField` method. + + This method doesn't set `self.changed` field to True. + + Return value is a length of the appended records. + Note: this method doesn't modify ``recordLength`` and + ``headerLength`` fields. Use `addField` instead of this + method if you don't exactly know what you're doing. 
+ + """ + # insure we have dbf.DbfFieldDef instances first (instantiation + # from the tuple could raise an error, in such a case I don't + # wanna add any of the definitions -- all will be ignored) + _defs = [] + _recordLength = 0 + for _def in defs: + if isinstance(_def, fields.DbfFieldDef): + _obj = _def + else: + (_name, _type, _len, _dec) = (tuple(_def) + (None,) * 4)[:4] + _cls = fields.lookupFor(_type) + _obj = _cls(_name, _len, _dec, + ignoreErrors=self._ignore_errors) + _recordLength += _obj.length + _defs.append(_obj) + # and now extend field definitions and + # update record length + self.fields += _defs + return _recordLength + + ## interface methods + + def addField(self, *defs): + """Add field definition to the header. + + Examples: + dbfh.addField( + ("name", "C", 20), + dbf.DbfCharacterFieldDef("surname", 20), + dbf.DbfDateFieldDef("birthdate"), + ("member", "L"), + ) + dbfh.addField(("price", "N", 5, 2)) + dbfh.addField(dbf.DbfNumericFieldDef("origprice", 5, 2)) + + """ + _oldLen = self.recordLength + self.recordLength += self._addField(*defs) + if not _oldLen: + self.recordLength += 1 + # XXX: may be just use: + # self.recordeLength += self._addField(*defs) + bool(not _oldLen) + # recalculate headerLength + self.headerLength = 32 + (32 * len(self.fields)) + 1 + self.changed = True + + def write(self, stream): + """Encode and write header to the stream.""" + stream.seek(0) + stream.write(self.toString()) + fields = [_fld.toString() for _fld in self.fields] + stream.write(''.join(fields).encode(sys.getfilesystemencoding())) + stream.write(b'\x0D') # cr at end of all header data + self.changed = False + + def toString(self): + """Returned 32 chars length string with encoded header.""" + return struct.pack("<4BI2H", + self.signature, + self.year - 1900, + self.month, + self.day, + self.recordCount, + self.headerLength, + self.recordLength) + (b'\x00' * 20) + #TODO: figure out if bytes(utf-8) is correct here. 
+ + def setCurrentDate(self): + """Update ``self.lastUpdate`` field with current date value.""" + self.lastUpdate = datetime.date.today() + + def __getitem__(self, item): + """Return a field definition by numeric index or name string""" + if isinstance(item, str): + _name = item.upper() + for _field in self.fields: + if _field.name == _name: + return _field + else: + raise KeyError(item) + else: + # item must be field index + return self.fields[item] + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/record.py b/tablib/packages/dbfpy3/record.py new file mode 100644 index 0000000..73b6952 --- /dev/null +++ b/tablib/packages/dbfpy3/record.py @@ -0,0 +1,266 @@ +"""DBF record definition. + +""" +"""History (most recent first): +11-feb-2007 [als] __repr__: added special case for invalid field values +10-feb-2007 [als] added .rawFromStream() +30-oct-2006 [als] fix record length in .fromStream() +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] DbfRecord.write() -> DbfRecord._write(); + added delete() method. +16-dec-2005 [yc] record definition moved from `dbf`. +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] + +__all__ = ["DbfRecord"] + +import sys + +from . import utils + +class DbfRecord(object): + """DBF record. + + Instances of this class shouldn't be created manualy, + use `dbf.Dbf.newRecord` instead. + + Class implements mapping/sequence interface, so + fields could be accessed via their names or indexes + (names is a preffered way to access fields). + + Hint: + Use `store` method to save modified record. 
+ + Examples: + Add new record to the database: + db = Dbf(filename) + rec = db.newRecord() + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + Or the same, but modify existed + (second in this case) record: + db = Dbf(filename) + rec = db[2] + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + + """ + + __slots__ = "dbf", "index", "deleted", "fieldData" + + ## creation and initialization + + def __init__(self, dbf, index=None, deleted=False, data=None): + """Instance initialiation. + + Arguments: + dbf: + A `Dbf.Dbf` instance this record belonogs to. + index: + An integer record index or None. If this value is + None, record will be appended to the DBF. + deleted: + Boolean flag indicating whether this record + is a deleted record. + data: + A sequence or None. This is a data of the fields. + If this argument is None, default values will be used. + + """ + self.dbf = dbf + # XXX: I'm not sure ``index`` is necessary + self.index = index + self.deleted = deleted + if data is None: + self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] + else: + self.fieldData = list(data) + + # XXX: validate self.index before calculating position? + position = property(lambda self: self.dbf.header.headerLength + \ + self.index * self.dbf.header.recordLength) + + def rawFromStream(cls, dbf, index): + """Return raw record contents read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance containing the record. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is a string containing record data in DBF format. + + """ + # XXX: may be write smth assuming, that current stream + # position is the required one? 
it could save some + # time required to calculate where to seek in the file + dbf.stream.seek(dbf.header.headerLength + + index * dbf.header.recordLength) + return dbf.stream.read(dbf.header.recordLength) + rawFromStream = classmethod(rawFromStream) + + def fromStream(cls, dbf, index): + """Return a record read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is an instance of the current class. + + """ + return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) + fromStream = classmethod(fromStream) + + def fromString(cls, dbf, string, index=None): + """Return record read from the string object. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + string: + A string new record should be created from. + index: + Index of the record in the container. If this + argument is None, record will be appended. + + Return value is an instance of the current class. + + """ + return cls(dbf, index, string[0]=="*", + [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) + fromString = classmethod(fromString) + + ## object representation + + def __repr__(self): + _template = "%%%ds: %%s (%%s)" % max([len(_fld) + for _fld in self.dbf.fieldNames]) + _rv = [] + for _fld in self.dbf.fieldNames: + _val = self[_fld] + if _val is utils.INVALID_VALUE: + _rv.append(_template % + (_fld, "None", "value cannot be decoded")) + else: + _rv.append(_template % (_fld, _val, type(_val))) + return "\n".join(_rv) + + ## protected methods + + def _write(self): + """Write data to the dbf stream. + + Note: + This isn't a public method, it's better to + use 'store' instead publically. + Be design ``_write`` method should be called + only from the `Dbf` instance. 
+ + + """ + self._validateIndex(False) + self.dbf.stream.seek(self.position) + self.dbf.stream.write(bytes(self.toString(), + sys.getfilesystemencoding())) + # FIXME: may be move this write somewhere else? + # why we should check this condition for each record? + if self.index == len(self.dbf): + # this is the last record, + # we should write SUB (ASCII 26) + self.dbf.stream.write(b"\x1A") + + ## utility methods + + def _validateIndex(self, allowUndefined=True, checkRange=False): + """Valid ``self.index`` value. + + If ``allowUndefined`` argument is True functions does nothing + in case of ``self.index`` pointing to None object. + + """ + if self.index is None: + if not allowUndefined: + raise ValueError("Index is undefined") + elif self.index < 0: + raise ValueError("Index can't be negative (%s)" % self.index) + elif checkRange and self.index <= self.dbf.header.recordCount: + raise ValueError("There are only %d records in the DBF" % + self.dbf.header.recordCount) + + ## interface methods + + def store(self): + """Store current record in the DBF. + + If ``self.index`` is None, this record will be appended to the + records of the DBF this records belongs to; or replaced otherwise. + + """ + self._validateIndex() + if self.index is None: + self.index = len(self.dbf) + self.dbf.append(self) + else: + self.dbf[self.index] = self + + def delete(self): + """Mark method as deleted.""" + self.deleted = True + + def toString(self): + """Return string packed record values.""" +# for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData): +# + + return "".join([" *"[self.deleted]] + [ + _def.encodeValue(_dat) + for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData) + ]) + + def asList(self): + """Return a flat list of fields. + + Note: + Change of the list's values won't change + real values stored in this object. + + """ + return self.fieldData[:] + + def asDict(self): + """Return a dictionary of fields. 
+ + Note: + Change of the dicts's values won't change + real values stored in this object. + + """ + return dict([_i for _i in zip(self.dbf.fieldNames, self.fieldData)]) + + def __getitem__(self, key): + """Return value by field name or field index.""" + if isinstance(key, int): + # integer index of the field + return self.fieldData[key] + # assuming string field name + return self.fieldData[self.dbf.indexOfFieldName(key)] + + def __setitem__(self, key, value): + """Set field value by integer index of the field or string name.""" + if isinstance(key, int): + # integer index of the field + return self.fieldData[key] + # assuming string field name + self.fieldData[self.dbf.indexOfFieldName(key)] = value + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/utils.py b/tablib/packages/dbfpy3/utils.py new file mode 100644 index 0000000..856ade8 --- /dev/null +++ b/tablib/packages/dbfpy3/utils.py @@ -0,0 +1,170 @@ +"""String utilities. + +TODO: + - allow strings in getDateTime routine; +""" +"""History (most recent first): +11-feb-2007 [als] added INVALID_VALUE +10-feb-2007 [als] allow date strings padded with spaces instead of zeroes +20-dec-2005 [yc] handle long objects in getDate/getDateTime +16-dec-2005 [yc] created from ``strutil`` module. +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] + +import datetime +import time + + +def unzfill(str): + """Return a string without ASCII NULs. + + This function searchers for the first NUL (ASCII 0) occurance + and truncates string till that position. + + """ + try: + return str[:str.index(b'\0')] + except ValueError: + return str + + +def getDate(date=None): + """Return `datetime.date` instance. 
+ + Type of the ``date`` argument could be one of the following: + None: + use current date value; + datetime.date: + this value will be returned; + datetime.datetime: + the result of the date.date() will be returned; + string: + assuming "%Y%m%d" or "%y%m%dd" format; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``date`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if date is None: + # use current value + return datetime.date.today() + if isinstance(date, datetime.date): + return date + if isinstance(date, datetime.datetime): + return date.date() + if isinstance(date, (int, float)): + # date is a timestamp + return datetime.date.fromtimestamp(date) + if isinstance(date, str): + date = date.replace(" ", "0") + if len(date) == 6: + # yymmdd + return datetime.date(*time.strptime(date, "%y%m%d")[:3]) + # yyyymmdd + return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) + if hasattr(date, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.date(*date[:3]) + return datetime.date.fromtimestamp(date.ticks()) + + +def getDateTime(value=None): + """Return `datetime.datetime` instance. + + Type of the ``value`` argument could be one of the following: + None: + use current date value; + datetime.date: + result will be converted to the `datetime.datetime` instance + using midnight; + datetime.datetime: + ``value`` will be returned as is; + string: + *** CURRENTLY NOT SUPPORTED ***; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``value`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. 
+ + """ + if value is None: + # use current value + return datetime.datetime.today() + if isinstance(value, datetime.datetime): + return value + if isinstance(value, datetime.date): + return datetime.datetime.fromordinal(value.toordinal()) + if isinstance(value, (int, float)): + # value is a timestamp + return datetime.datetime.fromtimestamp(value) + if isinstance(value, str): + raise NotImplementedError("Strings aren't currently implemented") + if hasattr(value, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.datetime(*tuple(value)[:6]) + return datetime.datetime.fromtimestamp(value.ticks()) + + +class classproperty(property): + """Works in the same way as a ``property``, but for the classes.""" + + def __get__(self, obj, cls): + return self.fget(cls) + + +class _InvalidValue(object): + + """Value returned from DBF records when field validation fails + + The value is not equal to anything except for itself + and equal to all empty values: None, 0, empty string etc. + In other words, invalid value is equal to None and not equal + to None at the same time. + + This value yields zero upon explicit conversion to a number type, + empty string for string types, and False for boolean. + + """ + + def __eq__(self, other): + return not other + + def __ne__(self, other): + return not (other is self) + + def __bool__(self): + return False + + def __int__(self): + return 0 + __long__ = __int__ + + def __float__(self): + return 0.0 + + def __str__(self): + return "" + + def __unicode__(self): + return "" + + def __repr__(self): + return "<INVALID>" + +# invalid value is a constant singleton +INVALID_VALUE = _InvalidValue() + +# vim: set et sts=4 sw=4 : |
