diff options
| author | Adam Hupp <adam@hupp.org> | 2010-03-31 15:50:55 -0700 |
|---|---|---|
| committer | Adam Hupp <adam@hupp.org> | 2010-03-31 15:50:55 -0700 |
| commit | 7db98f44bccc77c3adaef638b7765aaf0f6230ae (patch) | |
| tree | 31a9c66e495a8cf3b36a2adf3d861a45adc9045b | |
| download | python-magic-7db98f44bccc77c3adaef638b7765aaf0f6230ae.tar.gz | |
initial commit
| -rw-r--r-- | README | 36 | ||||
| -rw-r--r-- | magic.py | 207 | ||||
| -rw-r--r-- | setup.py | 17 | ||||
| -rw-r--r-- | test.py | 51 | ||||
| -rw-r--r-- | testdata/magic.pyc | bin | 0 -> 1797 bytes | |||
| -rw-r--r-- | testdata/test.gz | bin | 0 -> 40 bytes | |||
| -rw-r--r-- | testdata/test.pdf | 199 | ||||
| -rw-r--r-- | testdata/text.txt | 2 |
8 files changed, 512 insertions, 0 deletions
@@ -0,0 +1,36 @@ + += python-magic = + +Adam Hupp <adam at hupp.org> + +Distributed under the PSF License: http://www.python.org/psf/license/ + +python-magic is a simple wrapper for libmagic. libmagic identifies +file types according to their headers. It is the core of the Unix +"file" command. + + += Installation = + +This module depends on libmagic to run. It needs swig and +libmagic-dev to build. + +To build and install run: + +# python setup.py install + + += Example Usage = + +>>> import magic +>>> m = magic.Magic() +>>> m.from_file("testdata/test.pdf") +'PDF document, version 1.2' +>>> m.from_buffer(open("testdata/test.pdf").read(1024)) +'PDF document, version 1.2' + +# For MIME types +>>> mime = magic.Magic(mime=True) +>>> mime.from_file("testdata/test.pdf") +'application/pdf' +>>> diff --git a/magic.py b/magic.py new file mode 100644 index 0000000..60b999b --- /dev/null +++ b/magic.py @@ -0,0 +1,207 @@ +""" +magic is a wrapper around the libmagic file identification library. + +See README for more information. + +Usage: + +>>> import magic +>>> magic.from_file("testdata/test.pdf") +'PDF document, version 1.2' +>>> magic.from_file("testdata/test.pdf", mime=True) +'application/pdf' +>>> magic.from_buffer(open("testdata/test.pdf").read(1024)) +'PDF document, version 1.2' +>>> + + +""" + +import os.path +import ctypes +import ctypes.util + +from ctypes import c_char_p, c_int, c_size_t, c_void_p + +class MagicException(Exception): pass + +class Magic: + """ + Magic is a wrapper around the libmagic C library. + + """ + + def __init__(self, mime=False, magic_file=None): + """ + Create a new libmagic wrapper. + + mime - if True, mimetypes are returned instead of textual descriptions + magic_file - use a mime database other than the system default + + """ + flags = MAGIC_NONE + if mime: + flags |= MAGIC_MIME + + self.cookie = magic_open(flags) + + magic_load(self.cookie, magic_file) + + + def from_buffer(self, buf): + """ + Identify the contents of `buf` + """ + return magic_buffer(self.cookie, buf) + + def from_file(self, filename): + """ + Identify the contents of file `filename` + raises IOError if the file does not exist + """ + + if not os.path.exists(filename): + raise IOError("File does not exist: " + filename) + + return magic_file(self.cookie, filename) + + def __del__(self): + try: + magic_close(self.cookie) + except Exception, e: + print "got thig: ", e + + +_magic_mime = None +_magic = None + +def _get_magic_mime(): + global _magic_mime + if not _magic_mime: + _magic_mime = Magic(mime=True) + return _magic_mime + +def _get_magic(): + global _magic + if not _magic: + _magic = Magic() + return _magic + +def _get_magic_type(mime): + if mime: + return _get_magic_mime() + else: + return _get_magic() + +def from_file(filename, mime=False): + m = _get_magic_type(mime) + return m.from_file(filename) + +def from_buffer(buffer, mime=False): + m = _get_magic_type(mime) + return m.from_buffer(buffer) + + + + +libmagic = ctypes.CDLL(ctypes.util.find_library('magic')) + +magic_t = ctypes.c_void_p + +def errorcheck(result, func, args): + err = magic_error(args[0]) + if err is not None: + raise MagicException(err) + else: + return result + +magic_open = libmagic.magic_open +magic_open.restype = magic_t +magic_open.argtypes = [c_int] + +magic_close = libmagic.magic_close +magic_close.restype = None +magic_close.argtypes = [magic_t] + +magic_error = libmagic.magic_error +magic_error.restype = c_char_p +magic_error.argtypes = [magic_t] + +magic_errno = libmagic.magic_errno +magic_errno.restype = c_int +magic_errno.argtypes = [magic_t] + +magic_file = libmagic.magic_file +magic_file.restype = c_char_p +magic_file.argtypes = [magic_t, c_char_p] +magic_file.errcheck = errorcheck + + +_magic_buffer = libmagic.magic_buffer +_magic_buffer.restype = c_char_p +_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t] +_magic_buffer.errcheck = errorcheck + + +def magic_buffer(cookie, buf): + return _magic_buffer(cookie, buf, len(buf)) + + +magic_load = libmagic.magic_load +magic_load.restype = c_int +magic_load.argtypes = [magic_t, c_char_p] +magic_load.errcheck = errorcheck + +magic_setflags = libmagic.magic_setflags +magic_setflags.restype = c_int +magic_setflags.argtypes = [magic_t, c_int] + +magic_check = libmagic.magic_check +magic_check.restype = c_int +magic_check.argtypes = [magic_t, c_char_p] + +magic_compile = libmagic.magic_compile +magic_compile.restype = c_int +magic_compile.argtypes = [magic_t, c_char_p] + + + +MAGIC_NONE = 0x000000 # No flags + +MAGIC_DEBUG = 0x000001 # Turn on debugging + +MAGIC_SYMLINK = 0x000002 # Follow symlinks + +MAGIC_COMPRESS = 0x000004 # Check inside compressed files + +MAGIC_DEVICES = 0x000008 # Look at the contents of devices + +MAGIC_MIME = 0x000010 # Return a mime string + +MAGIC_CONTINUE = 0x000020 # Return all matches + +MAGIC_CHECK = 0x000040 # Print warnings to stderr + +MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit + +MAGIC_RAW = 0x000100 # Don't translate unprintable chars + +MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors + +MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files + +MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files + +MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries + +MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type + +MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details + +MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files + +MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff + +MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran + +MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..22dda6f --- /dev/null +++ b/setup.py @@ -0,0 +1,17 @@ +from setuptools import setup, Extension +#from distutils.core import setup, Extension + +setup(name='python-magic', + description='File type identification using libmagic', + author='Adam Hupp', + author_email='adam@hupp.org', + url="http://hupp.org/adam/hg/python-magic", + version='0.1', + py_modules=['magic'], + long_description="""This module uses ctypes to access the libmagic file type +identification library. It makes use of the local magic database and +supports both textual and MIME-type output. +""", + keywords="mime magic file", + license="PSF", + ) @@ -0,0 +1,51 @@ + +import os.path +import unittest +import random +from StringIO import StringIO +from os import path +from magic import Magic, MagicException + +testfile = [ + ("magic.pyc", "python 2.4 byte-compiled", "application/octet-stream"), + ("test.pdf", "PDF document, version 1.2", "application/pdf"), + ("test.gz", 'gzip compressed data, was "test", from Unix, last modified: ' + 'Sat Jun 28 18:32:52 2008', "application/x-gzip"), + ("text.txt", "ASCII text", "text/plain; charset=us-ascii"), + ] + + +class TestMagic(unittest.TestCase): + + mime = False + + def setUp(self): + self.m = Magic(mime=self.mime) + + def testFileTypes(self): + for filename, desc, mime in testfile: + filename = path.join(path.dirname(__file__), + "testdata", + filename) + if self.mime: + target = mime + else: + target = desc + + self.assertEqual(target, self.m.from_buffer(open(filename).read(1024))) + self.assertEqual(target, self.m.from_file(filename), filename) + + + def testErrors(self): + self.assertRaises(IOError, self.m.from_file, "nonexistent") + self.assertRaises(MagicException, Magic, magic_file="noneexistent") + os.environ['MAGIC'] = '/nonexistetn' + self.assertRaises(MagicException, Magic) + del os.environ['MAGIC'] + +class TestMagicMime(TestMagic): + mime = True + +if __name__ == '__main__': + unittest.main() + diff --git a/testdata/magic.pyc b/testdata/magic.pyc Binary files differnew file mode 100644 index 0000000..ba801e0 --- /dev/null +++ b/testdata/magic.pyc diff --git a/testdata/test.gz b/testdata/test.gz Binary files differnew file mode 100644 index 0000000..5d847dd --- /dev/null +++ b/testdata/test.gz diff --git a/testdata/test.pdf b/testdata/test.pdf new file mode 100644 index 0000000..b986617 --- /dev/null +++ b/testdata/test.pdf @@ -0,0 +1,199 @@ +%PDF-1.2 +7 0 obj +[5 0 R/XYZ 111.6 757.86] +endobj +13 0 obj +<< +/Title(About this document) +/A<< +/S/GoTo +/D(subsection.1.1) +>> +/Parent 12 0 R +/Next 14 0 R +>> +endobj +15 0 obj +<< +/Title(Compiling with GHC) +/A<< +/S/GoTo +/D(subsubsection.1.2.1) +>> +/Parent 14 0 R +/Next 16 0 R +>> +endobj +16 0 obj +<< +/Title(Compiling with Hugs) +/A<< +/S/GoTo +/D(subsubsection.1.2.2) +>> +/Parent 14 0 R +/Prev 15 0 R +>> +endobj +14 0 obj +<< +/Title(Compatibility) +/A<< +/S/GoTo +/D(subsection.1.2) +>> +/Parent 12 0 R +/Prev 13 0 R +/First 15 0 R +/Last 16 0 R +/Count -2 +/Next 17 0 R +>> +endobj +17 0 obj +<< +/Title(Reporting bugs) +/A<< +/S/GoTo +/D(subsection.1.3) +>> +/Parent 12 0 R +/Prev 14 0 R +/Next 18 0 R +>> +endobj +18 0 obj +<< +/Title(History) +/A<< +/S/GoTo +/D(subsection.1.4) +>> +/Parent 12 0 R +/Prev 17 0 R +/Next 19 0 R +>> +endobj +19 0 obj +<< +/Title(License) +/A<< +/S/GoTo +/D(subsection.1.5) +>> +/Parent 12 0 R +/Prev 18 0 R +>> +endobj +12 0 obj +<< +/Title(Introduction) +/A<< +/S/GoTo +/D(section.1) +>> +/Parent 11 0 R +/First 13 0 R +/Last 19 0 R +/Count -5 +/Next 20 0 R +>> +endobj +21 0 obj +<< +/Title(Running a parser) +/A<< +/S/GoTo +/D(subsection.2.1) +>> +/Parent 20 0 R +/Next 22 0 R +>> +endobj +22 0 obj +<< +/Title(Sequence and choice) +/A<< +/S/GoTo +/D(subsection.2.2) +>> +/Parent 20 0 R +/Prev 21 0 R +/Next 23 0 R +>> +endobj +23 0 obj +<< +/Title(Predictive parsers) +/A<< +/S/GoTo +/D(subsection.2.3) +>> +/Parent 20 0 R +/Prev 22 0 R +/Next 24 0 R +>> +endobj +24 0 obj +<< +/Title(Adding semantics) +/A<< +/S/GoTo +/D(subsection.2.4) +>> +/Parent 20 0 R +/Prev 23 0 R +/Next 25 0 R +>> +endobj +25 0 obj +<< +/Title(Sequences and seperators) +/A<< +/S/GoTo +/D(subsection.2.5) +>> +/Parent 20 0 R +/Prev 24 0 R +/Next 26 0 R +>> +endobj +26 0 obj +<< +/Title(Improving error messages) +/A<< +/S/GoTo +/D(subsection.2.6) +>> +/Parent 20 0 R +/Prev 25 0 R +/Next 27 0 R +>> +endobj +27 0 obj +<< +/Title(Expressions) +/A<< +/S/GoTo +/D(subsection.2.7) +>> +/Parent 20 0 R +/Prev 26 0 R +/Next 28 0 R +>> +endobj +28 0 obj +<< +/Title(Lexical analysis) +/A<< +/S/GoTo +/D(subsection.2.8) +>> +/Parent 20 0 R +/Prev 27 0 R +/Next 29 0 R +>> +endobj +30 0 obj +<< +/Title(Lexeme parsers
\ No newline at end of file diff --git a/testdata/text.txt b/testdata/text.txt new file mode 100644 index 0000000..476f506 --- /dev/null +++ b/testdata/text.txt @@ -0,0 +1,2 @@ +Hello, World! + |
