summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adam Hupp <adam@hupp.org> 2010-03-31 15:50:55 -0700
committer: Adam Hupp <adam@hupp.org> 2010-03-31 15:50:55 -0700
commit: 7db98f44bccc77c3adaef638b7765aaf0f6230ae (patch)
tree: 31a9c66e495a8cf3b36a2adf3d861a45adc9045b
download: python-magic-7db98f44bccc77c3adaef638b7765aaf0f6230ae.tar.gz
initial commit
-rw-r--r-- README 36
-rw-r--r-- magic.py 207
-rw-r--r-- setup.py 17
-rw-r--r-- test.py 51
-rw-r--r-- testdata/magic.pyc bin 0 -> 1797 bytes
-rw-r--r-- testdata/test.gz bin 0 -> 40 bytes
-rw-r--r-- testdata/test.pdf 199
-rw-r--r-- testdata/text.txt 2
8 files changed, 512 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..19dc116
--- /dev/null
+++ b/README
@@ -0,0 +1,36 @@
+
+= python-magic =
+
+Adam Hupp <adam at hupp.org>
+
+Distributed under the PSF License: http://www.python.org/psf/license/
+
+python-magic is a simple wrapper for libmagic. libmagic identifies
+file types according to their headers. It is the core of the Unix
+"file" command.
+
+
+= Installation =
+
+This module depends on libmagic to run. It loads the library through
+ctypes, so installing needs no compiler, swig, or libmagic-dev.
+
+To build and install run:
+
+# python setup.py install
+
+
+= Example Usage =
+
+>>> import magic
+>>> m = magic.Magic()
+>>> m.from_file("testdata/test.pdf")
+'PDF document, version 1.2'
+>>> m.from_buffer(open("testdata/test.pdf").read(1024))
+'PDF document, version 1.2'
+
+# For MIME types
+>>> mime = magic.Magic(mime=True)
+>>> mime.from_file("testdata/test.pdf")
+'application/pdf'
+>>>
diff --git a/magic.py b/magic.py
new file mode 100644
index 0000000..60b999b
--- /dev/null
+++ b/magic.py
@@ -0,0 +1,207 @@
+"""
+magic is a wrapper around the libmagic file identification library.
+
+See README for more information.
+
+Usage:
+
+>>> import magic
+>>> magic.from_file("testdata/test.pdf")
+'PDF document, version 1.2'
+>>> magic.from_file("testdata/test.pdf", mime=True)
+'application/pdf'
+>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
+'PDF document, version 1.2'
+>>>
+
+
+"""
+
+import os.path
+import ctypes
+import ctypes.util
+
+from ctypes import c_char_p, c_int, c_size_t, c_void_p
+
+class MagicException(Exception): pass
+
+class Magic:
+ """
+ Magic is a wrapper around the libmagic C library.
+
+ """
+
+ def __init__(self, mime=False, magic_file=None):
+ """
+ Create a new libmagic wrapper.
+
+ mime - if True, mimetypes are returned instead of textual descriptions
+ magic_file - use a magic database file other than the system default
+
+ """
+ flags = MAGIC_NONE
+ if mime:
+ flags |= MAGIC_MIME
+
+ self.cookie = magic_open(flags)
+
+ magic_load(self.cookie, magic_file)
+
+
+ def from_buffer(self, buf):
+ """
+ Identify the contents of `buf`
+ """
+ return magic_buffer(self.cookie, buf)
+
+ def from_file(self, filename):
+ """
+ Identify the contents of file `filename`
+ raises IOError if the file does not exist
+ """
+
+ if not os.path.exists(filename):
+ raise IOError("File does not exist: " + filename)
+
+ return magic_file(self.cookie, filename)
+
+ def __del__(self):
+ try:
+ magic_close(self.cookie)
+ except Exception, e:
+ print "got thig: ", e
+
+
+_magic_mime = None
+_magic = None
+
+def _get_magic_mime():
+ global _magic_mime
+ if not _magic_mime:
+ _magic_mime = Magic(mime=True)
+ return _magic_mime
+
+def _get_magic():
+ global _magic
+ if not _magic:
+ _magic = Magic()
+ return _magic
+
+def _get_magic_type(mime):
+ if mime:
+ return _get_magic_mime()
+ else:
+ return _get_magic()
+
+def from_file(filename, mime=False):
+ m = _get_magic_type(mime)
+ return m.from_file(filename)
+
+def from_buffer(buffer, mime=False):
+ m = _get_magic_type(mime)
+ return m.from_buffer(buffer)
+
+
+
+
+libmagic = ctypes.CDLL(ctypes.util.find_library('magic'))
+
+magic_t = ctypes.c_void_p
+
+def errorcheck(result, func, args):
+ err = magic_error(args[0])
+ if err is not None:
+ raise MagicException(err)
+ else:
+ return result
+
+magic_open = libmagic.magic_open
+magic_open.restype = magic_t
+magic_open.argtypes = [c_int]
+
+magic_close = libmagic.magic_close
+magic_close.restype = None
+magic_close.argtypes = [magic_t]
+
+magic_error = libmagic.magic_error
+magic_error.restype = c_char_p
+magic_error.argtypes = [magic_t]
+
+magic_errno = libmagic.magic_errno
+magic_errno.restype = c_int
+magic_errno.argtypes = [magic_t]
+
+magic_file = libmagic.magic_file
+magic_file.restype = c_char_p
+magic_file.argtypes = [magic_t, c_char_p]
+magic_file.errcheck = errorcheck
+
+
+_magic_buffer = libmagic.magic_buffer
+_magic_buffer.restype = c_char_p
+_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
+_magic_buffer.errcheck = errorcheck
+
+
+def magic_buffer(cookie, buf):
+ return _magic_buffer(cookie, buf, len(buf))
+
+
+magic_load = libmagic.magic_load
+magic_load.restype = c_int
+magic_load.argtypes = [magic_t, c_char_p]
+magic_load.errcheck = errorcheck
+
+magic_setflags = libmagic.magic_setflags
+magic_setflags.restype = c_int
+magic_setflags.argtypes = [magic_t, c_int]
+
+magic_check = libmagic.magic_check
+magic_check.restype = c_int
+magic_check.argtypes = [magic_t, c_char_p]
+
+magic_compile = libmagic.magic_compile
+magic_compile.restype = c_int
+magic_compile.argtypes = [magic_t, c_char_p]
+
+
+
+MAGIC_NONE = 0x000000 # No flags
+
+MAGIC_DEBUG = 0x000001 # Turn on debugging
+
+MAGIC_SYMLINK = 0x000002 # Follow symlinks
+
+MAGIC_COMPRESS = 0x000004 # Check inside compressed files
+
+MAGIC_DEVICES = 0x000008 # Look at the contents of devices
+
+MAGIC_MIME = 0x000010 # Return a mime string
+
+MAGIC_CONTINUE = 0x000020 # Return all matches
+
+MAGIC_CHECK = 0x000040 # Print warnings to stderr
+
+MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
+
+MAGIC_RAW = 0x000100 # Don't translate unprintable chars
+
+MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
+
+MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
+
+MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
+
+MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
+
+MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
+
+MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
+
+MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
+
+MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
+
+MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
+
+MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..22dda6f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,17 @@
+from setuptools import setup, Extension
+#from distutils.core import setup, Extension
+
+setup(name='python-magic',
+ description='File type identification using libmagic',
+ author='Adam Hupp',
+ author_email='adam@hupp.org',
+ url="http://hupp.org/adam/hg/python-magic",
+ version='0.1',
+ py_modules=['magic'],
+ long_description="""This module uses ctypes to access the libmagic file type
+identification library. It makes use of the local magic database and
+supports both textual and MIME-type output.
+""",
+ keywords="mime magic file",
+ license="PSF",
+ )
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..0d5140e
--- /dev/null
+++ b/test.py
@@ -0,0 +1,51 @@
+
+import os.path
+import unittest
+import random
+from StringIO import StringIO
+from os import path
+from magic import Magic, MagicException
+
+testfile = [
+ ("magic.pyc", "python 2.4 byte-compiled", "application/octet-stream"),
+ ("test.pdf", "PDF document, version 1.2", "application/pdf"),
+ ("test.gz", 'gzip compressed data, was "test", from Unix, last modified: '
+ 'Sat Jun 28 18:32:52 2008', "application/x-gzip"),
+ ("text.txt", "ASCII text", "text/plain; charset=us-ascii"),
+ ]
+
+
+class TestMagic(unittest.TestCase):
+
+ mime = False
+
+ def setUp(self):
+ self.m = Magic(mime=self.mime)
+
+ def testFileTypes(self):
+ for filename, desc, mime in testfile:
+ filename = path.join(path.dirname(__file__),
+ "testdata",
+ filename)
+ if self.mime:
+ target = mime
+ else:
+ target = desc
+
+ self.assertEqual(target, self.m.from_buffer(open(filename).read(1024)))
+ self.assertEqual(target, self.m.from_file(filename), filename)
+
+
+ def testErrors(self):
+ self.assertRaises(IOError, self.m.from_file, "nonexistent")
+ self.assertRaises(MagicException, Magic, magic_file="noneexistent")
+ os.environ['MAGIC'] = '/nonexistetn'
+ self.assertRaises(MagicException, Magic)
+ del os.environ['MAGIC']
+
+class TestMagicMime(TestMagic):
+ mime = True
+
+if __name__ == '__main__':
+ unittest.main()
+
diff --git a/testdata/magic.pyc b/testdata/magic.pyc
new file mode 100644
index 0000000..ba801e0
--- /dev/null
+++ b/testdata/magic.pyc
Binary files differ
diff --git a/testdata/test.gz b/testdata/test.gz
new file mode 100644
index 0000000..5d847dd
--- /dev/null
+++ b/testdata/test.gz
Binary files differ
diff --git a/testdata/test.pdf b/testdata/test.pdf
new file mode 100644
index 0000000..b986617
--- /dev/null
+++ b/testdata/test.pdf
@@ -0,0 +1,199 @@
+%PDF-1.2
+7 0 obj
+[5 0 R/XYZ 111.6 757.86]
+endobj
+13 0 obj
+<<
+/Title(About this document)
+/A<<
+/S/GoTo
+/D(subsection.1.1)
+>>
+/Parent 12 0 R
+/Next 14 0 R
+>>
+endobj
+15 0 obj
+<<
+/Title(Compiling with GHC)
+/A<<
+/S/GoTo
+/D(subsubsection.1.2.1)
+>>
+/Parent 14 0 R
+/Next 16 0 R
+>>
+endobj
+16 0 obj
+<<
+/Title(Compiling with Hugs)
+/A<<
+/S/GoTo
+/D(subsubsection.1.2.2)
+>>
+/Parent 14 0 R
+/Prev 15 0 R
+>>
+endobj
+14 0 obj
+<<
+/Title(Compatibility)
+/A<<
+/S/GoTo
+/D(subsection.1.2)
+>>
+/Parent 12 0 R
+/Prev 13 0 R
+/First 15 0 R
+/Last 16 0 R
+/Count -2
+/Next 17 0 R
+>>
+endobj
+17 0 obj
+<<
+/Title(Reporting bugs)
+/A<<
+/S/GoTo
+/D(subsection.1.3)
+>>
+/Parent 12 0 R
+/Prev 14 0 R
+/Next 18 0 R
+>>
+endobj
+18 0 obj
+<<
+/Title(History)
+/A<<
+/S/GoTo
+/D(subsection.1.4)
+>>
+/Parent 12 0 R
+/Prev 17 0 R
+/Next 19 0 R
+>>
+endobj
+19 0 obj
+<<
+/Title(License)
+/A<<
+/S/GoTo
+/D(subsection.1.5)
+>>
+/Parent 12 0 R
+/Prev 18 0 R
+>>
+endobj
+12 0 obj
+<<
+/Title(Introduction)
+/A<<
+/S/GoTo
+/D(section.1)
+>>
+/Parent 11 0 R
+/First 13 0 R
+/Last 19 0 R
+/Count -5
+/Next 20 0 R
+>>
+endobj
+21 0 obj
+<<
+/Title(Running a parser)
+/A<<
+/S/GoTo
+/D(subsection.2.1)
+>>
+/Parent 20 0 R
+/Next 22 0 R
+>>
+endobj
+22 0 obj
+<<
+/Title(Sequence and choice)
+/A<<
+/S/GoTo
+/D(subsection.2.2)
+>>
+/Parent 20 0 R
+/Prev 21 0 R
+/Next 23 0 R
+>>
+endobj
+23 0 obj
+<<
+/Title(Predictive parsers)
+/A<<
+/S/GoTo
+/D(subsection.2.3)
+>>
+/Parent 20 0 R
+/Prev 22 0 R
+/Next 24 0 R
+>>
+endobj
+24 0 obj
+<<
+/Title(Adding semantics)
+/A<<
+/S/GoTo
+/D(subsection.2.4)
+>>
+/Parent 20 0 R
+/Prev 23 0 R
+/Next 25 0 R
+>>
+endobj
+25 0 obj
+<<
+/Title(Sequences and seperators)
+/A<<
+/S/GoTo
+/D(subsection.2.5)
+>>
+/Parent 20 0 R
+/Prev 24 0 R
+/Next 26 0 R
+>>
+endobj
+26 0 obj
+<<
+/Title(Improving error messages)
+/A<<
+/S/GoTo
+/D(subsection.2.6)
+>>
+/Parent 20 0 R
+/Prev 25 0 R
+/Next 27 0 R
+>>
+endobj
+27 0 obj
+<<
+/Title(Expressions)
+/A<<
+/S/GoTo
+/D(subsection.2.7)
+>>
+/Parent 20 0 R
+/Prev 26 0 R
+/Next 28 0 R
+>>
+endobj
+28 0 obj
+<<
+/Title(Lexical analysis)
+/A<<
+/S/GoTo
+/D(subsection.2.8)
+>>
+/Parent 20 0 R
+/Prev 27 0 R
+/Next 29 0 R
+>>
+endobj
+30 0 obj
+<<
+/Title(Lexeme parsers \ No newline at end of file
diff --git a/testdata/text.txt b/testdata/text.txt
new file mode 100644
index 0000000..476f506
--- /dev/null
+++ b/testdata/text.txt
@@ -0,0 +1,2 @@
+Hello, World!
+