summaryrefslogtreecommitdiff
path: root/lib/git/diff.py
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2009-10-15 10:04:17 +0200
committer Sebastian Thiel <byronimo@gmail.com> 2009-10-15 10:04:17 +0200
commit 6226720b0e6a5f7cb9223fc50363def487831315 (patch)
tree 10f70f8e41c91f5bf57f04b616f3e5afdb9f8407 /lib/git/diff.py
parent b0e84a3401c84507dc017d6e4f57a9dfdb31de53 (diff)
parent 4186a2dbbd48fd67ff88075c63bbd3e6c1d8a2df (diff)
download gitpython-6226720b0e6a5f7cb9223fc50363def487831315.tar.gz
Initial set of improvements merged into master, including a class hierarchy redesign and performance improvements
Merge commit 'origin/improvements' * commit 'origin/improvements': (38 commits) test_performance: module containing benchmarks to get an idea of the achieved throughput Removed plenty of mocked tree tests as they cannot work anymore with persistent commands that require stdin AND binary data - not even an adapter would help here. These tests will have to be replaced. tree: now reads trees directly by parsing the binary data, allowing it to save possibly hundreds of command calls Refs are now truly dynamic - this costs a little bit of (persistent command) work, but assures refs behave as expected persistent command signature changed to also return the hexsha from a possible input ref - the objects pointed to by refs are now baked on demand - perhaps it should change to always be re-retrieved using a property as it is relatively fast - this way refs can always be cached test_blob: removed many redundant tests that would fail now as the mock cannot handle the complexity of the command backend Implemented git command facility to keep persistent commands for fast object information retrieval test: Added time-consuming test which could also be a benchmark in fact - currently it causes hundreds of command invocations which is slow cmd: added option to return the process directly, allowing to read the output directly from the output stream added Iterable interface to Ref type renamed find_all to list_all, changed commit to use iterable interface in preparation for command changes Added base for all iterable objects unified name of utils module, recently it was named util and utils in different packages tree: renamed content_from_string to _from_string to make it private. 
Removed tests that were testing that method tree: now behaves like a list with string indexing functionality - using a dict as cache is a problem as the tree is ordered, added blobs, trees and traverse method test_base: Improved basic object creation as well as set hash tests repo.active_branch now returns a Head object, not a string IndexObjects are now checking their slots to raise a proper error message in case someone tries to access an unset path or mode - this information cannot be retrieved afterwards as IndexObject information is kept in the object that pointed at them. To find this information, one would have to search all objects which is not feasible refs now take repo as first argument and derive from LazyMixin to allow them to dynamically retrieve their objects renamed from_string and list_from_string to _from_string and _list_from_string to indicate their new status as private method, adjusted all callers respectively ...
Diffstat (limited to 'lib/git/diff.py')
-rw-r--r-- lib/git/diff.py 169
1 files changed, 88 insertions, 81 deletions
diff --git a/lib/git/diff.py b/lib/git/diff.py
index 44f55602..0db83b4f 100644
--- a/lib/git/diff.py
+++ b/lib/git/diff.py
@@ -5,94 +5,101 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import re
-import blob
+import objects.blob as blob
class Diff(object):
- """
- A Diff contains diff information between two commits.
-
- It contains two sides a and b of the diff, members are prefixed with
- "a" and "b" respectively to inidcate that.
-
- Diffs keep information about the changed blob objects, the file mode, renames,
- deletions and new files.
-
- There are a few cases where None has to be expected as member variable value:
-
- ``New File``::
-
- a_mode is None
- a_blob is None
-
- ``Deleted File``::
-
- b_mode is None
- b_blob is NOne
- """
+ """
+ A Diff contains diff information between two commits.
+
+ It contains two sides a and b of the diff, members are prefixed with
+ "a" and "b" respectively to inidcate that.
+
+ Diffs keep information about the changed blob objects, the file mode, renames,
+ deletions and new files.
+
+ There are a few cases where None has to be expected as member variable value:
+
+ ``New File``::
+
+ a_mode is None
+ a_blob is None
+
+ ``Deleted File``::
+
+ b_mode is None
+ b_blob is NOne
+ """
+
+ # precompiled regex
+ re_header = re.compile(r"""
+ #^diff[ ]--git
+ [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
+ (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
+ ^rename[ ]from[ ](?P<rename_from>\S+)\n
+ ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
+ (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
+ ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
+ (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
+ (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
+ (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
+ \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
+ """, re.VERBOSE | re.MULTILINE)
+ re_is_null_hexsha = re.compile( r'^0{40}$' )
+ __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file",
+ "rename_from", "rename_to", "renamed", "diff")
- def __init__(self, repo, a_path, b_path, a_blob, b_blob, a_mode,
- b_mode, new_file, deleted_file, rename_from,
- rename_to, diff):
- self.repo = repo
+ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
+ b_mode, new_file, deleted_file, rename_from,
+ rename_to, diff):
+ if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id):
+ self.a_blob = None
+ else:
+ self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path)
+ if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id):
+ self.b_blob = None
+ else:
+ self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path)
- if not a_blob or re.search(r'^0{40}$', a_blob):
- self.a_blob = None
- else:
- self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, name=a_path)
- if not b_blob or re.search(r'^0{40}$', b_blob):
- self.b_blob = None
- else:
- self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, name=b_path)
+ self.a_mode = a_mode
+ self.b_mode = b_mode
+ if self.a_mode:
+ self.a_mode = blob.Blob._mode_str_to_int( self.a_mode )
+ if self.b_mode:
+ self.b_mode = blob.Blob._mode_str_to_int( self.b_mode )
+ self.new_file = new_file
+ self.deleted_file = deleted_file
+ self.rename_from = rename_from
+ self.rename_to = rename_to
+ self.renamed = rename_from != rename_to
+ self.diff = diff
- self.a_mode = a_mode
- self.b_mode = b_mode
- self.new_file = new_file
- self.deleted_file = deleted_file
- self.rename_from = rename_from
- self.rename_to = rename_to
- self.renamed = rename_from != rename_to
- self.diff = diff
+ @classmethod
+ def _list_from_string(cls, repo, text):
+ """
+ Create a new diff object from the given text
+ ``repo``
+ is the repository we are operating on - it is required
+
+ ``text``
+ result of 'git diff' between two commits or one commit and the index
+
+ Returns
+ git.Diff[]
+ """
+ diffs = []
- @classmethod
- def list_from_string(cls, repo, text):
- """
- Create a new diff object from the given text
- ``repo``
- is the repository we are operating on - it is required
-
- ``text``
- result of 'git diff' between two commits or one commit and the index
-
- Returns
- git.Diff[]
- """
- diffs = []
+ diff_header = cls.re_header.match
+ for diff in ('\n' + text).split('\ndiff --git')[1:]:
+ header = diff_header(diff)
- diff_header = re.compile(r"""
- #^diff[ ]--git
- [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
- (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
- ^rename[ ]from[ ](?P<rename_from>\S+)\n
- ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
- (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
- ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
- (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
- (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
- (?:^index[ ](?P<a_blob>[0-9A-Fa-f]+)
- \.\.(?P<b_blob>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- """, re.VERBOSE | re.MULTILINE).match
+ a_path, b_path, similarity_index, rename_from, rename_to, \
+ old_mode, new_mode, new_file_mode, deleted_file_mode, \
+ a_blob_id, b_blob_id, b_mode = header.groups()
+ new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
- for diff in ('\n' + text).split('\ndiff --git')[1:]:
- header = diff_header(diff)
+ diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,
+ old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
+ new_file, deleted_file, rename_from, rename_to, diff[header.end():]))
- a_path, b_path, similarity_index, rename_from, rename_to, \
- old_mode, new_mode, new_file_mode, deleted_file_mode, \
- a_blob, b_blob, b_mode = header.groups()
- new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
-
- diffs.append(Diff(repo, a_path, b_path, a_blob, b_blob,
- old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
- new_file, deleted_file, rename_from, rename_to, diff[header.end():]))
-
- return diffs
+ return diffs