summary refs log tree commit diff
path: root/lib/git
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git')
-rw-r--r-- lib/git/actor.py 8
-rw-r--r-- lib/git/blob.py 111
-rw-r--r-- lib/git/diff.py 45
3 files changed, 98 insertions, 66 deletions
diff --git a/lib/git/actor.py b/lib/git/actor.py
index bc1a4479..28f50e73 100644
--- a/lib/git/actor.py
+++ b/lib/git/actor.py
@@ -10,6 +10,10 @@ class Actor(object):
"""Actors hold information about a person acting on the repository. They
can be committers and authors or anything with a name and an email as
mentioned in the git log entries."""
+ # precompiled regex
+ name_only_regex = re.compile( r'<.+>' )
+ name_email_regex = re.compile( r'(.*) <(.+?)>' )
+
def __init__(self, name, email):
self.name = name
self.email = email
@@ -34,8 +38,8 @@ class Actor(object):
Returns
Actor
"""
- if re.search(r'<.+>', string):
- m = re.search(r'(.*) <(.+?)>', string)
+ if cls.name_only_regex.search(string):
+ m = cls.name_email_regex.search(string)
name, email = m.groups()
return Actor(name, email)
else:
diff --git a/lib/git/blob.py b/lib/git/blob.py
index dac0888f..1e8aa12b 100644
--- a/lib/git/blob.py
+++ b/lib/git/blob.py
@@ -15,6 +15,12 @@ class Blob(object):
"""A Blob encapsulates a git blob object"""
DEFAULT_MIME_TYPE = "text/plain"
+ # precompiled regex
+ re_whitespace = re.compile(r'\s+')
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_author_committer_start = re.compile(r'^(author|committer)')
+ re_tab_full_line = re.compile(r'^\t(.*)$')
+
def __init__(self, repo, id, mode=None, path=None):
"""
Create an unbaked Blob containing just the specified attributes
@@ -112,49 +118,68 @@ class Blob(object):
info = None
for line in data.splitlines():
- parts = re.split(r'\s+', line, 1)
- if re.search(r'^[0-9A-Fa-f]{40}$', parts[0]):
- if re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line):
- m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line)
- id, origin_line, final_line, group_lines = m.groups()
- info = {'id': id}
- blames.append([None, []])
- elif re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line):
- m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line)
- id, origin_line, final_line = m.groups()
- info = {'id': id}
- elif re.search(r'^(author|committer)', parts[0]):
- if re.search(r'^(.+)-mail$', parts[0]):
- m = re.search(r'^(.+)-mail$', parts[0])
- info["%s_email" % m.groups()[0]] = parts[-1]
- elif re.search(r'^(.+)-time$', parts[0]):
- m = re.search(r'^(.+)-time$', parts[0])
- info["%s_date" % m.groups()[0]] = time.gmtime(int(parts[-1]))
- elif re.search(r'^(author|committer)$', parts[0]):
- m = re.search(r'^(author|committer)$', parts[0])
- info[m.groups()[0]] = parts[-1]
- elif re.search(r'^filename', parts[0]):
- info['filename'] = parts[-1]
- elif re.search(r'^summary', parts[0]):
- info['summary'] = parts[-1]
- elif parts[0] == '':
- if info:
- c = commits.has_key(info['id']) and commits[info['id']]
- if not c:
- c = Commit(repo, id=info['id'],
- author=Actor.from_string(info['author'] + ' ' + info['author_email']),
- authored_date=info['author_date'],
- committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']),
- committed_date=info['committer_date'],
- message=info['summary'])
- commits[info['id']] = c
-
- m = re.search(r'^\t(.*)$', line)
- text, = m.groups()
- blames[-1][0] = c
- blames[-1][1].append( text )
- info = None
-
+ parts = cls.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ if cls.re_hexsha_only.search(firstpart):
+ # handles
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+ digits = parts[-1].split(" ")
+ if len(digits) == 3:
+ info = {'id': firstpart}
+ blames.append([None, []])
+ # END blame data initialization
+ else:
+ m = cls.re_author_committer_start.search(firstpart)
+ if m:
+ # handles:
+ # author Tom Preston-Werner
+ # author-mail <tom@mojombo.com>
+ # author-time 1192271832
+ # author-tz -0700
+ # committer Tom Preston-Werner
+ # committer-mail <tom@mojombo.com>
+ # committer-time 1192271832
+ # committer-tz -0700 - IGNORED BY US
+ role = m.group(0)
+ if firstpart.endswith('-mail'):
+ info["%s_email" % role] = parts[-1]
+ elif firstpart.endswith('-time'):
+ info["%s_date" % role] = time.gmtime(int(parts[-1]))
+ elif role == firstpart:
+ info[role] = parts[-1]
+ # END distinguish mail,time,name
+ else:
+ # handle
+ # filename lib/grit.rb
+ # summary add Blob
+ # <and rest>
+ if firstpart.startswith('filename'):
+ info['filename'] = parts[-1]
+ elif firstpart.startswith('summary'):
+ info['summary'] = parts[-1]
+ elif firstpart == '':
+ if info:
+ sha = info['id']
+ c = commits.get(sha)
+ if c is None:
+ c = Commit( repo, id=sha,
+ author=Actor.from_string(info['author'] + ' ' + info['author_email']),
+ authored_date=info['author_date'],
+ committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']),
+ committed_date=info['committer_date'],
+ message=info['summary'])
+ commits[sha] = c
+ # END if commit object needs initial creation
+ m = cls.re_tab_full_line.search(line)
+ text, = m.groups()
+ blames[-1][0] = c
+ blames[-1][1].append( text )
+ info = None
+ # END if we collected commit info
+ # END distinguish filename,summary,rest
+ # END distinguish author|committer vs filename,summary,rest
+ # END distinguish hexsha vs other information
return blames
def __repr__(self):
diff --git a/lib/git/diff.py b/lib/git/diff.py
index db12f1e4..75450d70 100644
--- a/lib/git/diff.py
+++ b/lib/git/diff.py
@@ -29,20 +29,36 @@ class Diff(object):
b_mode is None
b_blob is None
"""
+
+ # precompiled regex
+ re_header = re.compile(r"""
+ #^diff[ ]--git
+ [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
+ (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
+ ^rename[ ]from[ ](?P<rename_from>\S+)\n
+ ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
+ (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
+ ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
+ (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
+ (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
+ (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
+ \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
+ """, re.VERBOSE | re.MULTILINE)
+ re_is_null_hexsha = re.compile( r'^0{40}$' )
- def __init__(self, repo, a_path, b_path, a_blob, b_blob, a_mode,
+ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
b_mode, new_file, deleted_file, rename_from,
rename_to, diff):
self.repo = repo
- if not a_blob or re.search(r'^0{40}$', a_blob):
+ if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id):
self.a_blob = None
else:
- self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, path=a_path)
- if not b_blob or re.search(r'^0{40}$', b_blob):
+ self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path)
+ if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id):
self.b_blob = None
else:
- self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, path=b_path)
+ self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path)
self.a_mode = a_mode
self.b_mode = b_mode
@@ -68,29 +84,16 @@ class Diff(object):
"""
diffs = []
- diff_header = re.compile(r"""
- #^diff[ ]--git
- [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
- (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
- ^rename[ ]from[ ](?P<rename_from>\S+)\n
- ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
- (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
- ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
- (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
- (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
- (?:^index[ ](?P<a_blob>[0-9A-Fa-f]+)
- \.\.(?P<b_blob>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- """, re.VERBOSE | re.MULTILINE).match
-
+ diff_header = cls.re_header.match
for diff in ('\n' + text).split('\ndiff --git')[1:]:
header = diff_header(diff)
a_path, b_path, similarity_index, rename_from, rename_to, \
old_mode, new_mode, new_file_mode, deleted_file_mode, \
- a_blob, b_blob, b_mode = header.groups()
+ a_blob_id, b_blob_id, b_mode = header.groups()
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
- diffs.append(Diff(repo, a_path, b_path, a_blob, b_blob,
+ diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,
old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
new_file, deleted_file, rename_from, rename_to, diff[header.end():]))