diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2009-10-09 12:14:02 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2009-10-09 15:13:05 +0200 |
commit | 52ab307935bd2bbda52f853f9fc6b49f01897727 (patch) | |
tree | 8950b9658b4f0fba902e80077ba5ee88f50c4541 /lib/git/blob.py | |
parent | 07c20b4231b12fee42d15f1c44c948ce474f5851 (diff) | |
download | gitpython-52ab307935bd2bbda52f853f9fc6b49f01897727.tar.gz |
diff regexes are now precompiled on class level; renamed a|b_blob to a|b_blob_id, as it better reflects the actual value
actor regex now precompiled on class level
blob regex now precompiled on class level; made the blame method more readable and faster, although it can still be improved by making assumptions about the blame format and by reading the git command stream directly (which is a general issue right now)
Diffstat (limited to 'lib/git/blob.py')
-rw-r--r-- | lib/git/blob.py | 111 |
1 files changed, 68 insertions, 43 deletions
diff --git a/lib/git/blob.py b/lib/git/blob.py index dac0888f..1e8aa12b 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -15,6 +15,12 @@ class Blob(object): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + def __init__(self, repo, id, mode=None, path=None): """ Create an unbaked Blob containing just the specified attributes @@ -112,49 +118,68 @@ class Blob(object): info = None for line in data.splitlines(): - parts = re.split(r'\s+', line, 1) - if re.search(r'^[0-9A-Fa-f]{40}$', parts[0]): - if re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line): - m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line) - id, origin_line, final_line, group_lines = m.groups() - info = {'id': id} - blames.append([None, []]) - elif re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line): - m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line) - id, origin_line, final_line = m.groups() - info = {'id': id} - elif re.search(r'^(author|committer)', parts[0]): - if re.search(r'^(.+)-mail$', parts[0]): - m = re.search(r'^(.+)-mail$', parts[0]) - info["%s_email" % m.groups()[0]] = parts[-1] - elif re.search(r'^(.+)-time$', parts[0]): - m = re.search(r'^(.+)-time$', parts[0]) - info["%s_date" % m.groups()[0]] = time.gmtime(int(parts[-1])) - elif re.search(r'^(author|committer)$', parts[0]): - m = re.search(r'^(author|committer)$', parts[0]) - info[m.groups()[0]] = parts[-1] - elif re.search(r'^filename', parts[0]): - info['filename'] = parts[-1] - elif re.search(r'^summary', parts[0]): - info['summary'] = parts[-1] - elif parts[0] == '': - if info: - c = commits.has_key(info['id']) and commits[info['id']] - if not c: - c = Commit(repo, id=info['id'], - author=Actor.from_string(info['author'] + ' ' + info['author_email']), - 
authored_date=info['author_date'], - committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[info['id']] = c - - m = re.search(r'^\t(.*)$', line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - + parts = cls.re_whitespace.split(line, 1) + firstpart = parts[0] + if cls.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = cls.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail <tom@mojombo.com> + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail <tom@mojombo.com> + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = time.gmtime(int(parts[-1])) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # <and rest> + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( repo, id=sha, + author=Actor.from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial 
creation + m = cls.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information return blames def __repr__(self): |