diff options
| author | Sebastian Thiel <byronimo@gmail.com> | 2009-10-09 12:14:02 +0200 | 
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2009-10-09 15:13:05 +0200 | 
| commit | 52ab307935bd2bbda52f853f9fc6b49f01897727 (patch) | |
| tree | 8950b9658b4f0fba902e80077ba5ee88f50c4541 /lib/git | |
| parent | 07c20b4231b12fee42d15f1c44c948ce474f5851 (diff) | |
| download | gitpython-52ab307935bd2bbda52f853f9fc6b49f01897727.tar.gz | |
Diff regexes are now precompiled at class level; renamed a|b_blob to a|b_blob_id as it better reflects the actual value
Actor regexes are now precompiled at class level
Blob regexes are now precompiled at class level; made the blame method more readable and faster, although it can still be improved by making assumptions about the blame format and by reading the git command stream directly (which is a general issue right now)
Diffstat (limited to 'lib/git')
| -rw-r--r-- | lib/git/actor.py | 8 | ||||
| -rw-r--r-- | lib/git/blob.py | 111 | ||||
| -rw-r--r-- | lib/git/diff.py | 45 | 
3 files changed, 98 insertions, 66 deletions
| diff --git a/lib/git/actor.py b/lib/git/actor.py index bc1a4479..28f50e73 100644 --- a/lib/git/actor.py +++ b/lib/git/actor.py @@ -10,6 +10,10 @@ class Actor(object):      """Actors hold information about a person acting on the repository. They       can be committers and authors or anything with a name and an email as       mentioned in the git log entries.""" +    # precompiled regex +    name_only_regex = re.compile( r'<.+>' ) +    name_email_regex = re.compile( r'(.*) <(.+?)>' )  +          def __init__(self, name, email):          self.name = name          self.email = email @@ -34,8 +38,8 @@ class Actor(object):          Returns              Actor          """ -        if re.search(r'<.+>', string): -            m = re.search(r'(.*) <(.+?)>', string) +        if cls.name_only_regex.search(string): +            m = cls.name_email_regex.search(string)              name, email = m.groups()              return Actor(name, email)          else: diff --git a/lib/git/blob.py b/lib/git/blob.py index dac0888f..1e8aa12b 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -15,6 +15,12 @@ class Blob(object):      """A Blob encapsulates a git blob object"""      DEFAULT_MIME_TYPE = "text/plain" +    # precompiled regex +    re_whitespace = re.compile(r'\s+') +    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') +    re_author_committer_start = re.compile(r'^(author|committer)') +    re_tab_full_line = re.compile(r'^\t(.*)$') +          def __init__(self, repo, id, mode=None, path=None):          """          Create an unbaked Blob containing just the specified attributes @@ -112,49 +118,68 @@ class Blob(object):          info = None          for line in data.splitlines(): -            parts = re.split(r'\s+', line, 1) -            if re.search(r'^[0-9A-Fa-f]{40}$', parts[0]): -                if re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line): -                    m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line) -                    id, 
origin_line, final_line, group_lines = m.groups() -                    info = {'id': id} -                    blames.append([None, []]) -                elif re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line): -                    m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line) -                    id, origin_line, final_line = m.groups() -                    info = {'id': id} -            elif re.search(r'^(author|committer)', parts[0]): -                if re.search(r'^(.+)-mail$', parts[0]): -                    m = re.search(r'^(.+)-mail$', parts[0]) -                    info["%s_email" % m.groups()[0]] = parts[-1] -                elif re.search(r'^(.+)-time$', parts[0]): -                    m = re.search(r'^(.+)-time$', parts[0]) -                    info["%s_date" % m.groups()[0]] = time.gmtime(int(parts[-1])) -                elif re.search(r'^(author|committer)$', parts[0]): -                    m = re.search(r'^(author|committer)$', parts[0]) -                    info[m.groups()[0]] = parts[-1] -            elif re.search(r'^filename', parts[0]): -                info['filename'] = parts[-1] -            elif re.search(r'^summary', parts[0]): -                info['summary'] = parts[-1] -            elif parts[0] == '': -                if info: -                    c = commits.has_key(info['id']) and commits[info['id']] -                    if not c: -                        c = Commit(repo, id=info['id'], -                                         author=Actor.from_string(info['author'] + ' ' + info['author_email']), -                                         authored_date=info['author_date'], -                                         committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), -                                         committed_date=info['committer_date'], -                                         message=info['summary']) -                        commits[info['id']] = c - -                    m = 
re.search(r'^\t(.*)$', line) -                    text,  = m.groups() -                    blames[-1][0] = c -                    blames[-1][1].append( text ) -                    info = None - +            parts = cls.re_whitespace.split(line, 1) +            firstpart = parts[0] +            if cls.re_hexsha_only.search(firstpart): +                # handles  +                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7		- indicates blame-data start +                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 +                digits = parts[-1].split(" ") +                if len(digits) == 3: +					info = {'id': firstpart} +					blames.append([None, []]) +				# END blame data initialization +            else: +                m = cls.re_author_committer_start.search(firstpart) +                if m: +                    # handles:  +                    # author Tom Preston-Werner +                    # author-mail <tom@mojombo.com> +                    # author-time 1192271832 +                    # author-tz -0700 +                    # committer Tom Preston-Werner +                    # committer-mail <tom@mojombo.com> +                    # committer-time 1192271832 +                    # committer-tz -0700  - IGNORED BY US +                    role = m.group(0) +                    if firstpart.endswith('-mail'): +                        info["%s_email" % role] = parts[-1] +                    elif firstpart.endswith('-time'): +                        info["%s_date" % role] = time.gmtime(int(parts[-1])) +                    elif role == firstpart: +                        info[role] = parts[-1] +                    # END distinguish mail,time,name +                else: +                    # handle +                    # filename lib/grit.rb +                    # summary add Blob +                    # <and rest> +                    if firstpart.startswith('filename'): +                        info['filename'] = parts[-1] +                    elif 
firstpart.startswith('summary'): +                        info['summary'] = parts[-1] +                    elif firstpart == '': +                        if info: +                            sha = info['id'] +                            c = commits.get(sha) +                            if c is None: +                                c = Commit(  repo, id=sha, +                                             author=Actor.from_string(info['author'] + ' ' + info['author_email']), +                                             authored_date=info['author_date'], +                                             committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), +                                             committed_date=info['committer_date'], +                                             message=info['summary']) +                                commits[sha] = c +                            # END if commit objects needs initial creation +                            m = cls.re_tab_full_line.search(line) +                            text,  = m.groups() +                            blames[-1][0] = c +                            blames[-1][1].append( text ) +                            info = None +                        # END if we collected commit info +                    # END distinguish filename,summary,rest +                # END distinguish author|committer vs filename,summary,rest +            # END distinguish hexsha vs other information          return blames      def __repr__(self): diff --git a/lib/git/diff.py b/lib/git/diff.py index db12f1e4..75450d70 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -29,20 +29,36 @@ class Diff(object):          b_mode is None          b_blob is NOne      """ +     +    # precompiled regex +    re_header = re.compile(r""" +								#^diff[ ]--git +									[ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n +								(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n +								   ^rename[ ]from[ 
](?P<rename_from>\S+)\n +								   ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? +								(?:^old[ ]mode[ ](?P<old_mode>\d+)\n +								   ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? +								(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? +								(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? +								(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) +									\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? +							""", re.VERBOSE | re.MULTILINE) +    re_is_null_hexsha = re.compile( r'^0{40}$' ) -    def __init__(self, repo, a_path, b_path, a_blob, b_blob, a_mode, +    def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,                   b_mode, new_file, deleted_file, rename_from,                   rename_to, diff):          self.repo = repo -        if not a_blob or re.search(r'^0{40}$', a_blob): +        if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id):              self.a_blob = None          else: -            self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, path=a_path) -        if not b_blob or re.search(r'^0{40}$', b_blob): +            self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path) +        if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id):              self.b_blob = None          else: -            self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, path=b_path) +            self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path)          self.a_mode = a_mode          self.b_mode = b_mode @@ -68,29 +84,16 @@ class Diff(object):          """          diffs = [] -        diff_header = re.compile(r""" -            #^diff[ ]--git -                [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n -            (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n -               ^rename[ ]from[ ](?P<rename_from>\S+)\n -               ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? 
-            (?:^old[ ]mode[ ](?P<old_mode>\d+)\n -               ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? -            (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? -            (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? -            (?:^index[ ](?P<a_blob>[0-9A-Fa-f]+) -                \.\.(?P<b_blob>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? -        """, re.VERBOSE | re.MULTILINE).match - +        diff_header = cls.re_header.match          for diff in ('\n' + text).split('\ndiff --git')[1:]:              header = diff_header(diff)              a_path, b_path, similarity_index, rename_from, rename_to, \                  old_mode, new_mode, new_file_mode, deleted_file_mode, \ -                a_blob, b_blob, b_mode = header.groups() +                a_blob_id, b_blob_id, b_mode = header.groups()              new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) -            diffs.append(Diff(repo, a_path, b_path, a_blob, b_blob, +            diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,                  old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,                  new_file, deleted_file, rename_from, rename_to, diff[header.end():])) | 
