diff options
| -rw-r--r-- | Lib/difflib.py | 201 | 
1 files changed, 200 insertions, 1 deletions
def get_grouped_opcodes(self, n=3):
    """Isolate change clusters by eliminating ranges with no changes.

    Return a generator of groups with up to n lines of context.
    Each group is a list of (tag, i1, i2, j1, j2) tuples in the same
    format as returned by get_opcodes().  Nothing is generated when
    get_opcodes() reports no opcodes or only a single 'equal' range.

    >>> from pprint import pprint
    >>> a = [str(i) for i in range(1, 40)]
    >>> b = a[:]
    >>> b[8:8] = ['i']     # Make an insertion
    >>> b[20] += 'x'       # Make a replacement
    >>> b[23:28] = []      # Make a deletion
    >>> b[30] += 'y'       # Make another replacement
    >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
    [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
     [('equal', 16, 19, 17, 20),
      ('replace', 19, 20, 20, 21),
      ('equal', 20, 22, 21, 23),
      ('delete', 22, 27, 23, 23),
      ('equal', 27, 30, 23, 26)],
     [('equal', 31, 34, 27, 30),
      ('replace', 34, 35, 30, 31),
      ('equal', 35, 38, 31, 34)]]
    """

    # Work on a private copy: the fix-ups below assign into the list, and
    # the list handed back by get_opcodes() may be one the matcher keeps.
    codes = list(self.get_opcodes())
    if not codes:
        # No opcodes at all (e.g. both sequences empty): nothing to group,
        # and indexing codes[0] below would raise IndexError.
        return

    # Fixup leading and trailing groups if they show no changes: keep at
    # most n lines of context next to the first/last change.
    if codes[0][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[0]
        codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
    if codes[-1][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[-1]
        codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)

    nn = n + n
    group = []
    for tag, i1, i2, j1, j2 in codes:
        # End the current group and start a new one whenever
        # there is a large range with no changes.
        if tag == 'equal' and i2-i1 > nn:
            # First n lines close the current group ...
            group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
            yield group
            group = []
            # ... and the last n lines open the next one.
            i1, j1 = max(i1, i2-n), max(j1, j2-n)
        group.append((tag, i1, i2, j1, j2))
    # A trailing group that holds only context carries no change: drop it.
    if group and not (len(group) == 1 and group[0][0] == 'equal'):
        yield group
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    Nothing is generated when the inputs produce no change groups.  An
    empty range in a hunk header is numbered from the line that precedes
    it (so an insertion into an empty sequence is reported as "-0,0").

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """

    started = False
    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted only once a first hunk exists.
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        # Line numbers are 1-based, except that an empty range is numbered
        # from the line just before it.
        start_a = i1 + 1
        if i2 == i1:
            start_a = i1
        start_b = j1 + 1
        if j2 == j1:
            start_b = j1
        yield "@@ -%d,%d +%d,%d @@%s" % (start_a, i2-i1, start_b, j2-j1,
                                         lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A 'replace' is shown as the removed lines followed by the
            # added lines.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line

# See http://www.unix.org/single_unix_specification/
def context_diff(a, b, fromfile='', tofile='',
                 fromfiledate='', tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a context diff.

    Context diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with *** or ---) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The context diff format normally has a header for filenames and
    modification times.  Any or all of these may be specified using
    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
    The modification times are normally expressed in the format returned
    by time.ctime().  If not specified, the strings default to blanks.

    Example:

    >>> import sys
    >>> sys.stdout.writelines(context_diff(
    ...       'one\ntwo\nthree\nfour\n'.splitlines(True),
    ...       'zero\none\ntree\nfour\n'.splitlines(True), 'Original', 'Current',
    ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003'))
    *** Original Sat Jan 26 23:30:50 1991
    --- Current Fri Jun 06 10:22:46 2003
    ***************
    *** 1,4 ****
      one
    ! two
    ! three
      four
    --- 1,4 ----
    + zero
      one
    ! tree
      four
    """

    # Two-character line prefix for each opcode tag.
    prefixmap = {'insert': '+ ', 'delete': '- ', 'replace': '! ',
                 'equal': '  '}
    header_pending = True
    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        if header_pending:
            # The file header is emitted only once a first hunk exists.
            yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
            header_pending = False
        first, last = group[0], group[-1]
        tags = [code[0] for code in group]

        yield '***************%s' % (lineterm,)

        # "From" half: ranges of fewer than two lines show one number.
        if last[2] - first[1] >= 2:
            yield '*** %d,%d ****%s' % (first[1]+1, last[2], lineterm)
        else:
            yield '*** %d ****%s' % (last[2], lineterm)
        # The body is listed only when this side has a change to show.
        if 'replace' in tags or 'delete' in tags:
            for tag, i1, i2, j1, j2 in group:
                if tag == 'insert':
                    continue
                prefix = prefixmap[tag]
                for line in a[i1:i2]:
                    yield prefix + line

        # "To" half, same rules applied to the second sequence.
        if last[4] - first[3] >= 2:
            yield '--- %d,%d ----%s' % (first[3]+1, last[4], lineterm)
        else:
            yield '--- %d ----%s' % (last[4], lineterm)
        if 'replace' in tags or 'insert' in tags:
            for tag, i1, i2, j1, j2 in group:
                if tag == 'delete':
                    continue
                prefix = prefixmap[tag]
                for line in b[j1:j2]:
                    yield prefix + line
