summaryrefslogtreecommitdiff
path: root/lib/git/diff.py
blob: 6a6a097c7c0f345f3e219ad52a259eadeeced0fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# diff.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php

import re
import objects.blob as blob

	
class Diffable(object):
	"""
	Mixin providing a ``diff`` method for objects that can be compared with
	another object of a compatible type.
	
	NOTE: 
		Subclasses must provide a ``repo`` member, just like Object instances do. 
		For practical reasons this class does not derive from Object.
	"""
	__slots__ = ()
	
	# Extra arguments for the git-diff command. Subclasses override this tuple
	# to supply their own; they are placed right after the compared items.
	_diff_args = ()
	
	def diff(self, other=None, paths=None, create_patch=False, **kwargs):
		"""
		Create diffs between two items, being trees, a tree and the index, or 
		the index and the working tree.

		``other``
			The item to compare us with. 
			If None, it is not passed to git-diff at all, so we will be 
			compared to the working tree.

		``paths``
			A single path or a list/tuple of paths to limit the diff to.
			Only changes touching at least one of the given paths are included.

		``create_patch``
			If True, git-diff is asked for patch output ( -p -M ) and each 
			returned Diff carries the detailed patch text that turns self into 
			other. Patches are somewhat costly as blobs have to be read and diffed.
			If False, the cheaper raw output ( --raw ) is requested instead.

		``kwargs``
			Additional arguments passed to git-diff, such as 
			R=True to swap both sides of the diff.
			The ``as_process`` key is always forced to True.

		Returns
			git.DiffIndex
			
		Note
			Rename detection will only work if create_patch is True, as -M is 
			only passed in that case.
		"""
		# a lone path is wrapped so that it can be handled like a list
		if paths is not None and not isinstance(paths, (tuple, list)):
			paths = [paths]
		
		# the compared items come first: ourselves, then the optional other side
		git_args = [self]
		if other is not None:
			git_args.append(other)
		
		git_args.extend(self._diff_args)
		git_args.append("--abbrev=40")		# we need full shas
		git_args.append("--full-index")		# get full index paths, not only filenames
		
		if create_patch:
			git_args.extend(("-p", "-M"))	# -M: check for renames
		else:
			git_args.append("--raw")
		
		# paths is a non-empty list/tuple, an empty one, or None here
		if paths:
			git_args.append("--")
			git_args.extend(paths)
		# END paths handling
		
		# we parse the output of the running process ourselves
		kwargs['as_process'] = True
		process = self.repo.git.diff(*git_args, **kwargs)
		
		# pick the parser matching the output format requested above
		if create_patch:
			parse = Diff._index_from_patch_format
		else:
			parse = Diff._index_from_raw_format
		return parse(self.repo, process.stdout)


class DiffIndex(list):
	"""
	A list of Diff instances, as returned by the diff parsing methods.
	
	It is meant as the place for convenience queries on the contained diffs 
	by their properties. For now it adds no behaviour on top of the builtin list.
	"""
	
	

class Diff(object):
	"""
	A Diff contains diff information between two Trees.
	
	It contains two sides a and b of the diff, members are prefixed with 
	"a" and "b" respectively to indicate that.
	
	Diffs keep information about the changed blob objects, the file mode, renames, 
	deletions and new files.
	
	There are a few cases where None has to be expected as member variable value:
	
	``New File``::
	
		a_mode is None
		a_blob is None
		
	``Deleted File``::
	
		b_mode is None
		b_blob is None
	"""
	
	# Precompiled regex matching the header of a single file's patch, starting 
	# right after the 'diff --git' marker ( which the patch parser splits on ).
	# Paths are matched with '.+?' / '.+' instead of '\S+' so that file names 
	# containing whitespace are parsed as well; for names without whitespace 
	# the match is identical.
	re_header = re.compile(r"""
								#^diff[ ]--git
									[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
								(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
								   ^rename[ ]from[ ](?P<rename_from>.+)\n
								   ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
								(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
								   ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
								(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
								(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
								(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
									\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
							""", re.VERBOSE | re.MULTILINE)
	# matches the all-zero sha git uses for a side that has no blob
	re_is_null_hexsha = re.compile( r'^0{40}$' )
	__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", 
				 "rename_from", "rename_to", "renamed", "diff")

	def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
				 b_mode, new_file, deleted_file, rename_from,
				 rename_to, diff):
		"""
		Initialize a diff from the values parsed out of the git-diff output.
		
		``repo``
			repository handed on to the created Blob instances
		
		``a_path``, ``b_path``
			path of the respective side, handed on to its Blob
		
		``a_blob_id``, ``b_blob_id``
			hexsha of the blob on the respective side. If it is empty, None or 
			the null sha ( 40 zeros ), the respective blob member is None.
		
		``a_mode``, ``b_mode``
			file mode strings of the respective side. Non-empty values are 
			converted using Blob._mode_str_to_int, others are stored as given.
		
		``new_file``, ``deleted_file``
			flags stored as given
		
		``rename_from``, ``rename_to``
			paths stored as given. The ``renamed`` member is True if they differ.
		
		``diff``
			the patch text, stored as given
		"""
		# the blobs receive the mode as parsed, the conversion happens afterwards
		if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id):
			self.a_blob = None
		else:
			self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path)
		if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id):
			self.b_blob = None
		else:
			self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path)

		self.a_mode = a_mode
		self.b_mode = b_mode
		if self.a_mode:
			self.a_mode = blob.Blob._mode_str_to_int( self.a_mode )
		if self.b_mode:
			self.b_mode = blob.Blob._mode_str_to_int( self.b_mode )
		self.new_file = new_file
		self.deleted_file = deleted_file
		self.rename_from = rename_from
		self.rename_to = rename_to
		self.renamed = rename_from != rename_to
		self.diff = diff

	@classmethod
	def _index_from_patch_format(cls, repo, stream):
		"""
		Create a new DiffIndex from the given text which must be in patch format
		
		``repo``
			is the repository we are operating on - it is required 
		
		``stream``
			result of 'git diff' as a stream (supporting file protocol)
		
		Returns
			git.DiffIndex
		
		NOTE:
			The a side mode is taken from the 'old mode' or 'deleted file mode' 
			line, the b side mode from the 'new mode', 'new file mode' or 
			'index' line, whichever is present first in that order.
		"""
		# for now, we have to bake the stream
		text = stream.read()
		index = DiffIndex()

		diff_header = cls.re_header.match
		# The newline is prepended so that the first patch is preceded by the 
		# separator as well. The first item of the split is whatever precedes 
		# the first patch and is dropped.
		for diff in ('\n' + text).split('\ndiff --git')[1:]:
			header = diff_header(diff)

			a_path, b_path, similarity_index, rename_from, rename_to, \
				old_mode, new_mode, new_file_mode, deleted_file_mode, \
				a_blob_id, b_blob_id, b_mode = header.groups()
			new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)

			# everything following the header is the actual patch text
			index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,
				old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
				new_file, deleted_file, rename_from, rename_to, diff[header.end():]))

		return index
		
	@classmethod
	def _index_from_raw_format(cls, repo, stream):
		"""
		Create a new DiffIndex from the given stream which must be in raw format.
		
		``repo``
			is the repository we are operating on
		
		``stream``
			iterable yielding the lines of the 'git diff --raw' output. Lines 
			not starting with ':' are skipped.
		
		NOTE: 
			Paths may contain whitespace, as the path is separated from the 
			preceding fields at the tab character. Quoted paths are not unquoted.
			
			Entries carrying two paths ( source and destination of a rename or 
			copy ) only show up if the respective detection was requested from 
			git-diff. Their first path becomes the a side and the last one the 
			b side; for a status starting with 'R', rename_from and rename_to 
			are set as well.
		
		Returns
			git.DiffIndex
		"""
		# handles 
		# :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M	.gitignore
		index = DiffIndex()
		for line in stream:
			if not line.startswith(":"):
				continue
			# END its not a valid diff line
			
			# Modes, shas and status are separated by spaces, whereas each path 
			# is introduced by a tab. Splitting everything at whitespace would 
			# break on paths containing spaces.
			fields = line[1:].rstrip("\r\n").split("\t")
			if len(fields) > 1:
				old_mode, new_mode, a_blob_id, b_blob_id, modification_id = fields[0].split()
				entry_paths = fields[1:]
			else:
				# no tab at all - split at any whitespace as a fallback
				old_mode, new_mode, a_blob_id, b_blob_id, modification_id, path = fields[0].split()
				entry_paths = [path]
			# END split fields
			
			a_path = entry_paths[0]
			b_path = entry_paths[-1]
			rename_from = None
			rename_to = None
			deleted_file = False
			new_file = False
			if modification_id == 'D':
				b_path = None
				deleted_file = True
			elif modification_id == 'A':
				a_path = None
				new_file = True
			elif len(entry_paths) > 1 and modification_id.startswith('R'):
				rename_from = a_path
				rename_to = b_path
			# END add/remove/rename handling
			
			diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode,
						new_file, deleted_file, rename_from, rename_to, '')
			index.append(diff)
		# END for each line
		
		return index