py3: stop using util.iterfile()...
Gregory Szorc
r49796:fd5b8e69 default
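This commit removes the util.iterfile() indirection when iterating open files
in the convert extension. The wrapper existed to work around Python 2
file-iteration problems and was effectively a no-op on Python 3 (it returned
the file object unchanged), so both call sites below can loop over the file
object directly. A minimal before/after sketch (fp and process are
hypothetical, not part of the patch):

    # before: through the Python 2 compatibility shim
    for i, line in enumerate(util.iterfile(fp)):
        process(i, line)

    # after: Python 3 file objects already iterate line by line
    for i, line in enumerate(fp):
        process(i, line)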
@@ -1,597 +1,597 @@
# common.py - common code for the convert extension
#
# Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import base64
import datetime
import errno
import os
import pickle
import re
import shlex
import subprocess

from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial import (
    encoding,
    error,
    phases,
    pycompat,
    util,
)
from mercurial.utils import procutil

propertycache = util.propertycache


def _encodeornone(d):
    if d is None:
        return
    return d.encode('latin1')


class _shlexpy3proxy(object):
    def __init__(self, l):
        self._l = l

    def __iter__(self):
        return (_encodeornone(v) for v in self._l)

    def get_token(self):
        return _encodeornone(self._l.get_token())

    @property
    def infile(self):
        return self._l.infile or b'<unknown>'

    @property
    def lineno(self):
        return self._l.lineno


def shlexer(data=None, filepath=None, wordchars=None, whitespace=None):
    if data is None:
        if pycompat.ispy3:
            data = open(filepath, b'r', encoding='latin1')
        else:
            data = open(filepath, b'r')
    else:
        if filepath is not None:
            raise error.ProgrammingError(
                b'shlexer only accepts data or filepath, not both'
            )
        if pycompat.ispy3:
            data = data.decode('latin1')
    l = shlex.shlex(data, infile=filepath, posix=True)
    if whitespace is not None:
        l.whitespace_split = True
        if pycompat.ispy3:
            l.whitespace += whitespace.decode('latin1')
        else:
            l.whitespace += whitespace
    if wordchars is not None:
        if pycompat.ispy3:
            l.wordchars += wordchars.decode('latin1')
        else:
            l.wordchars += wordchars
    if pycompat.ispy3:
        return _shlexpy3proxy(l)
    return l


if pycompat.ispy3:
    base64_encodebytes = base64.encodebytes
    base64_decodebytes = base64.decodebytes
else:
    base64_encodebytes = base64.encodestring
    base64_decodebytes = base64.decodestring


def encodeargs(args):
    def encodearg(s):
        lines = base64_encodebytes(s)
        lines = [l.splitlines()[0] for l in pycompat.iterbytestr(lines)]
        return b''.join(lines)

    s = pickle.dumps(args)
    return encodearg(s)


def decodeargs(s):
    s = base64_decodebytes(s)
    return pickle.loads(s)
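
# Editor's note: a minimal round-trip sketch for encodeargs()/decodeargs()
# (hypothetical values, not part of common.py). The pair pickles a value and
# base64-encodes it with the newlines stripped, producing a single line of
# ASCII-safe bytes that can be smuggled through e.g. an environment variable:
#
#     blob = encodeargs([b'rev-1', b'rev-2'])
#     assert decodeargs(blob) == [b'rev-1', b'rev-2']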


class MissingTool(Exception):
    pass


def checktool(exe, name=None, abort=True):
    name = name or exe
    if not procutil.findexe(exe):
        if abort:
            exc = error.Abort
        else:
            exc = MissingTool
        raise exc(_(b'cannot find required "%s" tool') % name)


class NoRepo(Exception):
    pass


SKIPREV = b'SKIP'


class commit(object):
    def __init__(
        self,
        author,
        date,
        desc,
        parents,
        branch=None,
        rev=None,
        extra=None,
        sortkey=None,
        saverev=True,
        phase=phases.draft,
        optparents=None,
        ctx=None,
    ):
        self.author = author or b'unknown'
        self.date = date or b'0 0'
        self.desc = desc
        self.parents = parents  # will be converted and used as parents
        self.optparents = optparents or []  # will be used if already converted
        self.branch = branch
        self.rev = rev
        self.extra = extra or {}
        self.sortkey = sortkey
        self.saverev = saverev
        self.phase = phase
        self.ctx = ctx  # for hg to hg conversions


class converter_source(object):
    """Conversion source interface"""

    def __init__(self, ui, repotype, path=None, revs=None):
        """Initialize conversion source (or raise NoRepo("message")
        exception if path is not a valid repository)"""
        self.ui = ui
        self.path = path
        self.revs = revs
        self.repotype = repotype

        self.encoding = b'utf-8'

    def checkhexformat(self, revstr, mapname=b'splicemap'):
        """fails if revstr is not a 40-byte hex string. Mercurial and Git
        both use such a format for their revision numbering
        """
        if not re.match(br'[0-9a-fA-F]{40,40}$', revstr):
            raise error.Abort(
                _(b'%s entry %s is not a valid revision identifier')
                % (mapname, revstr)
            )

    def before(self):
        pass

    def after(self):
        pass

    def targetfilebelongstosource(self, targetfilename):
        """Returns true if the given targetfile belongs to the source repo. This
        is useful when only a subdirectory of the target belongs to the source
        repo."""
        # For normal full repo converts, this is always True.
        return True

    def setrevmap(self, revmap):
        """set the map of already-converted revisions"""

    def getheads(self):
        """Return a list of this repository's heads"""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return a pair (data, mode) where data is the file content
        as a string and mode one of '', 'x' or 'l'. rev is the
        identifier returned by a previous call to getchanges().
        Data is None if file is missing/deleted in rev.
        """
        raise NotImplementedError

    def getchanges(self, version, full):
        """Returns a tuple of (files, copies, cleanp2).

        files is a sorted list of (filename, id) tuples for all files
        changed between version and its first parent returned by
        getcommit(). If full, all files in that revision are returned.
        id is the source revision id of the file.

        copies is a dictionary of dest: source

        cleanp2 is the set of filenames that are clean against p2.
        (Files that are clean against p1 are already not in files (unless
        full). This makes it possible to handle p2 clean files similarly.)
        """
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object for version"""
        raise NotImplementedError

    def numcommits(self):
        """Return the number of commits in this source.

        If unknown, return None.
        """
        return None

    def gettags(self):
        """Return the tags as a dictionary of name: revision

        Tag names must be UTF-8 strings.
        """
        raise NotImplementedError

    def recode(self, s, encoding=None):
        if not encoding:
            encoding = self.encoding or b'utf-8'

        if isinstance(s, str):
            return s.encode("utf-8")
        try:
            return s.decode(pycompat.sysstr(encoding)).encode("utf-8")
        except UnicodeError:
            try:
                return s.decode("latin-1").encode("utf-8")
            except UnicodeError:
                return s.decode(pycompat.sysstr(encoding), "replace").encode(
                    "utf-8"
                )

    def getchangedfiles(self, rev, i):
        """Return the files changed by rev compared to parent[i].

        i is an index selecting one of the parents of rev. The return
        value should be the list of files that are different in rev and
        this parent.

        If rev has no parents, i is None.

        This function is only needed to support --filemap
        """
        raise NotImplementedError

    def converted(self, rev, sinkrev):
        '''Notify the source that a revision has been converted.'''

    def hasnativeorder(self):
        """Return true if this source has a meaningful, native revision
        order. For instance, Mercurial revisions are stored sequentially
        while there is no such global ordering with Darcs.
        """
        return False

    def hasnativeclose(self):
        """Return true if this source has the ability to close a branch."""
        return False

    def lookuprev(self, rev):
        """If rev is a meaningful revision reference in source, return
        the referenced identifier in the same format used by getcommit().
        Return None otherwise.
        """
        return None

    def getbookmarks(self):
        """Return the bookmarks as a dictionary of name: revision

        Bookmark names are to be UTF-8 strings.
        """
        return {}

    def checkrevformat(self, revstr, mapname=b'splicemap'):
        """revstr is a string that describes a revision in the given
        source control system. Return true if revstr has the correct
        format.
        """
        return True


class converter_sink(object):
    """Conversion sink (target) interface"""

    def __init__(self, ui, repotype, path):
        """Initialize conversion sink (or raise NoRepo("message")
        exception if path is not a valid repository)

        created is a list of paths to remove if a fatal error occurs
        later"""
        self.ui = ui
        self.path = path
        self.created = []
        self.repotype = repotype

    def revmapfile(self):
        """Path to a file that will contain lines
        source_rev_id sink_rev_id
        mapping equivalent revision identifiers for each system."""
        raise NotImplementedError

    def authorfile(self):
        """Path to a file that will contain lines
        srcauthor=dstauthor
        mapping equivalent author identifiers for each system."""
        return None

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Create a revision with all changed files listed in 'files'
        and having listed parents. 'commit' is a commit object
        containing at a minimum the author, date, and message for this
        changeset. 'files' is a list of (path, version) tuples,
        'copies' is a dictionary mapping destinations to sources,
        'source' is the source repository, and 'revmap' is a mapfile
        of source revisions to converted revisions. Only getfile() and
        lookuprev() should be called on 'source'. 'full' means that 'files'
        is complete and all other files should be removed.
        'cleanp2' is a set of the filenames that are unchanged from p2
        (only in the common merge case where there are two parents).

        Note that the sink repository is not told to update itself to
        a particular revision (or even what that revision would be)
        before it receives the file data.
        """
        raise NotImplementedError

    def puttags(self, tags):
        """Put tags into sink.

        tags: {tagname: sink_rev_id, ...} where tagname is a UTF-8 string.
        Return a pair (tag_revision, tag_parent_revision), or (None, None)
        if nothing was changed.
        """
        raise NotImplementedError

    def setbranch(self, branch, pbranches):
        """Set the current branch name. Called before the first putcommit
        on the branch.
        branch: branch name for subsequent commits
        pbranches: (converted parent revision, parent branch) tuples"""

    def setfilemapmode(self, active):
        """Tell the destination that we're using a filemap

        Some converter_sources (svn in particular) can claim that a file
        was changed in a revision, even if there was no change. This method
        tells the destination that we're using a filemap and that it should
        filter empty revisions.
        """

    def before(self):
        pass

    def after(self):
        pass

    def putbookmarks(self, bookmarks):
        """Put bookmarks into sink.

        bookmarks: {bookmarkname: sink_rev_id, ...}
        where bookmarkname is a UTF-8 string.
        """

    def hascommitfrommap(self, rev):
        """Return False if a rev mentioned in a filemap is known to not be
        present."""
        raise NotImplementedError

    def hascommitforsplicemap(self, rev):
        """This method is for the special needs of splicemap handling and not
        for general use. Returns True if the sink contains rev; it aborts on
        some special cases."""
        raise NotImplementedError


class commandline(object):
    def __init__(self, ui, command):
        self.ui = ui
        self.command = command

    def prerun(self):
        pass

    def postrun(self):
        pass

    def _cmdline(self, cmd, *args, **kwargs):
        kwargs = pycompat.byteskwargs(kwargs)
        cmdline = [self.command, cmd] + list(args)
        for k, v in kwargs.items():
            if len(k) == 1:
                cmdline.append(b'-' + k)
            else:
                cmdline.append(b'--' + k.replace(b'_', b'-'))
            try:
                if len(k) == 1:
                    cmdline.append(b'' + v)
                else:
                    cmdline[-1] += b'=' + v
            except TypeError:
                pass
        cmdline = [procutil.shellquote(arg) for arg in cmdline]
        if not self.ui.debugflag:
            cmdline += [b'2>', pycompat.bytestr(os.devnull)]
        cmdline = b' '.join(cmdline)
        return cmdline
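
    # Editor's note: a hedged sketch of what _cmdline() builds (hypothetical
    # values, not part of common.py). Single-letter keyword arguments become
    # short options with a separate value, longer ones become --long=value
    # options, and non-bytes values such as True hit the TypeError branch
    # and leave a bare flag:
    #
    #     cl = commandline(ui, b'svn')
    #     cl._cmdline(b'log', b'path', r=b'1:2', xml=True)
    #     # roughly b"svn log path -r '1:2' --xml 2> /dev/null",
    #     # modulo shell quoting and keyword ordering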

    def _run(self, cmd, *args, **kwargs):
        def popen(cmdline):
            p = subprocess.Popen(
                procutil.tonativestr(cmdline),
                shell=True,
                bufsize=-1,
                close_fds=procutil.closefds,
                stdout=subprocess.PIPE,
            )
            return p

        return self._dorun(popen, cmd, *args, **kwargs)

    def _run2(self, cmd, *args, **kwargs):
        return self._dorun(procutil.popen2, cmd, *args, **kwargs)

    def _run3(self, cmd, *args, **kwargs):
        return self._dorun(procutil.popen3, cmd, *args, **kwargs)

    def _dorun(self, openfunc, cmd, *args, **kwargs):
        cmdline = self._cmdline(cmd, *args, **kwargs)
        self.ui.debug(b'running: %s\n' % (cmdline,))
        self.prerun()
        try:
            return openfunc(cmdline)
        finally:
            self.postrun()

    def run(self, cmd, *args, **kwargs):
        p = self._run(cmd, *args, **kwargs)
        output = p.communicate()[0]
        self.ui.debug(output)
        return output, p.returncode

    def runlines(self, cmd, *args, **kwargs):
        p = self._run(cmd, *args, **kwargs)
        output = p.stdout.readlines()
        p.wait()
        self.ui.debug(b''.join(output))
        return output, p.returncode

    def checkexit(self, status, output=b''):
        if status:
            if output:
                self.ui.warn(_(b'%s error:\n') % self.command)
                self.ui.warn(output)
            msg = procutil.explainexit(status)
            raise error.Abort(b'%s %s' % (self.command, msg))

    def run0(self, cmd, *args, **kwargs):
        output, status = self.run(cmd, *args, **kwargs)
        self.checkexit(status, output)
        return output

    def runlines0(self, cmd, *args, **kwargs):
        output, status = self.runlines(cmd, *args, **kwargs)
        self.checkexit(status, b''.join(output))
        return output

    @propertycache
    def argmax(self):
        # POSIX requires at least 4096 bytes for ARG_MAX
        argmax = 4096
        try:
            argmax = os.sysconf("SC_ARG_MAX")
        except (AttributeError, ValueError):
            pass

        # Windows shells impose their own limits on command line length,
        # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
        # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
        # details about cmd.exe limitations.

        # Since ARG_MAX is for the command line _and_ the environment, lower
        # our limit (and make Windows shells happy while doing this).
        return argmax // 2 - 1

    def _limit_arglist(self, arglist, cmd, *args, **kwargs):
        cmdlen = len(self._cmdline(cmd, *args, **kwargs))
        limit = self.argmax - cmdlen
        numbytes = 0
        fl = []
        for fn in arglist:
            b = len(fn) + 3
            if numbytes + b < limit or len(fl) == 0:
                fl.append(fn)
                numbytes += b
            else:
                yield fl
                fl = [fn]
                numbytes = b
        if fl:
            yield fl

    def xargs(self, arglist, cmd, *args, **kwargs):
        for l in self._limit_arglist(arglist, cmd, *args, **kwargs):
            self.run0(cmd, *(list(args) + l), **kwargs)
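
    # Editor's note: a sketch of the xargs() batching behavior (hypothetical
    # names, not part of common.py). Each filename is costed at len(fn) + 3
    # bytes; once a chunk would exceed argmax minus the base command length,
    # _limit_arglist() yields it and starts a new one, and xargs() then runs
    # the command once per chunk, like the xargs(1) utility:
    #
    #     for chunk in cl._limit_arglist(filenames, b'add'):
    #         pass  # each chunk fits under the command-line length limit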


class mapfile(dict):
    def __init__(self, ui, path):
        super(mapfile, self).__init__()
        self.ui = ui
        self.path = path
        self.fp = None
        self.order = []
        self._read()

    def _read(self):
        if not self.path:
            return
        try:
            fp = open(self.path, b'rb')
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return
-        for i, line in enumerate(util.iterfile(fp)):
+        for i, line in enumerate(fp):
            line = line.splitlines()[0].rstrip()
            if not line:
                # Ignore blank lines
                continue
            try:
                key, value = line.rsplit(b' ', 1)
            except ValueError:
                raise error.Abort(
                    _(b'syntax error in %s(%d): key/value pair expected')
                    % (self.path, i + 1)
                )
            if key not in self:
                self.order.append(key)
            super(mapfile, self).__setitem__(key, value)
        fp.close()

    def __setitem__(self, key, value):
        if self.fp is None:
            try:
                self.fp = open(self.path, b'ab')
            except IOError as err:
                raise error.Abort(
                    _(b'could not open map file %r: %s')
                    % (self.path, encoding.strtolocal(err.strerror))
                )
        self.fp.write(util.tonativeeol(b'%s %s\n' % (key, value)))
        self.fp.flush()
        super(mapfile, self).__setitem__(key, value)

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
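
# Editor's note: a sketch of the mapfile format (hypothetical paths and
# values, not part of common.py). Every assignment is appended to the backing
# file as a "key value" line and re-read on the next run, which is how
# convert persists state such as the source-to-destination rev map:
#
#     m = mapfile(ui, b'.hg/shamap')
#     m[b'srchash'] = b'desthash'  # written through to the file immediately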


def makedatetimestamp(t):
    """Like dateutil.makedate() but for time t instead of current time"""
    delta = datetime.datetime.utcfromtimestamp(
        t
    ) - datetime.datetime.fromtimestamp(t)
    tz = delta.days * 86400 + delta.seconds
    return t, tz
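
# Editor's note: a hedged example of makedatetimestamp() (hypothetical
# timezone, not part of common.py). For t = 0 evaluated in a UTC+2 zone,
# utcfromtimestamp(0) is two hours behind fromtimestamp(0), so the function
# returns (0, -7200), a (unixtime, offset) pair in Mercurial's date format.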
@@ -1,667 +1,667 @@
# convcmd - convert extension commands definition
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import os
import shutil

from mercurial.i18n import _
from mercurial.pycompat import open
from mercurial import (
    encoding,
    error,
    hg,
    pycompat,
    scmutil,
    util,
)
from mercurial.utils import dateutil

from . import (
    bzr,
    common,
    cvs,
    darcs,
    filemap,
    git,
    gnuarch,
    hg as hgconvert,
    monotone,
    p4,
    subversion,
)

mapfile = common.mapfile
MissingTool = common.MissingTool
NoRepo = common.NoRepo
SKIPREV = common.SKIPREV

bzr_source = bzr.bzr_source
convert_cvs = cvs.convert_cvs
convert_git = git.convert_git
darcs_source = darcs.darcs_source
gnuarch_source = gnuarch.gnuarch_source
mercurial_sink = hgconvert.mercurial_sink
mercurial_source = hgconvert.mercurial_source
monotone_source = monotone.monotone_source
p4_source = p4.p4_source
svn_sink = subversion.svn_sink
svn_source = subversion.svn_source

orig_encoding = b'ascii'


def readauthormap(ui, authorfile, authors=None):
    if authors is None:
        authors = {}
    with open(authorfile, b'rb') as afile:
        for line in afile:

            line = line.strip()
            if not line or line.startswith(b'#'):
                continue

            try:
                srcauthor, dstauthor = line.split(b'=', 1)
            except ValueError:
                msg = _(b'ignoring bad line in author map file %s: %s\n')
                ui.warn(msg % (authorfile, line.rstrip()))
                continue

            srcauthor = srcauthor.strip()
            dstauthor = dstauthor.strip()
            if authors.get(srcauthor) in (None, dstauthor):
                msg = _(b'mapping author %s to %s\n')
                ui.debug(msg % (srcauthor, dstauthor))
                authors[srcauthor] = dstauthor
                continue

            m = _(b'overriding mapping for author %s, was %s, will be %s\n')
            ui.status(m % (srcauthor, authors[srcauthor], dstauthor))
    return authors
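
# Editor's note: a sketch of the author map format readauthormap() parses
# (hypothetical file content, not part of convcmd.py). One srcauthor=dstauthor
# mapping per line; blank lines and lines starting with '#' are skipped, and
# later entries override earlier ones with a status message:
#
#     # example authormap
#     jrandom = J. Random Hacker <jrandom@example.com>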


def recode(s):
    if isinstance(s, str):
        return s.encode(pycompat.sysstr(orig_encoding), 'replace')
    else:
        return s.decode('utf-8').encode(
            pycompat.sysstr(orig_encoding), 'replace'
        )


def mapbranch(branch, branchmap):
    """
    >>> bmap = {b'default': b'branch1'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch1'
    'branch1'
    >>> bmap = {b'None': b'branch2'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch2'
    'branch2'
    >>> bmap = {b'None': b'branch3', b'default': b'branch4'}
    >>> for i in [b'None', b'', None, b'default', b'branch5']:
    ...     mapbranch(i, bmap)
    'branch3'
    'branch4'
    'branch4'
    'branch4'
    'branch5'
    """
    # If branch is None or empty, this commit is coming from the source
    # repository's default branch and destined for the default branch in the
    # destination repository. For such commits, using a literal "default"
    # in branchmap below allows the user to map "default" to an alternate
    # default branch in the destination repository.
    branch = branchmap.get(branch or b'default', branch)
    # At some point we used the "None" literal to denote the default branch;
    # attempt to use that for backward compatibility.
    if not branch:
        branch = branchmap.get(b'None', branch)
    return branch


source_converters = [
    (b'cvs', convert_cvs, b'branchsort'),
    (b'git', convert_git, b'branchsort'),
    (b'svn', svn_source, b'branchsort'),
    (b'hg', mercurial_source, b'sourcesort'),
    (b'darcs', darcs_source, b'branchsort'),
    (b'mtn', monotone_source, b'branchsort'),
    (b'gnuarch', gnuarch_source, b'branchsort'),
    (b'bzr', bzr_source, b'branchsort'),
    (b'p4', p4_source, b'branchsort'),
]

sink_converters = [
    (b'hg', mercurial_sink),
    (b'svn', svn_sink),
]


def convertsource(ui, path, type, revs):
    exceptions = []
    if type and type not in [s[0] for s in source_converters]:
        raise error.Abort(_(b'%s: invalid source repository type') % type)
    for name, source, sortmode in source_converters:
        try:
            if not type or name == type:
                return source(ui, name, path, revs), sortmode
        except (NoRepo, MissingTool) as inst:
            exceptions.append(inst)
    if not ui.quiet:
        for inst in exceptions:
            ui.write(b"%s\n" % pycompat.bytestr(inst.args[0]))
    raise error.Abort(_(b'%s: missing or unsupported repository') % path)


def convertsink(ui, path, type):
    if type and type not in [s[0] for s in sink_converters]:
        raise error.Abort(_(b'%s: invalid destination repository type') % type)
    for name, sink in sink_converters:
        try:
            if not type or name == type:
                return sink(ui, name, path)
        except NoRepo as inst:
            ui.note(_(b"convert: %s\n") % inst)
        except MissingTool as inst:
            raise error.Abort(b'%s\n' % inst)
    raise error.Abort(_(b'%s: unknown repository type') % path)


class progresssource(object):
    def __init__(self, ui, source, filecount):
        self.ui = ui
        self.source = source
        self.progress = ui.makeprogress(
            _(b'getting files'), unit=_(b'files'), total=filecount
        )

    def getfile(self, file, rev):
        self.progress.increment(item=file)
        return self.source.getfile(file, rev)

    def targetfilebelongstosource(self, targetfilename):
        return self.source.targetfilebelongstosource(targetfilename)

    def lookuprev(self, rev):
        return self.source.lookuprev(rev)

    def close(self):
        self.progress.complete()


class converter(object):
    def __init__(self, ui, source, dest, revmapfile, opts):

        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings). (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read the dst author map first, if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/override with the new author map if necessary
        if opts.get(b'authormap'):
            self.readauthormap(opts.get(b'authormap'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = self.parsesplicemap(opts.get(b'splicemap'))
        self.branchmap = mapfile(ui, opts.get(b'branchmap'))

    def parsesplicemap(self, path):
        """check and validate the splicemap format and
        return a child/parents dictionary.
        Format checking has two parts.
        1. generic format which is the same across all source types
        2. specific format checking which may be different for
           different source types. This logic is implemented in
           the checkrevformat function in source files like
           hg.py, subversion.py etc.
        """

        if not path:
            return {}
        m = {}
        try:
            fp = open(path, b'rb')
-            for i, line in enumerate(util.iterfile(fp)):
+            for i, line in enumerate(fp):
246 line = line.splitlines()[0].rstrip()
246 line = line.splitlines()[0].rstrip()
247 if not line:
247 if not line:
248 # Ignore blank lines
248 # Ignore blank lines
249 continue
249 continue
250 # split line
250 # split line
251 lex = common.shlexer(data=line, whitespace=b',')
251 lex = common.shlexer(data=line, whitespace=b',')
252 line = list(lex)
252 line = list(lex)
253 # check number of parents
253 # check number of parents
254 if not (2 <= len(line) <= 3):
254 if not (2 <= len(line) <= 3):
255 raise error.Abort(
255 raise error.Abort(
256 _(
256 _(
257 b'syntax error in %s(%d): child parent1'
257 b'syntax error in %s(%d): child parent1'
258 b'[,parent2] expected'
258 b'[,parent2] expected'
259 )
259 )
260 % (path, i + 1)
260 % (path, i + 1)
261 )
261 )
262 for part in line:
262 for part in line:
263 self.source.checkrevformat(part)
263 self.source.checkrevformat(part)
264 child, p1, p2 = line[0], line[1:2], line[2:]
264 child, p1, p2 = line[0], line[1:2], line[2:]
265 if p1 == p2:
265 if p1 == p2:
266 m[child] = p1
266 m[child] = p1
267 else:
267 else:
268 m[child] = p1 + p2
268 m[child] = p1 + p2
269 # if file does not exist or error reading, exit
269 # if file does not exist or error reading, exit
270 except IOError:
270 except IOError:
271 raise error.Abort(
271 raise error.Abort(
272 _(b'splicemap file not found or error reading %s:') % path
272 _(b'splicemap file not found or error reading %s:') % path
273 )
273 )
274 return m
274 return m
275
275
276 def walktree(self, heads):
276 def walktree(self, heads):
277 """Return a mapping that identifies the uncommitted parents of every
277 """Return a mapping that identifies the uncommitted parents of every
278 uncommitted changeset."""
278 uncommitted changeset."""
279 visit = list(heads)
279 visit = list(heads)
280 known = set()
280 known = set()
281 parents = {}
281 parents = {}
282 numcommits = self.source.numcommits()
282 numcommits = self.source.numcommits()
283 progress = self.ui.makeprogress(
283 progress = self.ui.makeprogress(
284 _(b'scanning'), unit=_(b'revisions'), total=numcommits
284 _(b'scanning'), unit=_(b'revisions'), total=numcommits
285 )
285 )
286 while visit:
286 while visit:
287 n = visit.pop(0)
287 n = visit.pop(0)
288 if n in known:
288 if n in known:
289 continue
289 continue
290 if n in self.map:
290 if n in self.map:
291 m = self.map[n]
291 m = self.map[n]
292 if m == SKIPREV or self.dest.hascommitfrommap(m):
292 if m == SKIPREV or self.dest.hascommitfrommap(m):
293 continue
293 continue
294 known.add(n)
294 known.add(n)
295 progress.update(len(known))
295 progress.update(len(known))
296 commit = self.cachecommit(n)
296 commit = self.cachecommit(n)
297 parents[n] = []
297 parents[n] = []
298 for p in commit.parents:
298 for p in commit.parents:
299 parents[n].append(p)
299 parents[n].append(p)
300 visit.append(p)
300 visit.append(p)
301 progress.complete()
301 progress.complete()
302
302
303 return parents
303 return parents
304
304
305 def mergesplicemap(self, parents, splicemap):
305 def mergesplicemap(self, parents, splicemap):
306 """A splicemap redefines child/parent relationships. Check the
306 """A splicemap redefines child/parent relationships. Check the
307 map contains valid revision identifiers and merge the new
307 map contains valid revision identifiers and merge the new
308 links in the source graph.
308 links in the source graph.
309 """
309 """
310 for c in sorted(splicemap):
310 for c in sorted(splicemap):
311 if c not in parents:
311 if c not in parents:
312 if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
312 if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
313 # Could be in source but not converted during this run
313 # Could be in source but not converted during this run
314 self.ui.warn(
314 self.ui.warn(
315 _(
315 _(
316 b'splice map revision %s is not being '
316 b'splice map revision %s is not being '
317 b'converted, ignoring\n'
317 b'converted, ignoring\n'
318 )
318 )
319 % c
319 % c
320 )
320 )
321 continue
321 continue
322 pc = []
322 pc = []
323 for p in splicemap[c]:
323 for p in splicemap[c]:
324 # We do not have to wait for nodes already in dest.
324 # We do not have to wait for nodes already in dest.
325 if self.dest.hascommitforsplicemap(self.map.get(p, p)):
325 if self.dest.hascommitforsplicemap(self.map.get(p, p)):
326 continue
326 continue
327 # Parent is not in dest and not being converted, not good
327 # Parent is not in dest and not being converted, not good
328 if p not in parents:
328 if p not in parents:
329 raise error.Abort(_(b'unknown splice map parent: %s') % p)
329 raise error.Abort(_(b'unknown splice map parent: %s') % p)
330 pc.append(p)
330 pc.append(p)
331 parents[c] = pc
331 parents[c] = pc
332
332
333 def toposort(self, parents, sortmode):
333 def toposort(self, parents, sortmode):
334 """Return an ordering such that every uncommitted changeset is
334 """Return an ordering such that every uncommitted changeset is
335 preceded by all its uncommitted ancestors."""
335 preceded by all its uncommitted ancestors."""
336
336
337 def mapchildren(parents):
337 def mapchildren(parents):
338 """Return a (children, roots) tuple where 'children' maps parent
338 """Return a (children, roots) tuple where 'children' maps parent
339 revision identifiers to children ones, and 'roots' is the list of
339 revision identifiers to children ones, and 'roots' is the list of
340 revisions without parents. 'parents' must be a mapping of revision
340 revisions without parents. 'parents' must be a mapping of revision
341 identifier to its parents ones.
341 identifier to its parents ones.
342 """
342 """
343 visit = collections.deque(sorted(parents))
343 visit = collections.deque(sorted(parents))
344 seen = set()
344 seen = set()
345 children = {}
345 children = {}
346 roots = []
346 roots = []
347
347
348 while visit:
348 while visit:
349 n = visit.popleft()
349 n = visit.popleft()
350 if n in seen:
350 if n in seen:
351 continue
351 continue
352 seen.add(n)
352 seen.add(n)
353 # Ensure that nodes without parents are present in the
353 # Ensure that nodes without parents are present in the
354 # 'children' mapping.
354 # 'children' mapping.
355 children.setdefault(n, [])
355 children.setdefault(n, [])
356 hasparent = False
356 hasparent = False
357 for p in parents[n]:
357 for p in parents[n]:
358 if p not in self.map:
358 if p not in self.map:
359 visit.append(p)
359 visit.append(p)
360 hasparent = True
360 hasparent = True
361 children.setdefault(p, []).append(n)
361 children.setdefault(p, []).append(n)
362 if not hasparent:
362 if not hasparent:
363 roots.append(n)
363 roots.append(n)
364
364
365 return children, roots
365 return children, roots
366
366
367 # Sort functions are supposed to take a list of revisions which
367 # Sort functions are supposed to take a list of revisions which
368 # can be converted immediately and pick one
368 # can be converted immediately and pick one
369
369
370 def makebranchsorter():
370 def makebranchsorter():
371 """If the previously converted revision has a child in the
371 """If the previously converted revision has a child in the
372 eligible revisions list, pick it. Return the list head
372 eligible revisions list, pick it. Return the list head
373 otherwise. Branch sort attempts to minimize branch
373 otherwise. Branch sort attempts to minimize branch
374 switching, which is harmful for Mercurial backend
374 switching, which is harmful for Mercurial backend
375 compression.
375 compression.
376 """
376 """
377 prev = [None]
377 prev = [None]
378
378
379 def picknext(nodes):
379 def picknext(nodes):
380 next = nodes[0]
380 next = nodes[0]
381 for n in nodes:
381 for n in nodes:
382 if prev[0] in parents[n]:
382 if prev[0] in parents[n]:
383 next = n
383 next = n
384 break
384 break
385 prev[0] = next
385 prev[0] = next
386 return next
386 return next
387
387
388 return picknext
388 return picknext
389
389
390 def makesourcesorter():
390 def makesourcesorter():
391 """Source specific sort."""
391 """Source specific sort."""
392 keyfn = lambda n: self.commitcache[n].sortkey
392 keyfn = lambda n: self.commitcache[n].sortkey
393
393
394 def picknext(nodes):
394 def picknext(nodes):
395 return sorted(nodes, key=keyfn)[0]
395 return sorted(nodes, key=keyfn)[0]
396
396
397 return picknext
397 return picknext
398
398
399 def makeclosesorter():
399 def makeclosesorter():
400 """Close order sort."""
400 """Close order sort."""
401 keyfn = lambda n: (
401 keyfn = lambda n: (
402 b'close' not in self.commitcache[n].extra,
402 b'close' not in self.commitcache[n].extra,
403 self.commitcache[n].sortkey,
403 self.commitcache[n].sortkey,
404 )
404 )
405
405
406 def picknext(nodes):
406 def picknext(nodes):
407 return sorted(nodes, key=keyfn)[0]
407 return sorted(nodes, key=keyfn)[0]
408
408
409 return picknext
409 return picknext

        def makedatesorter():
            """Sort revisions by date."""
            dates = {}

            def getdate(n):
                if n not in dates:
                    dates[n] = dateutil.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext
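
        # Note: taking min() over (date, node) tuples picks the oldest
        # eligible revision, breaking date ties by revision identifier.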

        if sortmode == b'branchsort':
            picknext = makebranchsorter()
        elif sortmode == b'datesort':
            picknext = makedatesorter()
        elif sortmode == b'sourcesort':
            picknext = makesourcesorter()
        elif sortmode == b'closesort':
            picknext = makeclosesorter()
        else:
            raise error.Abort(_(b'unknown sort mode: %s') % sortmode)
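
        # The sort mode normally comes from the command line, e.g.
        #     hg convert --datesort SOURCE DEST
        # (--branchsort, --sourcesort and --closesort select the others).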

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise error.Abort(
                        _(b'cycle detected between %s and %s')
                        % (recode(c), recode(n))
                    )
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise error.Abort(_(b"not all revisions were sorted"))

        return s
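
        # Illustrative sketch (hypothetical values, empty self.map): for
        # parents = {b'a': [], b'b': [b'a'], b'c': [b'b']} every sort mode
        # must yield s == [b'a', b'b', b'c'], since a revision only becomes
        # eligible once all of its unconverted parents have been emitted.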

    def writeauthormap(self):
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_(b'writing author map file %s\n') % authorfile)
            ofile = open(authorfile, b'wb+')
            for author in self.authors:
                ofile.write(
                    util.tonativeeol(
                        b"%s=%s\n" % (author, self.authors[author])
                    )
                )
            ofile.close()
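
    # The author map written above (and read back by readauthormap) holds one
    # 'source author=destination author' entry per line, for example:
    #     john=John Smith <john.smith@example.com>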

    def readauthormap(self, authorfile):
        self.authors = readauthormap(self.ui, authorfile, self.authors)

    def cachecommit(self, rev):
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = mapbranch(commit.branch, self.branchmap)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        commit = self.commitcache[rev]
        full = self.opts.get(b'full')
        changes = self.source.getchanges(rev, full)
        if isinstance(changes, bytes):
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies, cleanp2 = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append(
                    (self.map[prev], self.commitcache[prev].branch)
                )
        self.dest.setbranch(commit.branch, pbranches)
        try:
            parents = self.splicemap[rev]
            self.ui.status(
                _(b'spliced in %s as parents of %s\n')
                % (_(b' and ').join(parents), rev)
            )
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]
            parents.extend(
                self.map[x] for x in commit.optparents if x in self.map
            )
        if len(pbranches) != 2:
            cleanp2 = set()
        if len(parents) < 3:
            source = progresssource(self.ui, self.source, len(files))
        else:
            # For an octopus merge, we end up traversing the list of
            # changed files N-1 times. This tweak to the number of
            # files makes it so the progress bar doesn't overflow
            # itself.
            source = progresssource(
                self.ui, self.source, len(files) * (len(parents) - 1)
            )
        newnode = self.dest.putcommit(
            files, copies, parents, commit, source, self.map, full, cleanp2
        )
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_(b"scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_(b"sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            c = None

            self.ui.status(_(b"converting...\n"))
            progress = self.ui.makeprogress(
                _(b'converting'), unit=_(b'revisions'), total=len(t)
            )
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if b"\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because the encoding.encoding convert()
                # uses is 'utf-8'
                self.ui.status(b"%d %s\n" % (num, recode(desc)))
                self.ui.note(_(b"source: %s\n") % recode(c))
                progress.update(i)
                self.copy(c)
            progress.complete()

            if not self.ui.configbool(b'convert', b'skiptags'):
                tags = self.source.gettags()
                ctags = {}
                for k in tags:
                    v = tags[k]
                    if self.map.get(v, SKIPREV) != SKIPREV:
                        ctags[k] = self.map[v]

                if c and ctags:
                    nrev, tagsparent = self.dest.puttags(ctags)
                    if nrev and tagsparent:
                        # write another hash correspondence to override the
                        # previous one so we don't end up with extra tag heads
                        tagsparents = [
                            e for e in self.map.items() if e[1] == tagsparent
                        ]
                        if tagsparents:
                            self.map[tagsparents[0][0]] = nrev

            bookmarks = self.source.getbookmarks()
            cbookmarks = {}
            for k in bookmarks:
                v = bookmarks[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    cbookmarks[k] = self.map[v]

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        try:
            self.dest.after()
        finally:
            self.source.after()
        self.map.close()


def convert(ui, src, dest=None, revmapfile=None, **opts):
    opts = pycompat.byteskwargs(opts)
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = b'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get(b'authormap'):
        opts[b'authormap'] = opts.get(b'authors')

    if not dest:
        dest = hg.defaultdest(src) + b"-hg"
        ui.status(_(b"assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get(b'dest_type'))
    destc = scmutil.wrapconvertsink(destc)

    try:
        srcc, defaultsort = convertsource(
            ui, src, opts.get(b'source_type'), opts.get(b'rev')
        )
    except Exception:
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    sortmodes = (b'branchsort', b'datesort', b'sourcesort', b'closesort')
    sortmode = [m for m in sortmodes if opts.get(m)]
    if len(sortmode) > 1:
        raise error.Abort(_(b'more than one sort mode specified'))
    if sortmode:
        sortmode = sortmode[0]
    else:
        sortmode = defaultsort

    if sortmode == b'sourcesort' and not srcc.hasnativeorder():
        raise error.Abort(
            _(b'--sourcesort is not supported by this data source')
        )
    if sortmode == b'closesort' and not srcc.hasnativeclose():
        raise error.Abort(
            _(b'--closesort is not supported by this data source')
        )

    fmap = opts.get(b'filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        revmapfile = destc.revmapfile()

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)
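
# A typical invocation of the convert extension that exercises this entry
# point (illustrative paths):
#     hg convert --branchsort --authormap authors.txt SOURCE DEST-hg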
# match.py - filename matching
#
# Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import bisect
import copy
import itertools
import os
import re

from .i18n import _
from .pycompat import open
from . import (
    encoding,
    error,
    pathutil,
    policy,
    pycompat,
    util,
)
from .utils import stringutil

rustmod = policy.importrust('dirstate')

allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
cwdrelativepatternkinds = (b'relpath', b'glob')

propertycache = util.propertycache


def _rematcher(regex):
    """compile the regexp with the best available regexp engine and return a
    matcher function"""
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match


def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
    matchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == b'set':
            if ctx is None:
                raise error.ProgrammingError(
                    b"fileset expression with no context"
                )
            matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

            if listsubrepos:
                for subpath in ctx.substate:
                    sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
                    pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                    matchers.append(pm)

            continue
        other.append((kind, pat, source))
    return matchers, other


def _expandsubinclude(kindpats, root):
    """Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it."""
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == b'subinclude':
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, b'', [], [b'include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += b'/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other


def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.
    """
    for kind, pat, source in kindpats:
        if pat != b'' or kind not in [b'relpath', b'glob']:
            return False
    return True
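
# For instance (illustrative values), [(b'relpath', b'', b'')] matches
# everything, while [(b'glob', b'*.c', b'')] does not.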


def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    matchers = []
    fms, kindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)


def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                    the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m


def exact(files, badfn=None):
    return exactmatcher(files, badfn=badfn)


def always(badfn=None):
    return alwaysmatcher(badfn)


def never(badfn=None):
    return nevermatcher(badfn)


def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m


def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded."""
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
                continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
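
# A hedged sketch (hypothetical paths): _donormalize([b'glob:*.c'], b'glob',
# root, cwd) canonicalizes the cwd-relative glob against the repository root,
# yielding kindpats like [(b'glob', b'src/*.c', b'')] when cwd is root/src.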


class basematcher(object):
    def __init__(self, badfn=None):
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir callback is set, it will be called when a directory
    # discovered by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

        visitdir | visitchildrenset
        ---------+-------------------
        False    | set()
        'all'    | 'all'
        True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
        Assuming the matcher ['path:foo/bar', 'rootfilesin:qux'], we would
        return the following values (assuming the implementation of
        visitchildrenset is capable of recognizing this; some implementations
        are not).

        '' -> {'foo', 'qux'}
        'baz' -> set()
        'foo' -> {'bar'}
        # Ideally this would be 'all', but since the prefix nature of matchers
        # is applied to the entire matcher, we have to downgrade this to
        # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
        # in.
        'foo/bar' -> 'this'
        'qux' -> 'this'

        Important:
        Most matchers do not know if they're representing files or
        directories. They see ['path:dir/f'] and don't know whether 'f' is a
        file or a directory, so visitchildrenset('dir') for most matchers will
        return {'f'}, but if the matcher knows it's a file (like exactmatcher
        does), it may return 'this'. Do not rely on the return being a set
        indicating that there are no files in this dir to investigate (or
        equivalently that if there are files to investigate in 'dir' that it
        will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not self.always() and not self.isexact() and not self.prefix()
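
# A quick sketch of the predicates above (illustrative matchers):
#     alwaysmatcher().always() -> True, so .files() can be ignored entirely;
#     exactmatcher([b'a']).isexact() -> True, so .files() is the whole match
#     set; a matcher built only from 'path:' patterns has .prefix() -> True,
#     so .files() lists directories to match recursively.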


class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'


class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'


class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
            self.matchfn
        )
        return b'<predicatematcher pred=%s>' % s


def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    l = len(prefix_set)
    if l == 0:
        return False
    if path in prefix_set:
        return True
    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
    # "walk up" the directory hierarchy instead, with the assumption that most
    # directory hierarchies are relatively shallow and hash lookup is cheap.
    if l > 5:
        return any(
            parentdir in prefix_set for parentdir in pathutil.finddirs(path)
        )

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    sl = ord(b'/')

    # We already checked that path isn't in prefix_set exactly, so
    # `path[len(pf)]` should never raise IndexError.
    return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
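
# Illustrative values: path_or_parents_in_set(b'a/b/c', {b'a'}) is True
# because b'a' is a parent directory of the path, while
# path_or_parents_in_set(b'ab/c', {b'a'}) is False: the b'/' check after
# the prefix prevents b'a' from matching the unrelated directory b'ab'.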


class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        return dir in self._dirs or path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        ret = self.visitdir(dir)
        if ret is True:
            return b'this'
        elif not ret:
            return set()
        assert ret == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
658
658
659 # This is basically a reimplementation of pathutil.dirs that stores the
659 # This is basically a reimplementation of pathutil.dirs that stores the
660 # children instead of just a count of them, plus a small optional optimization
660 # children instead of just a count of them, plus a small optional optimization
661 # to avoid some directories we don't need.
661 # to avoid some directories we don't need.
662 class _dirchildren(object):
662 class _dirchildren(object):
663 def __init__(self, paths, onlyinclude=None):
663 def __init__(self, paths, onlyinclude=None):
664 self._dirs = {}
664 self._dirs = {}
665 self._onlyinclude = onlyinclude or []
665 self._onlyinclude = onlyinclude or []
666 addpath = self.addpath
666 addpath = self.addpath
667 for f in paths:
667 for f in paths:
668 addpath(f)
668 addpath(f)
669
669
670 def addpath(self, path):
670 def addpath(self, path):
671 if path == b'':
671 if path == b'':
672 return
672 return
673 dirs = self._dirs
673 dirs = self._dirs
674 findsplitdirs = _dirchildren._findsplitdirs
674 findsplitdirs = _dirchildren._findsplitdirs
675 for d, b in findsplitdirs(path):
675 for d, b in findsplitdirs(path):
676 if d not in self._onlyinclude:
676 if d not in self._onlyinclude:
677 continue
677 continue
678 dirs.setdefault(d, set()).add(b)
678 dirs.setdefault(d, set()).add(b)
679
679
680 @staticmethod
680 @staticmethod
681 def _findsplitdirs(path):
681 def _findsplitdirs(path):
682 # yields (dirname, basename) tuples, walking back to the root. This is
682 # yields (dirname, basename) tuples, walking back to the root. This is
683 # very similar to pathutil.finddirs, except:
683 # very similar to pathutil.finddirs, except:
684 # - produces a (dirname, basename) tuple, not just 'dirname'
684 # - produces a (dirname, basename) tuple, not just 'dirname'
685 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
685 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
686 # slash.
686 # slash.
687 oldpos = len(path)
687 oldpos = len(path)
688 pos = path.rfind(b'/')
688 pos = path.rfind(b'/')
689 while pos != -1:
689 while pos != -1:
690 yield path[:pos], path[pos + 1 : oldpos]
690 yield path[:pos], path[pos + 1 : oldpos]
691 oldpos = pos
691 oldpos = pos
692 pos = path.rfind(b'/', 0, pos)
692 pos = path.rfind(b'/', 0, pos)
693 yield b'', path[:oldpos]
693 yield b'', path[:oldpos]
694
694
695 def get(self, path):
695 def get(self, path):
696 return self._dirs.get(path, set())
696 return self._dirs.get(path, set())
697
697
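# A small sketch (hypothetical paths): _dirchildren([b'a/b/c'],
# onlyinclude=[b'', b'a']) records {b'': {b'a'}, b'a': {b'b'}} (the b'a/b'
# entry is filtered out by onlyinclude), so .get(b'a') == {b'b'}.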
698
698
699 class includematcher(basematcher):
699 class includematcher(basematcher):
700 def __init__(self, root, kindpats, badfn=None):
700 def __init__(self, root, kindpats, badfn=None):
701 super(includematcher, self).__init__(badfn)
701 super(includematcher, self).__init__(badfn)
702 if rustmod is not None:
702 if rustmod is not None:
703 # We need to pass the patterns to Rust because they can contain
703 # We need to pass the patterns to Rust because they can contain
704 # patterns from the user interface
704 # patterns from the user interface
705 self._kindpats = kindpats
705 self._kindpats = kindpats
706 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
706 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
707 self._prefix = _prefix(kindpats)
707 self._prefix = _prefix(kindpats)
708 roots, dirs, parents = _rootsdirsandparents(kindpats)
708 roots, dirs, parents = _rootsdirsandparents(kindpats)
709 # roots are directories which are recursively included.
709 # roots are directories which are recursively included.
710 self._roots = set(roots)
710 self._roots = set(roots)
711 # dirs are directories which are non-recursively included.
711 # dirs are directories which are non-recursively included.
712 self._dirs = set(dirs)
712 self._dirs = set(dirs)
713 # parents are directories which are non-recursively included because
713 # parents are directories which are non-recursively included because
714 # they are needed to get to items in _dirs or _roots.
714 # they are needed to get to items in _dirs or _roots.
715 self._parents = parents
715 self._parents = parents
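        # Illustrative example (values assumed, not from the original
        # source): for the patterns ``path:foo/bar`` and ``rootfilesin:baz``,
        # roots would be {b'foo/bar'}, dirs {b'baz'}, and parents
        # {b'', b'foo'}.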

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        return (
            dir in self._dirs
            or dir in self._parents
            or path_or_parents_in_set(dir, self._roots)
        )

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        # dirs = ['foo/bar']
        # parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents,
        )

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)


class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
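        # Note (descriptive comment, added for clarity): b'-' binds tighter
        # than b'|', so this computes self._fileset | (self._dirs - {b''}).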
        return self._fileset | self._dirs - {b''}

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            candidates = self._visitchildrenset_candidates
        else:
            candidates = self._sorted_visitchildrenset_candidates
            d = dir + b'/'
            # Use bisect to find the first element potentially starting with d
            # (i.e. >= d). This should always find at least one element (we'll
            # assert later if this is not the case).
            first = bisect.bisect_left(candidates, d)
            # We need a representation of the first element that is > d that
            # does not start with d, so since we added a `/` on the end of dir,
            # we'll add whatever comes after slash (we could probably assume
            # that `0` is after `/`, but let's not) to the end of dir instead.
            dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
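            # Worked example (illustrative): for dir = b'foo', d is b'foo/'
            # and dnext is b'foo0', so the [first, last) range below spans
            # exactly the candidates that start with b'foo/'.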
            # Use bisect to find the first element >= d_next
            last = bisect.bisect_left(candidates, dnext, lo=first)
            dlen = len(d)
            candidates = {c[dlen:] for c in candidates[first:last]}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if b'/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files


class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == b'all':
            return False
        elif not self._m2.visitdir(dir):
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2: 'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        # return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)


def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.traversedir = m1.traversedir
        return m
    if m2.always():
        m = copy.copy(m1)
        return m
    return intersectionmatcher(m1, m2)


class intersectionmatcher(basematcher):
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            m1, m2 = self._m1, self._m2
            if not m1.isexact():
                m1, m2 = m2, m1
            return [f for f in m1.files() if m2(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        if m1_set == b'all':
            return m2_set
        elif m2_set == b'all':
            return m1_set

        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)


class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        self._files = [
            f[len(path) + 1 :]
            for f in matcher._files
            if f.startswith(path + b"/")
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitdir(dir)

    def visitchildrenset(self, dir):
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitchildrenset(dir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )


class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        return [self._pathprefix + f for f in self._matcher._files]

    def matchfn(self, f):
        if not f.startswith(self._pathprefix):
            return False
        return self._matcher.matchfn(f[len(self._pathprefix) :])

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        if dir == self._path:
            return self._matcher.visitdir(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitdir(dir[len(self._pathprefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
        if dir in self._pathdirs:
            return b'this'
        return set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )


class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers


def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    return _patsplit(pattern, default)[0]


def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern."""
    if b':' in pattern:
        kind, pat = pattern.split(b':', 1)
        if kind in allpatternkinds:
            return kind, pat
    return default, pattern


def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    i, n = 0, len(pat)
    res = b''
    group = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            res += escape(c, c)
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    i += 1
                    res += b'(?:.*/)?'
                else:
                    res += b'.*'
            else:
                res += b'[^/]*'
        elif c == b'?':
            res += b'.'
        elif c == b'[':
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                res += b'\\['
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    stuff = b'\\' + stuff
                res = b'%s[%s]' % (res, stuff)
        elif c == b'{':
            group += 1
            res += b'(?:'
        elif c == b'}' and group:
            res += b')'
            group -= 1
        elif c == b',' and group:
            res += b'|'
        elif c == b'\\':
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            res += escape(c, c)
    return res


def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs."""
    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            escaped = b''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        if globre.startswith(b'[^/]*'):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(b'[^/]*') :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
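# Illustrative examples (worked out here, not part of the original source):
# with globsuffix = b'(?:/|$)', _regex(b'path', b'foo', globsuffix) yields
# b'foo(?:/|$)', while _regex(b'rootfilesin', b'foo', globsuffix) yields
# b'foo/[^/]+$', which matches files directly inside foo/ but nothing deeper.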


def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    if mf(f[len(prefix) :]):
                        return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                i = f.rfind(b'/')
                if i >= 0:
                    dir = f[:i]
                else:
                    dir = b'.'
                return dir in dirs
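            # Illustrative (values assumed, not from the original source):
            # with dirs = {b'a'}, mf(b'a/f.txt') is True while
            # mf(b'a/b/f.txt') is False, since only files directly inside a
            # listed directory match.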

            regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)


MAX_RE_SIZE = 20000


def _joinregexes(regexps):
    """gather multiple regular expressions into a single one"""
    return b'|'.join(regexps)


def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            groupsize += piecesize + 1
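            # descriptive note: the +1 accounts for the b'|' separator that
            # _joinregexes inserts between pieces when a group is joined.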

        if startidx == 0:
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))


def _patternrootsanddirs(kindpats):
    """Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.
    """
    r = []
    d = []
    for kind, pat, source in kindpats:
        if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
            root = []
            for p in pat.split(b'/'):
                if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                    break
                root.append(p)
            r.append(b'/'.join(root))
        elif kind in (b'relpath', b'path'):
            if pat == b'.':
                pat = b''
            r.append(pat)
        elif kind in (b'rootfilesin',):
            if pat == b'.':
                pat = b''
            d.append(pat)
        else:  # relglob, re, relre
            r.append(b'')
    return r, d


def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    roots, dirs = _patternrootsanddirs(kindpats)
    return roots


def _rootsdirsandparents(kindpats):
    """Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    r, d = _patternrootsanddirs(kindpats)

    p = set()
    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.update(pathutil.dirs(d))
    p.update(pathutil.dirs(r))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p


def _explicitfiles(kindpats):
    """Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
    return _roots(filable)


def _prefix(kindpats):
    '''Whether all the patterns match a prefix (i.e. recursively)'''
    for kind, pat, source in kindpats:
        if kind not in (b'path', b'relpath'):
            return False
    return True
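# For example (illustrative): [(b'path', b'a', b'')] alone is a prefix match,
# while adding any glob or regex pattern makes _prefix return False.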


_commentre = None


def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    """

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    fp = open(filepath, b'rb')
-    for lineno, line in enumerate(util.iterfile(fp), start=1):
+    for lineno, line in enumerate(fp, start=1):
        if b"#" in line:
            global _commentre
            if not _commentre:
                _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
            # remove comments prefixed by an even number of escapes
            m = _commentre.search(line)
            if m:
                line = line[: m.end(1)]
            # fixup properly escaped comments that survived the above
            line = line.replace(b"\\#", b"#")
        line = line.rstrip()
        if not line:
            continue

        if line.startswith(b'syntax:'):
            s = line[7:].strip()
            try:
                syntax = syntaxes[s]
            except KeyError:
                if warn:
                    warn(
                        _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
                    )
            continue

        linesyntax = syntax
        for s, rels in syntaxes.items():
            if line.startswith(rels):
                linesyntax = rels
                line = line[len(rels) :]
                break
            elif line.startswith(s + b':'):
                linesyntax = rels
                line = line[len(s) + 1 :]
                break
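        # Illustrative example (not from the original source): a line
        # b'glob:sub/*.c' is rewritten to b'relglob:sub/*.c' here, while a
        # bare pattern line keeps the current default syntax prefix.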
        if sourceinfo:
            patterns.append((linesyntax + line, lineno, line))
        else:
            patterns.append(linesyntax + line)
    fp.close()
    return patterns
@@ -1,3261 +1,3261 @@
1 # patch.py - patch file parsing routines
1 # patch.py - patch file parsing routines
2 #
2 #
3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9
9
10 import collections
10 import collections
11 import contextlib
11 import contextlib
12 import copy
12 import copy
13 import errno
13 import errno
14 import os
14 import os
15 import re
15 import re
16 import shutil
16 import shutil
17 import zlib
17 import zlib
18
18
19 from .i18n import _
19 from .i18n import _
20 from .node import (
20 from .node import (
21 hex,
21 hex,
22 sha1nodeconstants,
22 sha1nodeconstants,
23 short,
23 short,
24 )
24 )
25 from .pycompat import open
25 from .pycompat import open
26 from . import (
26 from . import (
27 copies,
27 copies,
28 diffhelper,
28 diffhelper,
29 diffutil,
29 diffutil,
30 encoding,
30 encoding,
31 error,
31 error,
32 mail,
32 mail,
33 mdiff,
33 mdiff,
34 pathutil,
34 pathutil,
35 pycompat,
35 pycompat,
36 scmutil,
36 scmutil,
37 similar,
37 similar,
38 util,
38 util,
39 vfs as vfsmod,
39 vfs as vfsmod,
40 )
40 )
41 from .utils import (
41 from .utils import (
42 dateutil,
42 dateutil,
43 hashutil,
43 hashutil,
44 procutil,
44 procutil,
45 stringutil,
45 stringutil,
46 )
46 )
47
47
48 stringio = util.stringio
48 stringio = util.stringio
49
49
50 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
50 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
51 tabsplitter = re.compile(br'(\t+|[^\t]+)')
51 tabsplitter = re.compile(br'(\t+|[^\t]+)')
52 wordsplitter = re.compile(
52 wordsplitter = re.compile(
53 br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])'
53 br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])'
54 )
54 )
55
55
56 PatchError = error.PatchError
56 PatchError = error.PatchError
57 PatchParseError = error.PatchParseError
57 PatchParseError = error.PatchParseError
58 PatchApplicationError = error.PatchApplicationError
58 PatchApplicationError = error.PatchApplicationError
59
59
60 # public functions
60 # public functions
61
61
62
62
63 def split(stream):
63 def split(stream):
64 '''return an iterator of individual patches from a stream'''
64 '''return an iterator of individual patches from a stream'''
65
65
66 def isheader(line, inheader):
66 def isheader(line, inheader):
67 if inheader and line.startswith((b' ', b'\t')):
67 if inheader and line.startswith((b' ', b'\t')):
68 # continuation
68 # continuation
69 return True
69 return True
70 if line.startswith((b' ', b'-', b'+')):
70 if line.startswith((b' ', b'-', b'+')):
71 # diff line - don't check for header pattern in there
71 # diff line - don't check for header pattern in there
72 return False
72 return False
73 l = line.split(b': ', 1)
73 l = line.split(b': ', 1)
74 return len(l) == 2 and b' ' not in l[0]
74 return len(l) == 2 and b' ' not in l[0]
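# A self-contained copy of the isheader() heuristic for experimentation
# (in the real code it is a closure inside split(); this mirror is for
# illustration only):
def _isheader(line, inheader):
    if inheader and line.startswith((b' ', b'\t')):
        return True  # folded (continuation) header line
    if line.startswith((b' ', b'-', b'+')):
        return False  # unified-diff content line, never a header
    l = line.split(b': ', 1)
    return len(l) == 2 and b' ' not in l[0]

assert _isheader(b'Subject: fix a bug', False)
assert _isheader(b'\tfolded continuation', True)
assert not _isheader(b'+added line', False)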
75
75
76 def chunk(lines):
76 def chunk(lines):
77 return stringio(b''.join(lines))
77 return stringio(b''.join(lines))
78
78
79 def hgsplit(stream, cur):
79 def hgsplit(stream, cur):
80 inheader = True
80 inheader = True
81
81
82 for line in stream:
82 for line in stream:
83 if not line.strip():
83 if not line.strip():
84 inheader = False
84 inheader = False
85 if not inheader and line.startswith(b'# HG changeset patch'):
85 if not inheader and line.startswith(b'# HG changeset patch'):
86 yield chunk(cur)
86 yield chunk(cur)
87 cur = []
87 cur = []
88 inheader = True
88 inheader = True
89
89
90 cur.append(line)
90 cur.append(line)
91
91
92 if cur:
92 if cur:
93 yield chunk(cur)
93 yield chunk(cur)
94
94
95 def mboxsplit(stream, cur):
95 def mboxsplit(stream, cur):
96 for line in stream:
96 for line in stream:
97 if line.startswith(b'From '):
97 if line.startswith(b'From '):
98 for c in split(chunk(cur[1:])):
98 for c in split(chunk(cur[1:])):
99 yield c
99 yield c
100 cur = []
100 cur = []
101
101
102 cur.append(line)
102 cur.append(line)
103
103
104 if cur:
104 if cur:
105 for c in split(chunk(cur[1:])):
105 for c in split(chunk(cur[1:])):
106 yield c
106 yield c
107
107
108 def mimesplit(stream, cur):
108 def mimesplit(stream, cur):
109 def msgfp(m):
109 def msgfp(m):
110 fp = stringio()
110 fp = stringio()
111 # pytype: disable=wrong-arg-types
111 # pytype: disable=wrong-arg-types
112 g = mail.Generator(fp, mangle_from_=False)
112 g = mail.Generator(fp, mangle_from_=False)
113 # pytype: enable=wrong-arg-types
113 # pytype: enable=wrong-arg-types
114 g.flatten(m)
114 g.flatten(m)
115 fp.seek(0)
115 fp.seek(0)
116 return fp
116 return fp
117
117
118 for line in stream:
118 for line in stream:
119 cur.append(line)
119 cur.append(line)
120 c = chunk(cur)
120 c = chunk(cur)
121
121
122 m = mail.parse(c)
122 m = mail.parse(c)
123 if not m.is_multipart():
123 if not m.is_multipart():
124 yield msgfp(m)
124 yield msgfp(m)
125 else:
125 else:
126 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
126 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
127 for part in m.walk():
127 for part in m.walk():
128 ct = part.get_content_type()
128 ct = part.get_content_type()
129 if ct not in ok_types:
129 if ct not in ok_types:
130 continue
130 continue
131 yield msgfp(part)
131 yield msgfp(part)
132
132
133 def headersplit(stream, cur):
133 def headersplit(stream, cur):
134 inheader = False
134 inheader = False
135
135
136 for line in stream:
136 for line in stream:
137 if not inheader and isheader(line, inheader):
137 if not inheader and isheader(line, inheader):
138 yield chunk(cur)
138 yield chunk(cur)
139 cur = []
139 cur = []
140 inheader = True
140 inheader = True
141 if inheader and not isheader(line, inheader):
141 if inheader and not isheader(line, inheader):
142 inheader = False
142 inheader = False
143
143
144 cur.append(line)
144 cur.append(line)
145
145
146 if cur:
146 if cur:
147 yield chunk(cur)
147 yield chunk(cur)
148
148
149 def remainder(cur):
149 def remainder(cur):
150 yield chunk(cur)
150 yield chunk(cur)
151
151
152 class fiter(object):
152 class fiter(object):
153 def __init__(self, fp):
153 def __init__(self, fp):
154 self.fp = fp
154 self.fp = fp
155
155
156 def __iter__(self):
156 def __iter__(self):
157 return self
157 return self
158
158
159 def next(self):
159 def next(self):
160 l = self.fp.readline()
160 l = self.fp.readline()
161 if not l:
161 if not l:
162 raise StopIteration
162 raise StopIteration
163 return l
163 return l
164
164
165 __next__ = next
165 __next__ = next
166
166
167 inheader = False
167 inheader = False
168 cur = []
168 cur = []
169
169
170 mimeheaders = [b'content-type']
170 mimeheaders = [b'content-type']
171
171
172 if not util.safehasattr(stream, b'next'):
172 if not util.safehasattr(stream, b'next'):
173 # http responses, for example, have readline but not next
173 # http responses, for example, have readline but not next
174 stream = fiter(stream)
174 stream = fiter(stream)
175
175
176 for line in stream:
176 for line in stream:
177 cur.append(line)
177 cur.append(line)
178 if line.startswith(b'# HG changeset patch'):
178 if line.startswith(b'# HG changeset patch'):
179 return hgsplit(stream, cur)
179 return hgsplit(stream, cur)
180 elif line.startswith(b'From '):
180 elif line.startswith(b'From '):
181 return mboxsplit(stream, cur)
181 return mboxsplit(stream, cur)
182 elif isheader(line, inheader):
182 elif isheader(line, inheader):
183 inheader = True
183 inheader = True
184 if line.split(b':', 1)[0].lower() in mimeheaders:
184 if line.split(b':', 1)[0].lower() in mimeheaders:
185 # let email parser handle this
185 # let email parser handle this
186 return mimesplit(stream, cur)
186 return mimesplit(stream, cur)
187 elif line.startswith(b'--- ') and inheader:
187 elif line.startswith(b'--- ') and inheader:
188 # No evil headers seen by diff start, split by hand
188 # No evil headers seen by diff start, split by hand
189 return headersplit(stream, cur)
189 return headersplit(stream, cur)
190 # Not enough info, keep reading
190 # Not enough info, keep reading
191
191
192 # if we are here, we have a very plain patch
192 # if we are here, we have a very plain patch
193 return remainder(cur)
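# Hedged usage sketch: split() accepts any stream of bytes lines (it
# falls back to readline() via fiter) and yields one file-like chunk per
# patch. The sample mbox content here is made up.
import io

stream = io.BytesIO(
    b'From sender@example.com\n'
    b'Subject: [PATCH] one\n'
    b'\n'
    b'--- a/f\n'
    b'+++ b/f\n'
)
for fp in split(stream):
    patchdata = fp.read()  # bytes of a single patch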
193 return remainder(cur)
194
194
195
195
196 ## Some facility for extensible patch parsing:
196 ## Some facility for extensible patch parsing:
197 # list of pairs ("header to match", "data key")
197 # list of pairs ("header to match", "data key")
198 patchheadermap = [
198 patchheadermap = [
199 (b'Date', b'date'),
199 (b'Date', b'date'),
200 (b'Branch', b'branch'),
200 (b'Branch', b'branch'),
201 (b'Node ID', b'nodeid'),
201 (b'Node ID', b'nodeid'),
202 ]
202 ]
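# Sketch of how the map above is consumed (mirroring the loop inside
# _extract() further down): each "# Header value" line is matched by
# prefix and stored under its data key.
line = b'# Date 0 0'
for header, key in patchheadermap:
    prefix = b'# %s ' % header
    if line.startswith(prefix):
        value = line[len(prefix):]  # -> data[b'date'] = b'0 0'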
203
203
204
204
205 @contextlib.contextmanager
205 @contextlib.contextmanager
206 def extract(ui, fileobj):
206 def extract(ui, fileobj):
207 """extract patch from data read from fileobj.
207 """extract patch from data read from fileobj.
208
208
209 patch can be a normal patch or contained in an email message.
209 patch can be a normal patch or contained in an email message.
210
210
211 return a dictionary. Standard keys are:
211 return a dictionary. Standard keys are:
212 - filename,
212 - filename,
213 - message,
213 - message,
214 - user,
214 - user,
215 - date,
215 - date,
216 - branch,
216 - branch,
217 - node,
217 - node,
218 - p1,
218 - p1,
219 - p2.
219 - p2.
220 Any item can be missing from the dictionary. If filename is missing,
220 Any item can be missing from the dictionary. If filename is missing,
221 fileobj did not contain a patch. The temporary file is unlinked on exit."""
221 fileobj did not contain a patch. The temporary file is unlinked on exit."""
222
222
223 fd, tmpname = pycompat.mkstemp(prefix=b'hg-patch-')
223 fd, tmpname = pycompat.mkstemp(prefix=b'hg-patch-')
224 tmpfp = os.fdopen(fd, 'wb')
224 tmpfp = os.fdopen(fd, 'wb')
225 try:
225 try:
226 yield _extract(ui, fileobj, tmpname, tmpfp)
226 yield _extract(ui, fileobj, tmpname, tmpfp)
227 finally:
227 finally:
228 tmpfp.close()
228 tmpfp.close()
229 os.unlink(tmpname)
229 os.unlink(tmpname)
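# Illustrative use of the context manager above; `ui` stands for any
# Mercurial ui object and apply_the_patch is a hypothetical helper. The
# temp file named by data[b'filename'] only exists inside the block.
with open(b'incoming.patch', b'rb') as fileobj:
    with extract(ui, fileobj) as data:
        if b'filename' in data:
            apply_the_patch(data[b'filename'])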
230
230
231
231
232 def _extract(ui, fileobj, tmpname, tmpfp):
232 def _extract(ui, fileobj, tmpname, tmpfp):
233
233
234 # attempt to detect the start of a patch
234 # attempt to detect the start of a patch
235 # (this heuristic is borrowed from quilt)
235 # (this heuristic is borrowed from quilt)
236 diffre = re.compile(
236 diffre = re.compile(
237 br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
237 br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
238 br'retrieving revision [0-9]+(\.[0-9]+)*$|'
238 br'retrieving revision [0-9]+(\.[0-9]+)*$|'
239 br'---[ \t].*?^\+\+\+[ \t]|'
239 br'---[ \t].*?^\+\+\+[ \t]|'
240 br'\*\*\*[ \t].*?^---[ \t])',
240 br'\*\*\*[ \t].*?^---[ \t])',
241 re.MULTILINE | re.DOTALL,
241 re.MULTILINE | re.DOTALL,
242 )
242 )
243
243
244 data = {}
244 data = {}
245
245
246 msg = mail.parse(fileobj)
246 msg = mail.parse(fileobj)
247
247
248 subject = msg['Subject'] and mail.headdecode(msg['Subject'])
248 subject = msg['Subject'] and mail.headdecode(msg['Subject'])
249 data[b'user'] = msg['From'] and mail.headdecode(msg['From'])
249 data[b'user'] = msg['From'] and mail.headdecode(msg['From'])
250 if not subject and not data[b'user']:
250 if not subject and not data[b'user']:
251 # Not an email, restore parsed headers if any
251 # Not an email, restore parsed headers if any
252 subject = (
252 subject = (
253 b'\n'.join(
253 b'\n'.join(
254 b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
254 b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
255 )
255 )
256 + b'\n'
256 + b'\n'
257 )
257 )
258
258
259 # should try to parse msg['Date']
259 # should try to parse msg['Date']
260 parents = []
260 parents = []
261
261
262 nodeid = msg['X-Mercurial-Node']
262 nodeid = msg['X-Mercurial-Node']
263 if nodeid:
263 if nodeid:
264 data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
264 data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
265 ui.debug(b'Node ID: %s\n' % nodeid)
265 ui.debug(b'Node ID: %s\n' % nodeid)
266
266
267 if subject:
267 if subject:
268 if subject.startswith(b'[PATCH'):
268 if subject.startswith(b'[PATCH'):
269 pend = subject.find(b']')
269 pend = subject.find(b']')
270 if pend >= 0:
270 if pend >= 0:
271 subject = subject[pend + 1 :].lstrip()
271 subject = subject[pend + 1 :].lstrip()
272 subject = re.sub(br'\n[ \t]+', b' ', subject)
272 subject = re.sub(br'\n[ \t]+', b' ', subject)
273 ui.debug(b'Subject: %s\n' % subject)
273 ui.debug(b'Subject: %s\n' % subject)
274 if data[b'user']:
274 if data[b'user']:
275 ui.debug(b'From: %s\n' % data[b'user'])
275 ui.debug(b'From: %s\n' % data[b'user'])
276 diffs_seen = 0
276 diffs_seen = 0
277 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
277 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
278 message = b''
278 message = b''
279 for part in msg.walk():
279 for part in msg.walk():
280 content_type = pycompat.bytestr(part.get_content_type())
280 content_type = pycompat.bytestr(part.get_content_type())
281 ui.debug(b'Content-Type: %s\n' % content_type)
281 ui.debug(b'Content-Type: %s\n' % content_type)
282 if content_type not in ok_types:
282 if content_type not in ok_types:
283 continue
283 continue
284 payload = part.get_payload(decode=True)
284 payload = part.get_payload(decode=True)
285 m = diffre.search(payload)
285 m = diffre.search(payload)
286 if m:
286 if m:
287 hgpatch = False
287 hgpatch = False
288 hgpatchheader = False
288 hgpatchheader = False
289 ignoretext = False
289 ignoretext = False
290
290
291 ui.debug(b'found patch at byte %d\n' % m.start(0))
291 ui.debug(b'found patch at byte %d\n' % m.start(0))
292 diffs_seen += 1
292 diffs_seen += 1
293 cfp = stringio()
293 cfp = stringio()
294 for line in payload[: m.start(0)].splitlines():
294 for line in payload[: m.start(0)].splitlines():
295 if line.startswith(b'# HG changeset patch') and not hgpatch:
295 if line.startswith(b'# HG changeset patch') and not hgpatch:
296 ui.debug(b'patch generated by hg export\n')
296 ui.debug(b'patch generated by hg export\n')
297 hgpatch = True
297 hgpatch = True
298 hgpatchheader = True
298 hgpatchheader = True
299 # drop earlier commit message content
299 # drop earlier commit message content
300 cfp.seek(0)
300 cfp.seek(0)
301 cfp.truncate()
301 cfp.truncate()
302 subject = None
302 subject = None
303 elif hgpatchheader:
303 elif hgpatchheader:
304 if line.startswith(b'# User '):
304 if line.startswith(b'# User '):
305 data[b'user'] = line[7:]
305 data[b'user'] = line[7:]
306 ui.debug(b'From: %s\n' % data[b'user'])
306 ui.debug(b'From: %s\n' % data[b'user'])
307 elif line.startswith(b"# Parent "):
307 elif line.startswith(b"# Parent "):
308 parents.append(line[9:].lstrip())
308 parents.append(line[9:].lstrip())
309 elif line.startswith(b"# "):
309 elif line.startswith(b"# "):
310 for header, key in patchheadermap:
310 for header, key in patchheadermap:
311 prefix = b'# %s ' % header
311 prefix = b'# %s ' % header
312 if line.startswith(prefix):
312 if line.startswith(prefix):
313 data[key] = line[len(prefix) :]
313 data[key] = line[len(prefix) :]
314 ui.debug(b'%s: %s\n' % (header, data[key]))
314 ui.debug(b'%s: %s\n' % (header, data[key]))
315 else:
315 else:
316 hgpatchheader = False
316 hgpatchheader = False
317 elif line == b'---':
317 elif line == b'---':
318 ignoretext = True
318 ignoretext = True
319 if not hgpatchheader and not ignoretext:
319 if not hgpatchheader and not ignoretext:
320 cfp.write(line)
320 cfp.write(line)
321 cfp.write(b'\n')
321 cfp.write(b'\n')
322 message = cfp.getvalue()
322 message = cfp.getvalue()
323 if tmpfp:
323 if tmpfp:
324 tmpfp.write(payload)
324 tmpfp.write(payload)
325 if not payload.endswith(b'\n'):
325 if not payload.endswith(b'\n'):
326 tmpfp.write(b'\n')
326 tmpfp.write(b'\n')
327 elif not diffs_seen and message and content_type == b'text/plain':
327 elif not diffs_seen and message and content_type == b'text/plain':
328 message += b'\n' + payload
328 message += b'\n' + payload
329
329
330 if subject and not message.startswith(subject):
330 if subject and not message.startswith(subject):
331 message = b'%s\n%s' % (subject, message)
331 message = b'%s\n%s' % (subject, message)
332 data[b'message'] = message
332 data[b'message'] = message
333 tmpfp.close()
333 tmpfp.close()
334 if parents:
334 if parents:
335 data[b'p1'] = parents.pop(0)
335 data[b'p1'] = parents.pop(0)
336 if parents:
336 if parents:
337 data[b'p2'] = parents.pop(0)
337 data[b'p2'] = parents.pop(0)
338
338
339 if diffs_seen:
339 if diffs_seen:
340 data[b'filename'] = tmpname
340 data[b'filename'] = tmpname
341
341
342 return data
342 return data
343
343
344
344
345 class patchmeta(object):
345 class patchmeta(object):
346 """Patched file metadata
346 """Patched file metadata
347
347
348 'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
348 'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
349 or COPY. 'path' is patched file path. 'oldpath' is set to the
349 or COPY. 'path' is patched file path. 'oldpath' is set to the
350 origin file when 'op' is either COPY or RENAME, None otherwise. If
350 origin file when 'op' is either COPY or RENAME, None otherwise. If
351 file mode is changed, 'mode' is a tuple (islink, isexec) where
351 file mode is changed, 'mode' is a tuple (islink, isexec) where
352 'islink' is True if the file is a symlink and 'isexec' is True if
352 'islink' is True if the file is a symlink and 'isexec' is True if
353 the file is executable. Otherwise, 'mode' is None.
353 the file is executable. Otherwise, 'mode' is None.
354 """
354 """
355
355
356 def __init__(self, path):
356 def __init__(self, path):
357 self.path = path
357 self.path = path
358 self.oldpath = None
358 self.oldpath = None
359 self.mode = None
359 self.mode = None
360 self.op = b'MODIFY'
360 self.op = b'MODIFY'
361 self.binary = False
361 self.binary = False
362
362
363 def setmode(self, mode):
363 def setmode(self, mode):
364 islink = mode & 0o20000
364 islink = mode & 0o20000
365 isexec = mode & 0o100
365 isexec = mode & 0o100
366 self.mode = (islink, isexec)
366 self.mode = (islink, isexec)
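# setmode() keys off two st_mode bits; a short standalone check:
pm = patchmeta(b'f')
pm.setmode(0o120000)  # symlink type bit 0o20000 set
assert pm.mode[0] and not pm.mode[1]  # (islink, isexec)
pm.setmode(0o100755)  # regular file with exec bit 0o100 set
assert not pm.mode[0] and pm.mode[1]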
367
367
368 def copy(self):
368 def copy(self):
369 other = patchmeta(self.path)
369 other = patchmeta(self.path)
370 other.oldpath = self.oldpath
370 other.oldpath = self.oldpath
371 other.mode = self.mode
371 other.mode = self.mode
372 other.op = self.op
372 other.op = self.op
373 other.binary = self.binary
373 other.binary = self.binary
374 return other
374 return other
375
375
376 def _ispatchinga(self, afile):
376 def _ispatchinga(self, afile):
377 if afile == b'/dev/null':
377 if afile == b'/dev/null':
378 return self.op == b'ADD'
378 return self.op == b'ADD'
379 return afile == b'a/' + (self.oldpath or self.path)
379 return afile == b'a/' + (self.oldpath or self.path)
380
380
381 def _ispatchingb(self, bfile):
381 def _ispatchingb(self, bfile):
382 if bfile == b'/dev/null':
382 if bfile == b'/dev/null':
383 return self.op == b'DELETE'
383 return self.op == b'DELETE'
384 return bfile == b'b/' + self.path
384 return bfile == b'b/' + self.path
385
385
386 def ispatching(self, afile, bfile):
386 def ispatching(self, afile, bfile):
387 return self._ispatchinga(afile) and self._ispatchingb(bfile)
387 return self._ispatchinga(afile) and self._ispatchingb(bfile)
388
388
389 def __repr__(self):
389 def __repr__(self):
390 return "<patchmeta %s %r>" % (self.op, self.path)
390 return "<patchmeta %s %r>" % (self.op, self.path)
391
391
392
392
393 def readgitpatch(lr):
393 def readgitpatch(lr):
394 """extract git-style metadata about patches from <patchname>"""
394 """extract git-style metadata about patches from <patchname>"""
395
395
396 # Filter patch for git information
396 # Filter patch for git information
397 gp = None
397 gp = None
398 gitpatches = []
398 gitpatches = []
399 for line in lr:
399 for line in lr:
400 line = line.rstrip(b'\r\n')
400 line = line.rstrip(b'\r\n')
401 if line.startswith(b'diff --git a/'):
401 if line.startswith(b'diff --git a/'):
402 m = gitre.match(line)
402 m = gitre.match(line)
403 if m:
403 if m:
404 if gp:
404 if gp:
405 gitpatches.append(gp)
405 gitpatches.append(gp)
406 dst = m.group(2)
406 dst = m.group(2)
407 gp = patchmeta(dst)
407 gp = patchmeta(dst)
408 elif gp:
408 elif gp:
409 if line.startswith(b'--- '):
409 if line.startswith(b'--- '):
410 gitpatches.append(gp)
410 gitpatches.append(gp)
411 gp = None
411 gp = None
412 continue
412 continue
413 if line.startswith(b'rename from '):
413 if line.startswith(b'rename from '):
414 gp.op = b'RENAME'
414 gp.op = b'RENAME'
415 gp.oldpath = line[12:]
415 gp.oldpath = line[12:]
416 elif line.startswith(b'rename to '):
416 elif line.startswith(b'rename to '):
417 gp.path = line[10:]
417 gp.path = line[10:]
418 elif line.startswith(b'copy from '):
418 elif line.startswith(b'copy from '):
419 gp.op = b'COPY'
419 gp.op = b'COPY'
420 gp.oldpath = line[10:]
420 gp.oldpath = line[10:]
421 elif line.startswith(b'copy to '):
421 elif line.startswith(b'copy to '):
422 gp.path = line[8:]
422 gp.path = line[8:]
423 elif line.startswith(b'deleted file'):
423 elif line.startswith(b'deleted file'):
424 gp.op = b'DELETE'
424 gp.op = b'DELETE'
425 elif line.startswith(b'new file mode '):
425 elif line.startswith(b'new file mode '):
426 gp.op = b'ADD'
426 gp.op = b'ADD'
427 gp.setmode(int(line[-6:], 8))
427 gp.setmode(int(line[-6:], 8))
428 elif line.startswith(b'new mode '):
428 elif line.startswith(b'new mode '):
429 gp.setmode(int(line[-6:], 8))
429 gp.setmode(int(line[-6:], 8))
430 elif line.startswith(b'GIT binary patch'):
430 elif line.startswith(b'GIT binary patch'):
431 gp.binary = True
431 gp.binary = True
432 if gp:
432 if gp:
433 gitpatches.append(gp)
433 gitpatches.append(gp)
434
434
435 return gitpatches
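# Hedged example: readgitpatch() accepts any iterable of bytes lines; a
# rename header yields a single patchmeta with both paths filled in.
lines = [
    b'diff --git a/old.txt b/new.txt\n',
    b'rename from old.txt\n',
    b'rename to new.txt\n',
]
[gp] = readgitpatch(lines)
assert (gp.op, gp.oldpath, gp.path) == (b'RENAME', b'old.txt', b'new.txt')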
435 return gitpatches
436
436
437
437
438 class linereader(object):
438 class linereader(object):
439 # simple class to allow pushing lines back into the input stream
439 # simple class to allow pushing lines back into the input stream
440 def __init__(self, fp):
440 def __init__(self, fp):
441 self.fp = fp
441 self.fp = fp
442 self.buf = []
442 self.buf = []
443
443
444 def push(self, line):
444 def push(self, line):
445 if line is not None:
445 if line is not None:
446 self.buf.append(line)
446 self.buf.append(line)
447
447
448 def readline(self):
448 def readline(self):
449 if self.buf:
449 if self.buf:
450 l = self.buf[0]
450 l = self.buf[0]
451 del self.buf[0]
451 del self.buf[0]
452 return l
452 return l
453 return self.fp.readline()
453 return self.fp.readline()
454
454
455 def __iter__(self):
455 def __iter__(self):
456 return iter(self.readline, b'')
456 return iter(self.readline, b'')
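# Illustrative use of the push-back reader above: a parser may peek at a
# line and return it to the stream if it belongs to the next section.
import io

lr = linereader(io.BytesIO(b'first\nsecond\n'))
line = lr.readline()  # b'first\n'
lr.push(line)         # give it back
assert lr.readline() == b'first\n'
assert lr.readline() == b'second\n'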
457
457
458
458
459 class abstractbackend(object):
459 class abstractbackend(object):
460 def __init__(self, ui):
460 def __init__(self, ui):
461 self.ui = ui
461 self.ui = ui
462
462
463 def getfile(self, fname):
463 def getfile(self, fname):
464 """Return target file data and flags as a (data, (islink,
464 """Return target file data and flags as a (data, (islink,
465 isexec)) tuple. Data is None if file is missing/deleted.
465 isexec)) tuple. Data is None if file is missing/deleted.
466 """
466 """
467 raise NotImplementedError
467 raise NotImplementedError
468
468
469 def setfile(self, fname, data, mode, copysource):
469 def setfile(self, fname, data, mode, copysource):
470 """Write data to target file fname and set its mode. mode is a
470 """Write data to target file fname and set its mode. mode is a
471 (islink, isexec) tuple. If data is None, the file content should
471 (islink, isexec) tuple. If data is None, the file content should
472 be left unchanged. If the file is modified after being copied,
472 be left unchanged. If the file is modified after being copied,
473 copysource is set to the original file name.
473 copysource is set to the original file name.
474 """
474 """
475 raise NotImplementedError
475 raise NotImplementedError
476
476
477 def unlink(self, fname):
477 def unlink(self, fname):
478 """Unlink target file."""
478 """Unlink target file."""
479 raise NotImplementedError
479 raise NotImplementedError
480
480
481 def writerej(self, fname, failed, total, lines):
481 def writerej(self, fname, failed, total, lines):
482 """Write rejected lines for fname. total is the number of hunks
482 """Write rejected lines for fname. total is the number of hunks
483 which failed to apply and total the total number of hunks for this
483 which failed to apply and total the total number of hunks for this
484 files.
484 files.
485 """
485 """
486
486
487 def exists(self, fname):
487 def exists(self, fname):
488 raise NotImplementedError
488 raise NotImplementedError
489
489
490 def close(self):
490 def close(self):
491 raise NotImplementedError
491 raise NotImplementedError
492
492
493
493
494 class fsbackend(abstractbackend):
494 class fsbackend(abstractbackend):
495 def __init__(self, ui, basedir):
495 def __init__(self, ui, basedir):
496 super(fsbackend, self).__init__(ui)
496 super(fsbackend, self).__init__(ui)
497 self.opener = vfsmod.vfs(basedir)
497 self.opener = vfsmod.vfs(basedir)
498
498
499 def getfile(self, fname):
499 def getfile(self, fname):
500 if self.opener.islink(fname):
500 if self.opener.islink(fname):
501 return (self.opener.readlink(fname), (True, False))
501 return (self.opener.readlink(fname), (True, False))
502
502
503 isexec = False
503 isexec = False
504 try:
504 try:
505 isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
505 isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
506 except OSError as e:
506 except OSError as e:
507 if e.errno != errno.ENOENT:
507 if e.errno != errno.ENOENT:
508 raise
508 raise
509 try:
509 try:
510 return (self.opener.read(fname), (False, isexec))
510 return (self.opener.read(fname), (False, isexec))
511 except IOError as e:
511 except IOError as e:
512 if e.errno != errno.ENOENT:
512 if e.errno != errno.ENOENT:
513 raise
513 raise
514 return None, None
514 return None, None
515
515
516 def setfile(self, fname, data, mode, copysource):
516 def setfile(self, fname, data, mode, copysource):
517 islink, isexec = mode
517 islink, isexec = mode
518 if data is None:
518 if data is None:
519 self.opener.setflags(fname, islink, isexec)
519 self.opener.setflags(fname, islink, isexec)
520 return
520 return
521 if islink:
521 if islink:
522 self.opener.symlink(data, fname)
522 self.opener.symlink(data, fname)
523 else:
523 else:
524 self.opener.write(fname, data)
524 self.opener.write(fname, data)
525 if isexec:
525 if isexec:
526 self.opener.setflags(fname, False, True)
526 self.opener.setflags(fname, False, True)
527
527
528 def unlink(self, fname):
528 def unlink(self, fname):
529 rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
529 rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
530 self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)
530 self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)
531
531
532 def writerej(self, fname, failed, total, lines):
532 def writerej(self, fname, failed, total, lines):
533 fname = fname + b".rej"
533 fname = fname + b".rej"
534 self.ui.warn(
534 self.ui.warn(
535 _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
535 _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
536 % (failed, total, fname)
536 % (failed, total, fname)
537 )
537 )
538 fp = self.opener(fname, b'w')
538 fp = self.opener(fname, b'w')
539 fp.writelines(lines)
539 fp.writelines(lines)
540 fp.close()
540 fp.close()
541
541
542 def exists(self, fname):
542 def exists(self, fname):
543 return self.opener.lexists(fname)
543 return self.opener.lexists(fname)
544
544
545
545
546 class workingbackend(fsbackend):
546 class workingbackend(fsbackend):
547 def __init__(self, ui, repo, similarity):
547 def __init__(self, ui, repo, similarity):
548 super(workingbackend, self).__init__(ui, repo.root)
548 super(workingbackend, self).__init__(ui, repo.root)
549 self.repo = repo
549 self.repo = repo
550 self.similarity = similarity
550 self.similarity = similarity
551 self.removed = set()
551 self.removed = set()
552 self.changed = set()
552 self.changed = set()
553 self.copied = []
553 self.copied = []
554
554
555 def _checkknown(self, fname):
555 def _checkknown(self, fname):
556 if not self.repo.dirstate.get_entry(fname).any_tracked and self.exists(
556 if not self.repo.dirstate.get_entry(fname).any_tracked and self.exists(
557 fname
557 fname
558 ):
558 ):
559 raise PatchApplicationError(
559 raise PatchApplicationError(
560 _(b'cannot patch %s: file is not tracked') % fname
560 _(b'cannot patch %s: file is not tracked') % fname
561 )
561 )
562
562
563 def setfile(self, fname, data, mode, copysource):
563 def setfile(self, fname, data, mode, copysource):
564 self._checkknown(fname)
564 self._checkknown(fname)
565 super(workingbackend, self).setfile(fname, data, mode, copysource)
565 super(workingbackend, self).setfile(fname, data, mode, copysource)
566 if copysource is not None:
566 if copysource is not None:
567 self.copied.append((copysource, fname))
567 self.copied.append((copysource, fname))
568 self.changed.add(fname)
568 self.changed.add(fname)
569
569
570 def unlink(self, fname):
570 def unlink(self, fname):
571 self._checkknown(fname)
571 self._checkknown(fname)
572 super(workingbackend, self).unlink(fname)
572 super(workingbackend, self).unlink(fname)
573 self.removed.add(fname)
573 self.removed.add(fname)
574 self.changed.add(fname)
574 self.changed.add(fname)
575
575
576 def close(self):
576 def close(self):
577 wctx = self.repo[None]
577 wctx = self.repo[None]
578 changed = set(self.changed)
578 changed = set(self.changed)
579 for src, dst in self.copied:
579 for src, dst in self.copied:
580 scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
580 scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
581 if self.removed:
581 if self.removed:
582 wctx.forget(sorted(self.removed))
582 wctx.forget(sorted(self.removed))
583 for f in self.removed:
583 for f in self.removed:
584 if f not in self.repo.dirstate:
584 if f not in self.repo.dirstate:
585 # File was deleted and no longer belongs to the
585 # File was deleted and no longer belongs to the
586 # dirstate, it was probably marked added then
586 # dirstate, it was probably marked added then
587 # deleted, and should not be considered by
587 # deleted, and should not be considered by
588 # marktouched().
588 # marktouched().
589 changed.discard(f)
589 changed.discard(f)
590 if changed:
590 if changed:
591 scmutil.marktouched(self.repo, changed, self.similarity)
591 scmutil.marktouched(self.repo, changed, self.similarity)
592 return sorted(self.changed)
592 return sorted(self.changed)
593
593
594
594
595 class filestore(object):
595 class filestore(object):
596 def __init__(self, maxsize=None):
596 def __init__(self, maxsize=None):
597 self.opener = None
597 self.opener = None
598 self.files = {}
598 self.files = {}
599 self.created = 0
599 self.created = 0
600 self.maxsize = maxsize
600 self.maxsize = maxsize
601 if self.maxsize is None:
601 if self.maxsize is None:
602 self.maxsize = 4 * (2 ** 20)
602 self.maxsize = 4 * (2 ** 20)
603 self.size = 0
603 self.size = 0
604 self.data = {}
604 self.data = {}
605
605
606 def setfile(self, fname, data, mode, copied=None):
606 def setfile(self, fname, data, mode, copied=None):
607 if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
607 if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
608 self.data[fname] = (data, mode, copied)
608 self.data[fname] = (data, mode, copied)
609 self.size += len(data)
609 self.size += len(data)
610 else:
610 else:
611 if self.opener is None:
611 if self.opener is None:
612 root = pycompat.mkdtemp(prefix=b'hg-patch-')
612 root = pycompat.mkdtemp(prefix=b'hg-patch-')
613 self.opener = vfsmod.vfs(root)
613 self.opener = vfsmod.vfs(root)
614 # Avoid filename issues with these simple names
614 # Avoid filename issues with these simple names
615 fn = b'%d' % self.created
615 fn = b'%d' % self.created
616 self.opener.write(fn, data)
616 self.opener.write(fn, data)
617 self.created += 1
617 self.created += 1
618 self.files[fname] = (fn, mode, copied)
618 self.files[fname] = (fn, mode, copied)
619
619
620 def getfile(self, fname):
620 def getfile(self, fname):
621 if fname in self.data:
621 if fname in self.data:
622 return self.data[fname]
622 return self.data[fname]
623 if not self.opener or fname not in self.files:
623 if not self.opener or fname not in self.files:
624 return None, None, None
624 return None, None, None
625 fn, mode, copied = self.files[fname]
625 fn, mode, copied = self.files[fname]
626 return self.opener.read(fn), mode, copied
626 return self.opener.read(fn), mode, copied
627
627
628 def close(self):
628 def close(self):
629 if self.opener:
629 if self.opener:
630 shutil.rmtree(self.opener.base)
630 shutil.rmtree(self.opener.base)
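# Sketch of the spill behavior above: data stays in the in-memory dict
# until the running total would exceed maxsize, after which files go to
# a temp directory (maxsize=0 forces every file to disk).
store = filestore(maxsize=0)
store.setfile(b'big', b'x' * 10, (False, False))
assert b'big' not in store.data               # spilled to disk
assert store.getfile(b'big')[0] == b'x' * 10  # read back transparently
store.close()                                 # removes the temp dir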
631
631
632
632
633 class repobackend(abstractbackend):
633 class repobackend(abstractbackend):
634 def __init__(self, ui, repo, ctx, store):
634 def __init__(self, ui, repo, ctx, store):
635 super(repobackend, self).__init__(ui)
635 super(repobackend, self).__init__(ui)
636 self.repo = repo
636 self.repo = repo
637 self.ctx = ctx
637 self.ctx = ctx
638 self.store = store
638 self.store = store
639 self.changed = set()
639 self.changed = set()
640 self.removed = set()
640 self.removed = set()
641 self.copied = {}
641 self.copied = {}
642
642
643 def _checkknown(self, fname):
643 def _checkknown(self, fname):
644 if fname not in self.ctx:
644 if fname not in self.ctx:
645 raise PatchApplicationError(
645 raise PatchApplicationError(
646 _(b'cannot patch %s: file is not tracked') % fname
646 _(b'cannot patch %s: file is not tracked') % fname
647 )
647 )
648
648
649 def getfile(self, fname):
649 def getfile(self, fname):
650 try:
650 try:
651 fctx = self.ctx[fname]
651 fctx = self.ctx[fname]
652 except error.LookupError:
652 except error.LookupError:
653 return None, None
653 return None, None
654 flags = fctx.flags()
654 flags = fctx.flags()
655 return fctx.data(), (b'l' in flags, b'x' in flags)
655 return fctx.data(), (b'l' in flags, b'x' in flags)
656
656
657 def setfile(self, fname, data, mode, copysource):
657 def setfile(self, fname, data, mode, copysource):
658 if copysource:
658 if copysource:
659 self._checkknown(copysource)
659 self._checkknown(copysource)
660 if data is None:
660 if data is None:
661 data = self.ctx[fname].data()
661 data = self.ctx[fname].data()
662 self.store.setfile(fname, data, mode, copysource)
662 self.store.setfile(fname, data, mode, copysource)
663 self.changed.add(fname)
663 self.changed.add(fname)
664 if copysource:
664 if copysource:
665 self.copied[fname] = copysource
665 self.copied[fname] = copysource
666
666
667 def unlink(self, fname):
667 def unlink(self, fname):
668 self._checkknown(fname)
668 self._checkknown(fname)
669 self.removed.add(fname)
669 self.removed.add(fname)
670
670
671 def exists(self, fname):
671 def exists(self, fname):
672 return fname in self.ctx
672 return fname in self.ctx
673
673
674 def close(self):
674 def close(self):
675 return self.changed | self.removed
675 return self.changed | self.removed
676
676
677
677
678 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
678 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
679 unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
679 unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
680 contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
680 contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
681 eolmodes = [b'strict', b'crlf', b'lf', b'auto']
681 eolmodes = [b'strict', b'crlf', b'lf', b'auto']
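# Illustrative matches against the unified hunk header regexp above;
# the length fields are optional and are treated as 1 when absent.
m = unidesc.match(b'@@ -10,3 +10,4 @@')
assert m.groups() == (b'10', b'3', b'10', b'4')
m = unidesc.match(b'@@ -1 +1 @@')  # one-line hunk, lengths omitted
assert m.groups() == (b'1', None, b'1', None)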
682
682
683
683
684 class patchfile(object):
684 class patchfile(object):
685 def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
685 def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
686 self.fname = gp.path
686 self.fname = gp.path
687 self.eolmode = eolmode
687 self.eolmode = eolmode
688 self.eol = None
688 self.eol = None
689 self.backend = backend
689 self.backend = backend
690 self.ui = ui
690 self.ui = ui
691 self.lines = []
691 self.lines = []
692 self.exists = False
692 self.exists = False
693 self.missing = True
693 self.missing = True
694 self.mode = gp.mode
694 self.mode = gp.mode
695 self.copysource = gp.oldpath
695 self.copysource = gp.oldpath
696 self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
696 self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
697 self.remove = gp.op == b'DELETE'
697 self.remove = gp.op == b'DELETE'
698 if self.copysource is None:
698 if self.copysource is None:
699 data, mode = backend.getfile(self.fname)
699 data, mode = backend.getfile(self.fname)
700 else:
700 else:
701 data, mode = store.getfile(self.copysource)[:2]
701 data, mode = store.getfile(self.copysource)[:2]
702 if data is not None:
702 if data is not None:
703 self.exists = self.copysource is None or backend.exists(self.fname)
703 self.exists = self.copysource is None or backend.exists(self.fname)
704 self.missing = False
704 self.missing = False
705 if data:
705 if data:
706 self.lines = mdiff.splitnewlines(data)
706 self.lines = mdiff.splitnewlines(data)
707 if self.mode is None:
707 if self.mode is None:
708 self.mode = mode
708 self.mode = mode
709 if self.lines:
709 if self.lines:
710 # Normalize line endings
710 # Normalize line endings
711 if self.lines[0].endswith(b'\r\n'):
711 if self.lines[0].endswith(b'\r\n'):
712 self.eol = b'\r\n'
712 self.eol = b'\r\n'
713 elif self.lines[0].endswith(b'\n'):
713 elif self.lines[0].endswith(b'\n'):
714 self.eol = b'\n'
714 self.eol = b'\n'
715 if eolmode != b'strict':
715 if eolmode != b'strict':
716 nlines = []
716 nlines = []
717 for l in self.lines:
717 for l in self.lines:
718 if l.endswith(b'\r\n'):
718 if l.endswith(b'\r\n'):
719 l = l[:-2] + b'\n'
719 l = l[:-2] + b'\n'
720 nlines.append(l)
720 nlines.append(l)
721 self.lines = nlines
721 self.lines = nlines
722 else:
722 else:
723 if self.create:
723 if self.create:
724 self.missing = False
724 self.missing = False
725 if self.mode is None:
725 if self.mode is None:
726 self.mode = (False, False)
726 self.mode = (False, False)
727 if self.missing:
727 if self.missing:
728 self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
728 self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
729 self.ui.warn(
729 self.ui.warn(
730 _(
730 _(
731 b"(use '--prefix' to apply patch relative to the "
731 b"(use '--prefix' to apply patch relative to the "
732 b"current directory)\n"
732 b"current directory)\n"
733 )
733 )
734 )
734 )
735
735
736 self.hash = {}
736 self.hash = {}
737 self.dirty = 0
737 self.dirty = 0
738 self.offset = 0
738 self.offset = 0
739 self.skew = 0
739 self.skew = 0
740 self.rej = []
740 self.rej = []
741 self.fileprinted = False
741 self.fileprinted = False
742 self.printfile(False)
742 self.printfile(False)
743 self.hunks = 0
743 self.hunks = 0
744
744
745 def writelines(self, fname, lines, mode):
745 def writelines(self, fname, lines, mode):
746 if self.eolmode == b'auto':
746 if self.eolmode == b'auto':
747 eol = self.eol
747 eol = self.eol
748 elif self.eolmode == b'crlf':
748 elif self.eolmode == b'crlf':
749 eol = b'\r\n'
749 eol = b'\r\n'
750 else:
750 else:
751 eol = b'\n'
751 eol = b'\n'
752
752
753 if self.eolmode != b'strict' and eol and eol != b'\n':
753 if self.eolmode != b'strict' and eol and eol != b'\n':
754 rawlines = []
754 rawlines = []
755 for l in lines:
755 for l in lines:
756 if l and l.endswith(b'\n'):
756 if l and l.endswith(b'\n'):
757 l = l[:-1] + eol
757 l = l[:-1] + eol
758 rawlines.append(l)
758 rawlines.append(l)
759 lines = rawlines
759 lines = rawlines
760
760
761 self.backend.setfile(fname, b''.join(lines), mode, self.copysource)
761 self.backend.setfile(fname, b''.join(lines), mode, self.copysource)
762
762
763 def printfile(self, warn):
763 def printfile(self, warn):
764 if self.fileprinted:
764 if self.fileprinted:
765 return
765 return
766 if warn or self.ui.verbose:
766 if warn or self.ui.verbose:
767 self.fileprinted = True
767 self.fileprinted = True
768 s = _(b"patching file %s\n") % self.fname
768 s = _(b"patching file %s\n") % self.fname
769 if warn:
769 if warn:
770 self.ui.warn(s)
770 self.ui.warn(s)
771 else:
771 else:
772 self.ui.note(s)
772 self.ui.note(s)
773
773
774 def findlines(self, l, linenum):
774 def findlines(self, l, linenum):
775 # looks through the hash and finds candidate lines. The
775 # looks through the hash and finds candidate lines. The
776 # result is a list of line numbers sorted based on distance
776 # result is a list of line numbers sorted based on distance
777 # from linenum
777 # from linenum
778
778
779 cand = self.hash.get(l, [])
779 cand = self.hash.get(l, [])
780 if len(cand) > 1:
780 if len(cand) > 1:
781 # resort our list of potentials forward then back.
781 # resort our list of potentials forward then back.
782 cand.sort(key=lambda x: abs(x - linenum))
782 cand.sort(key=lambda x: abs(x - linenum))
783 return cand
783 return cand
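# The sort above orders candidate lines by distance from the expected
# position; the same key function, standalone:
cand = [2, 40, 12]
cand.sort(key=lambda x: abs(x - 10))  # hunk expected around line 10
assert cand == [12, 2, 40]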
784
784
785 def write_rej(self):
785 def write_rej(self):
786 # our rejects are a little different from patch(1). This always
786 # our rejects are a little different from patch(1). This always
787 # creates rejects in the same form as the original patch. A file
787 # creates rejects in the same form as the original patch. A file
788 # header is inserted so that you can run the reject through patch again
788 # header is inserted so that you can run the reject through patch again
789 # without having to type the filename.
789 # without having to type the filename.
790 if not self.rej:
790 if not self.rej:
791 return
791 return
792 base = os.path.basename(self.fname)
792 base = os.path.basename(self.fname)
793 lines = [b"--- %s\n+++ %s\n" % (base, base)]
793 lines = [b"--- %s\n+++ %s\n" % (base, base)]
794 for x in self.rej:
794 for x in self.rej:
795 for l in x.hunk:
795 for l in x.hunk:
796 lines.append(l)
796 lines.append(l)
797 if l[-1:] != b'\n':
797 if l[-1:] != b'\n':
798 lines.append(b'\n' + diffhelper.MISSING_NEWLINE_MARKER)
798 lines.append(b'\n' + diffhelper.MISSING_NEWLINE_MARKER)
799 self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)
799 self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)
800
800
801 def apply(self, h):
801 def apply(self, h):
802 if not h.complete():
802 if not h.complete():
803 raise PatchParseError(
803 raise PatchParseError(
804 _(b"bad hunk #%d %s (%d %d %d %d)")
804 _(b"bad hunk #%d %s (%d %d %d %d)")
805 % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
805 % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
806 )
806 )
807
807
808 self.hunks += 1
808 self.hunks += 1
809
809
810 if self.missing:
810 if self.missing:
811 self.rej.append(h)
811 self.rej.append(h)
812 return -1
812 return -1
813
813
814 if self.exists and self.create:
814 if self.exists and self.create:
815 if self.copysource:
815 if self.copysource:
816 self.ui.warn(
816 self.ui.warn(
817 _(b"cannot create %s: destination already exists\n")
817 _(b"cannot create %s: destination already exists\n")
818 % self.fname
818 % self.fname
819 )
819 )
820 else:
820 else:
821 self.ui.warn(_(b"file %s already exists\n") % self.fname)
821 self.ui.warn(_(b"file %s already exists\n") % self.fname)
822 self.rej.append(h)
822 self.rej.append(h)
823 return -1
823 return -1
824
824
825 if isinstance(h, binhunk):
825 if isinstance(h, binhunk):
826 if self.remove:
826 if self.remove:
827 self.backend.unlink(self.fname)
827 self.backend.unlink(self.fname)
828 else:
828 else:
829 l = h.new(self.lines)
829 l = h.new(self.lines)
830 self.lines[:] = l
830 self.lines[:] = l
831 self.offset += len(l)
831 self.offset += len(l)
832 self.dirty = True
832 self.dirty = True
833 return 0
833 return 0
834
834
835 horig = h
835 horig = h
836 if (
836 if (
837 self.eolmode in (b'crlf', b'lf')
837 self.eolmode in (b'crlf', b'lf')
838 or self.eolmode == b'auto'
838 or self.eolmode == b'auto'
839 and self.eol
839 and self.eol
840 ):
840 ):
841 # If new eols are going to be normalized, then normalize
841 # If new eols are going to be normalized, then normalize
842 # hunk data before patching. Otherwise, preserve input
842 # hunk data before patching. Otherwise, preserve input
843 # line-endings.
843 # line-endings.
844 h = h.getnormalized()
844 h = h.getnormalized()
845
845
846 # fast case first, no offsets, no fuzz
846 # fast case first, no offsets, no fuzz
847 old, oldstart, new, newstart = h.fuzzit(0, False)
847 old, oldstart, new, newstart = h.fuzzit(0, False)
848 oldstart += self.offset
848 oldstart += self.offset
849 orig_start = oldstart
849 orig_start = oldstart
850 # if there's skew we want to emit the "(offset %d lines)" even
850 # if there's skew we want to emit the "(offset %d lines)" even
851 # when the hunk cleanly applies at start + skew, so skip the
851 # when the hunk cleanly applies at start + skew, so skip the
852 # fast case code
852 # fast case code
853 if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
853 if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
854 if self.remove:
854 if self.remove:
855 self.backend.unlink(self.fname)
855 self.backend.unlink(self.fname)
856 else:
856 else:
857 self.lines[oldstart : oldstart + len(old)] = new
857 self.lines[oldstart : oldstart + len(old)] = new
858 self.offset += len(new) - len(old)
858 self.offset += len(new) - len(old)
859 self.dirty = True
859 self.dirty = True
860 return 0
860 return 0
861
861
862 # ok, we couldn't match the hunk. Let's look for offsets and fuzz it
862 # ok, we couldn't match the hunk. Let's look for offsets and fuzz it
863 self.hash = {}
863 self.hash = {}
864 for x, s in enumerate(self.lines):
864 for x, s in enumerate(self.lines):
865 self.hash.setdefault(s, []).append(x)
865 self.hash.setdefault(s, []).append(x)
866
866
867 for fuzzlen in pycompat.xrange(
867 for fuzzlen in pycompat.xrange(
868 self.ui.configint(b"patch", b"fuzz") + 1
868 self.ui.configint(b"patch", b"fuzz") + 1
869 ):
869 ):
870 for toponly in [True, False]:
870 for toponly in [True, False]:
871 old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
871 old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
872 oldstart = oldstart + self.offset + self.skew
872 oldstart = oldstart + self.offset + self.skew
873 oldstart = min(oldstart, len(self.lines))
873 oldstart = min(oldstart, len(self.lines))
874 if old:
874 if old:
875 cand = self.findlines(old[0][1:], oldstart)
875 cand = self.findlines(old[0][1:], oldstart)
876 else:
876 else:
877 # Only adding lines with no or fuzzed context, just
877 # Only adding lines with no or fuzzed context, just
878 # take the skew into account
878 # take the skew into account
879 cand = [oldstart]
879 cand = [oldstart]
880
880
881 for l in cand:
881 for l in cand:
882 if not old or diffhelper.testhunk(old, self.lines, l):
882 if not old or diffhelper.testhunk(old, self.lines, l):
883 self.lines[l : l + len(old)] = new
883 self.lines[l : l + len(old)] = new
884 self.offset += len(new) - len(old)
884 self.offset += len(new) - len(old)
885 self.skew = l - orig_start
885 self.skew = l - orig_start
886 self.dirty = True
886 self.dirty = True
887 offset = l - orig_start - fuzzlen
887 offset = l - orig_start - fuzzlen
888 if fuzzlen:
888 if fuzzlen:
889 msg = _(
889 msg = _(
890 b"Hunk #%d succeeded at %d "
890 b"Hunk #%d succeeded at %d "
891 b"with fuzz %d "
891 b"with fuzz %d "
892 b"(offset %d lines).\n"
892 b"(offset %d lines).\n"
893 )
893 )
894 self.printfile(True)
894 self.printfile(True)
895 self.ui.warn(
895 self.ui.warn(
896 msg % (h.number, l + 1, fuzzlen, offset)
896 msg % (h.number, l + 1, fuzzlen, offset)
897 )
897 )
898 else:
898 else:
899 msg = _(
899 msg = _(
900 b"Hunk #%d succeeded at %d "
900 b"Hunk #%d succeeded at %d "
901 b"(offset %d lines).\n"
901 b"(offset %d lines).\n"
902 )
902 )
903 self.ui.note(msg % (h.number, l + 1, offset))
903 self.ui.note(msg % (h.number, l + 1, offset))
904 return fuzzlen
904 return fuzzlen
905 self.printfile(True)
905 self.printfile(True)
906 self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
906 self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
907 self.rej.append(horig)
907 self.rej.append(horig)
908 return -1
908 return -1
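# A simplified standalone analogue of the matching step used above (it
# mirrors diffhelper.testhunk but ignores the one-character diff
# prefixes that real hunk lines carry):
def matches_at(old, lines, start):
    return lines[start:start + len(old)] == old

buf = [b'a\n', b'b\n', b'c\n']
assert matches_at([b'b\n', b'c\n'], buf, 1)
assert not matches_at([b'b\n'], buf, 0)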
909
909
910 def close(self):
910 def close(self):
911 if self.dirty:
911 if self.dirty:
912 self.writelines(self.fname, self.lines, self.mode)
912 self.writelines(self.fname, self.lines, self.mode)
913 self.write_rej()
913 self.write_rej()
914 return len(self.rej)
914 return len(self.rej)
915
915
916
916
917 class header(object):
917 class header(object):
918 """patch header"""
918 """patch header"""
919
919
920 diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
920 diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
921 diff_re = re.compile(b'diff -r .* (.*)$')
921 diff_re = re.compile(b'diff -r .* (.*)$')
922 allhunks_re = re.compile(b'(?:index|deleted file) ')
922 allhunks_re = re.compile(b'(?:index|deleted file) ')
923 pretty_re = re.compile(b'(?:new file|deleted file) ')
923 pretty_re = re.compile(b'(?:new file|deleted file) ')
924 special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
924 special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
925 newfile_re = re.compile(b'(?:new file|copy to|rename to)')
925 newfile_re = re.compile(b'(?:new file|copy to|rename to)')
926
926
927 def __init__(self, header):
927 def __init__(self, header):
928 self.header = header
928 self.header = header
929 self.hunks = []
929 self.hunks = []
930
930
931 def binary(self):
931 def binary(self):
932 return any(h.startswith(b'index ') for h in self.header)
932 return any(h.startswith(b'index ') for h in self.header)
933
933
934 def pretty(self, fp):
934 def pretty(self, fp):
935 for h in self.header:
935 for h in self.header:
936 if h.startswith(b'index '):
936 if h.startswith(b'index '):
937 fp.write(_(b'this modifies a binary file (all or nothing)\n'))
937 fp.write(_(b'this modifies a binary file (all or nothing)\n'))
938 break
938 break
939 if self.pretty_re.match(h):
939 if self.pretty_re.match(h):
940 fp.write(h)
940 fp.write(h)
941 if self.binary():
941 if self.binary():
942 fp.write(_(b'this is a binary file\n'))
942 fp.write(_(b'this is a binary file\n'))
943 break
943 break
944 if h.startswith(b'---'):
944 if h.startswith(b'---'):
945 fp.write(
945 fp.write(
946 _(b'%d hunks, %d lines changed\n')
946 _(b'%d hunks, %d lines changed\n')
947 % (
947 % (
948 len(self.hunks),
948 len(self.hunks),
949 sum([max(h.added, h.removed) for h in self.hunks]),
949 sum([max(h.added, h.removed) for h in self.hunks]),
950 )
950 )
951 )
951 )
952 break
952 break
953 fp.write(h)
953 fp.write(h)
954
954
955 def write(self, fp):
955 def write(self, fp):
956 fp.write(b''.join(self.header))
956 fp.write(b''.join(self.header))
957
957
958 def allhunks(self):
958 def allhunks(self):
959 return any(self.allhunks_re.match(h) for h in self.header)
959 return any(self.allhunks_re.match(h) for h in self.header)
960
960
961 def files(self):
961 def files(self):
962 match = self.diffgit_re.match(self.header[0])
962 match = self.diffgit_re.match(self.header[0])
963 if match:
963 if match:
964 fromfile, tofile = match.groups()
964 fromfile, tofile = match.groups()
965 if fromfile == tofile:
965 if fromfile == tofile:
966 return [fromfile]
966 return [fromfile]
967 return [fromfile, tofile]
967 return [fromfile, tofile]
968 else:
968 else:
969 return self.diff_re.match(self.header[0]).groups()
969 return self.diff_re.match(self.header[0]).groups()
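# files() prefers the git header form; a collapsed illustration of the
# first branch (same source and destination -> a single entry):
m = header.diffgit_re.match(b'diff --git a/f b/f')
assert m.groups() == (b'f', b'f')  # files() would return [b'f']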
970
970
971 def filename(self):
971 def filename(self):
972 return self.files()[-1]
972 return self.files()[-1]
973
973
974 def __repr__(self):
974 def __repr__(self):
975 return '<header %s>' % (
975 return '<header %s>' % (
976 ' '.join(pycompat.rapply(pycompat.fsdecode, self.files()))
976 ' '.join(pycompat.rapply(pycompat.fsdecode, self.files()))
977 )
977 )
978
978
979 def isnewfile(self):
979 def isnewfile(self):
980 return any(self.newfile_re.match(h) for h in self.header)
980 return any(self.newfile_re.match(h) for h in self.header)
981
981
982 def special(self):
982 def special(self):
983 # Special files are shown only at the header level and not at the hunk
983 # Special files are shown only at the header level and not at the hunk
984 # level; for example, a file that has been deleted is a special file.
984 # level; for example, a file that has been deleted is a special file.
985 # The user cannot change the content of the operation: in the case of
985 # The user cannot change the content of the operation: in the case of
986 # a deleted file, the deletion must be taken or left as a whole; it
986 # a deleted file, the deletion must be taken or left as a whole; it
987 # cannot be taken partially.
987 # cannot be taken partially.
988 # Newly added files are special if they are empty; they are not special
988 # Newly added files are special if they are empty; they are not special
989 # if they have some content, as we want to be able to change it
989 # if they have some content, as we want to be able to change it
990 nocontent = len(self.header) == 2
990 nocontent = len(self.header) == 2
991 emptynewfile = self.isnewfile() and nocontent
991 emptynewfile = self.isnewfile() and nocontent
992 return emptynewfile or any(
992 return emptynewfile or any(
993 self.special_re.match(h) for h in self.header
993 self.special_re.match(h) for h in self.header
994 )
994 )
995
995
996
996
997 class recordhunk(object):
997 class recordhunk(object):
998 """patch hunk
998 """patch hunk
999
999
1000 XXX shouldn't we merge this with the other hunk class?
1000 XXX shouldn't we merge this with the other hunk class?
1001 """
1001 """
1002
1002
1003 def __init__(
1003 def __init__(
1004 self,
1004 self,
1005 header,
1005 header,
1006 fromline,
1006 fromline,
1007 toline,
1007 toline,
1008 proc,
1008 proc,
1009 before,
1009 before,
1010 hunk,
1010 hunk,
1011 after,
1011 after,
1012 maxcontext=None,
1012 maxcontext=None,
1013 ):
1013 ):
1014 def trimcontext(lines, reverse=False):
1014 def trimcontext(lines, reverse=False):
1015 if maxcontext is not None:
1015 if maxcontext is not None:
1016 delta = len(lines) - maxcontext
1016 delta = len(lines) - maxcontext
1017 if delta > 0:
1017 if delta > 0:
1018 if reverse:
1018 if reverse:
1019 return delta, lines[delta:]
1019 return delta, lines[delta:]
1020 else:
1020 else:
1021 return delta, lines[:maxcontext]
1021 return delta, lines[:maxcontext]
1022 return 0, lines
1022 return 0, lines
1023
1023
1024 self.header = header
1024 self.header = header
1025 trimedbefore, self.before = trimcontext(before, True)
1025 trimedbefore, self.before = trimcontext(before, True)
1026 self.fromline = fromline + trimedbefore
1026 self.fromline = fromline + trimedbefore
1027 self.toline = toline + trimedbefore
1027 self.toline = toline + trimedbefore
1028 _trimedafter, self.after = trimcontext(after, False)
1028 _trimedafter, self.after = trimcontext(after, False)
1029 self.proc = proc
1029 self.proc = proc
1030 self.hunk = hunk
1030 self.hunk = hunk
1031 self.added, self.removed = self.countchanges(self.hunk)
1031 self.added, self.removed = self.countchanges(self.hunk)
1032
1032
1033 def __eq__(self, v):
1033 def __eq__(self, v):
1034 if not isinstance(v, recordhunk):
1034 if not isinstance(v, recordhunk):
1035 return False
1035 return False
1036
1036
1037 return (
1037 return (
1038 (v.hunk == self.hunk)
1038 (v.hunk == self.hunk)
1039 and (v.proc == self.proc)
1039 and (v.proc == self.proc)
1040 and (self.fromline == v.fromline)
1040 and (self.fromline == v.fromline)
1041 and (self.header.files() == v.header.files())
1041 and (self.header.files() == v.header.files())
1042 )
1042 )
1043
1043
1044 def __hash__(self):
1044 def __hash__(self):
1045 return hash(
1045 return hash(
1046 (
1046 (
1047 tuple(self.hunk),
1047 tuple(self.hunk),
1048 tuple(self.header.files()),
1048 tuple(self.header.files()),
1049 self.fromline,
1049 self.fromline,
1050 self.proc,
1050 self.proc,
1051 )
1051 )
1052 )
1052 )
1053
1053
1054 def countchanges(self, hunk):
1054 def countchanges(self, hunk):
1055 """hunk -> (n+,n-)"""
1055 """hunk -> (n+,n-)"""
1056 add = len([h for h in hunk if h.startswith(b'+')])
1056 add = len([h for h in hunk if h.startswith(b'+')])
1057 rem = len([h for h in hunk if h.startswith(b'-')])
1057 rem = len([h for h in hunk if h.startswith(b'-')])
1058 return add, rem
1058 return add, rem
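# countchanges() simply tallies the +/- prefixed lines; for example:
hunk = [b'+new\n', b'-old\n', b'+more\n']
add = len([h for h in hunk if h.startswith(b'+')])
rem = len([h for h in hunk if h.startswith(b'-')])
assert (add, rem) == (2, 1)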
1059
1059
1060 def reversehunk(self):
1060 def reversehunk(self):
1061 """return another recordhunk which is the reverse of the hunk
1061 """return another recordhunk which is the reverse of the hunk
1062
1062
1063 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
1063 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
1064 that, swap fromline/toline and +/- signs while keeping other things
1064 that, swap fromline/toline and +/- signs while keeping other things
1065 unchanged.
1065 unchanged.
1066 """
1066 """
1067 m = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
1067 m = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
1068 hunk = [b'%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
1068 hunk = [b'%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
1069 return recordhunk(
1069 return recordhunk(
1070 self.header,
1070 self.header,
1071 self.toline,
1071 self.toline,
1072 self.fromline,
1072 self.fromline,
1073 self.proc,
1073 self.proc,
1074 self.before,
1074 self.before,
1075 hunk,
1075 hunk,
1076 self.after,
1076 self.after,
1077 )
1077 )
1078
1078
1079 def write(self, fp):
1079 def write(self, fp):
1080 delta = len(self.before) + len(self.after)
1080 delta = len(self.before) + len(self.after)
1081 if self.after and self.after[-1] == diffhelper.MISSING_NEWLINE_MARKER:
1081 if self.after and self.after[-1] == diffhelper.MISSING_NEWLINE_MARKER:
1082 delta -= 1
1082 delta -= 1
1083 fromlen = delta + self.removed
1083 fromlen = delta + self.removed
1084 tolen = delta + self.added
1084 tolen = delta + self.added
1085 fp.write(
1085 fp.write(
1086 b'@@ -%d,%d +%d,%d @@%s\n'
1086 b'@@ -%d,%d +%d,%d @@%s\n'
1087 % (
1087 % (
1088 self.fromline,
1088 self.fromline,
1089 fromlen,
1089 fromlen,
1090 self.toline,
1090 self.toline,
1091 tolen,
1091 tolen,
1092 self.proc and (b' ' + self.proc),
1092 self.proc and (b' ' + self.proc),
1093 )
1093 )
1094 )
1094 )
1095 fp.write(b''.join(self.before + self.hunk + self.after))
1095 fp.write(b''.join(self.before + self.hunk + self.after))
1096
1096
1097 pretty = write
1097 pretty = write
1098
1098
1099 def filename(self):
1099 def filename(self):
1100 return self.header.filename()
1100 return self.header.filename()
1101
1101
1102 @encoding.strmethod
1102 @encoding.strmethod
1103 def __repr__(self):
1103 def __repr__(self):
1104 return b'<hunk %r@%d>' % (self.filename(), self.fromline)
1104 return b'<hunk %r@%d>' % (self.filename(), self.fromline)
1105
1105
1106
1106
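# Note: building these prompt strings inside a function (rather than as a
# module-level constant) is presumably deliberate, so that each _() lookup
# happens at call time, once i18n is configured.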
def getmessages():
    return {
        b'multiple': {
            b'apply': _(b"apply change %d/%d to '%s'?"),
            b'discard': _(b"discard change %d/%d to '%s'?"),
            b'keep': _(b"keep change %d/%d to '%s'?"),
            b'record': _(b"record change %d/%d to '%s'?"),
        },
        b'single': {
            b'apply': _(b"apply this change to '%s'?"),
            b'discard': _(b"discard this change to '%s'?"),
            b'keep': _(b"keep this change to '%s'?"),
            b'record': _(b"record this change to '%s'?"),
        },
        b'help': {
            b'apply': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, apply this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Apply remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Apply &all changes to all remaining files'
                b'$$ &Quit, applying no changes'
                b'$$ &? (display help)'
            ),
            b'discard': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, discard this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Discard remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Discard &all changes to all remaining files'
                b'$$ &Quit, discarding no changes'
                b'$$ &? (display help)'
            ),
            b'keep': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, keep this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Keep remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Keep &all changes to all remaining files'
                b'$$ &Quit, keeping all changes'
                b'$$ &? (display help)'
            ),
            b'record': _(
                b'[Ynesfdaq?]'
                b'$$ &Yes, record this change'
                b'$$ &No, skip this change'
                b'$$ &Edit this change manually'
                b'$$ &Skip remaining changes to this file'
                b'$$ Record remaining changes to this &file'
                b'$$ &Done, skip remaining changes and files'
                b'$$ Record &all changes to all remaining files'
                b'$$ &Quit, recording no changes'
                b'$$ &? (display help)'
            ),
        },
    }


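# The b'help' strings above are in ui.promptchoice()'s b'$$'-separated
# format: nine choices whose 0-based indices (0=yes, 1=no, 2=edit,
# 3=skip rest of file, 4=rest of file, 5=done, 6=all, 7=quit, 8=help)
# line up with the r == N comparisons in prompt() below.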
def filterpatch(ui, headers, match, operation=None):
    """Interactively filter patch chunks into applied-only chunks"""
    messages = getmessages()

    if operation is None:
        operation = b'record'

    def prompt(skipfile, skipall, query, chunk):
        """prompt query, and process base inputs

        - y/n for the rest of file
        - y/n for the rest
        - ? (help)
        - q (quit)

        Return True/False and possibly updated skipfile and skipall.
        """
        newpatches = None
        if skipall is not None:
            return skipall, skipfile, skipall, newpatches
        if skipfile is not None:
            return skipfile, skipfile, skipall, newpatches
        while True:
            resps = messages[b'help'][operation]
            # IMPORTANT: keep the last line of this prompt short (<40 English
            # chars is a good target) because of issue6158.
            r = ui.promptchoice(b"%s\n(enter ? for help) %s" % (query, resps))
            ui.write(b"\n")
            if r == 8:  # ?
                for c, t in ui.extractchoices(resps)[1]:
                    ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
                continue
            elif r == 0:  # yes
                ret = True
            elif r == 1:  # no
                ret = False
            elif r == 2:  # Edit patch
                if chunk is None:
                    ui.write(_(b'cannot edit patch for whole file'))
                    ui.write(b"\n")
                    continue
                if chunk.header.binary():
                    ui.write(_(b'cannot edit patch for binary file'))
                    ui.write(b"\n")
                    continue
                # Patch comment based on the Git one (based on comment at end
                # of https://mercurial-scm.org/wiki/RecordExtension)
                phelp = b'---' + _(
                    b"""
To remove '-' lines, make them ' ' lines (context).
To remove '+' lines, delete them.
Lines starting with # will be removed from the patch.

If the patch applies cleanly, the edited hunk will immediately be
added to the record list. If it does not apply cleanly, a rejects
file will be generated: you can use that when you try again. If
all lines of the hunk are removed, then the edit is aborted and
the hunk is left unchanged.
"""
                )
                (patchfd, patchfn) = pycompat.mkstemp(
                    prefix=b"hg-editor-", suffix=b".diff"
                )
                ncpatchfp = None
                try:
                    # Write the initial patch
                    f = util.nativeeolwriter(os.fdopen(patchfd, 'wb'))
                    chunk.header.write(f)
                    chunk.write(f)
                    f.write(
                        b''.join(
                            [b'# ' + i + b'\n' for i in phelp.splitlines()]
                        )
                    )
                    f.close()
                    # Start the editor and wait for it to complete
                    editor = ui.geteditor()
                    ret = ui.system(
                        b"%s \"%s\"" % (editor, patchfn),
                        environ={b'HGUSER': ui.username()},
                        blockedtag=b'filterpatch',
                    )
                    if ret != 0:
                        ui.warn(_(b"editor exited with exit code %d\n") % ret)
                        continue
                    # Remove comment lines
                    patchfp = open(patchfn, 'rb')
                    ncpatchfp = stringio()
                    for line in patchfp:
                        line = util.fromnativeeol(line)
                        if not line.startswith(b'#'):
                            ncpatchfp.write(line)
                    patchfp.close()
                    ncpatchfp.seek(0)
                    newpatches = parsepatch(ncpatchfp)
                finally:
                    os.unlink(patchfn)
                    del ncpatchfp
                # Signal that the chunk shouldn't be applied as-is, but
                # provide the new patch to be used instead.
                ret = False
            elif r == 3:  # Skip
                ret = skipfile = False
            elif r == 4:  # file (Record remaining)
                ret = skipfile = True
            elif r == 5:  # done, skip remaining
                ret = skipall = False
            elif r == 6:  # all
                ret = skipall = True
            elif r == 7:  # quit
                raise error.CanceledError(_(b'user quit'))
            return ret, skipfile, skipall, newpatches

    seen = set()
    applied = {}  # 'filename' -> [] of chunks
    skipfile, skipall = None, None
    pos, total = 1, sum(len(h.hunks) for h in headers)
    for h in headers:
        pos += len(h.hunks)
        skipfile = None
        fixoffset = 0
        hdr = b''.join(h.header)
        if hdr in seen:
            continue
        seen.add(hdr)
        if skipall is None:
            h.pretty(ui)
        files = h.files()
        msg = _(b'examine changes to %s?') % _(b' and ').join(
            b"'%s'" % f for f in files
        )
        if all(match.exact(f) for f in files):
            r, skipall, np = True, None, None
        else:
            r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
        if not r:
            continue
        applied[h.filename()] = [h]
        if h.allhunks():
            applied[h.filename()] += h.hunks
            continue
        for i, chunk in enumerate(h.hunks):
            if skipfile is None and skipall is None:
                chunk.pretty(ui)
            if total == 1:
                msg = messages[b'single'][operation] % chunk.filename()
            else:
                idx = pos - len(h.hunks) + i
                msg = messages[b'multiple'][operation] % (
                    idx,
                    total,
                    chunk.filename(),
                )
            r, skipfile, skipall, newpatches = prompt(
                skipfile, skipall, msg, chunk
            )
            if r:
                if fixoffset:
                    chunk = copy.copy(chunk)
                    chunk.toline += fixoffset
                applied[chunk.filename()].append(chunk)
            elif newpatches is not None:
                for newpatch in newpatches:
                    for newhunk in newpatch.hunks:
                        if fixoffset:
                            newhunk.toline += fixoffset
                        applied[newhunk.filename()].append(newhunk)
            else:
                fixoffset += chunk.removed - chunk.added
    return (
        sum(
            [h for h in applied.values() if h[0].special() or len(h) > 1],
            [],
        ),
        {},
    )


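# Unlike recordhunk above (built programmatically), this class is built by
# parsing patch text: it handles both unified hunks (b'@@ -s,l +s,l @@'
# headers, matched by unidesc) and context-diff hunks (b'*** s,e ****'
# blocks, matched by contextdesc), accumulating old lines in self.a and new
# lines in self.b.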
class hunk(object):
    def __init__(self, desc, num, lr, context):
        self.number = num
        self.desc = desc
        self.hunk = [desc]
        self.a = []
        self.b = []
        self.starta = self.lena = None
        self.startb = self.lenb = None
        if lr is not None:
            if context:
                self.read_context_hunk(lr)
            else:
                self.read_unified_hunk(lr)

    def getnormalized(self):
        """Return a copy with line endings normalized to LF."""

        def normalize(lines):
            nlines = []
            for line in lines:
                if line.endswith(b'\r\n'):
                    line = line[:-2] + b'\n'
                nlines.append(line)
            return nlines

        # Dummy object, it is rebuilt manually
        nh = hunk(self.desc, self.number, None, None)
        nh.number = self.number
        nh.desc = self.desc
        nh.hunk = self.hunk
        nh.a = normalize(self.a)
        nh.b = normalize(self.b)
        nh.starta = self.starta
        nh.startb = self.startb
        nh.lena = self.lena
        nh.lenb = self.lenb
        return nh

    def read_unified_hunk(self, lr):
        m = unidesc.match(self.desc)
        if not m:
            raise PatchParseError(_(b"bad hunk #%d") % self.number)
        self.starta, self.lena, self.startb, self.lenb = m.groups()
        if self.lena is None:
            self.lena = 1
        else:
            self.lena = int(self.lena)
        if self.lenb is None:
            self.lenb = 1
        else:
            self.lenb = int(self.lenb)
        self.starta = int(self.starta)
        self.startb = int(self.startb)
        try:
            diffhelper.addlines(
                lr, self.hunk, self.lena, self.lenb, self.a, self.b
            )
        except error.ParseError as e:
            raise PatchParseError(_(b"bad hunk #%d: %s") % (self.number, e))
        # if we hit eof before finishing out the hunk, the last line will
        # be zero length. Let's try to fix it up.
        while len(self.hunk[-1]) == 0:
            del self.hunk[-1]
            del self.a[-1]
            del self.b[-1]
            self.lena -= 1
            self.lenb -= 1
        self._fixnewline(lr)

    def read_context_hunk(self, lr):
        self.desc = lr.readline()
        m = contextdesc.match(self.desc)
        if not m:
            raise PatchParseError(_(b"bad hunk #%d") % self.number)
        self.starta, aend = m.groups()
        self.starta = int(self.starta)
        if aend is None:
            aend = self.starta
        self.lena = int(aend) - self.starta
        if self.starta:
            self.lena += 1
        for x in pycompat.xrange(self.lena):
            l = lr.readline()
            if l.startswith(b'---'):
                # lines addition, old block is empty
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'- ') or l.startswith(b'! '):
                u = b'-' + s
            elif l.startswith(b'  '):
                u = b' ' + s
            else:
                raise PatchParseError(
                    _(b"bad hunk #%d old text line %d") % (self.number, x)
                )
            self.a.append(u)
            self.hunk.append(u)

        l = lr.readline()
        if l.startswith(br'\ '):
            s = self.a[-1][:-1]
            self.a[-1] = s
            self.hunk[-1] = s
            l = lr.readline()
        m = contextdesc.match(l)
        if not m:
            raise PatchParseError(_(b"bad hunk #%d") % self.number)
        self.startb, bend = m.groups()
        self.startb = int(self.startb)
        if bend is None:
            bend = self.startb
        self.lenb = int(bend) - self.startb
        if self.startb:
            self.lenb += 1
        hunki = 1
        for x in pycompat.xrange(self.lenb):
            l = lr.readline()
            if l.startswith(br'\ '):
                # XXX: the only way to hit this is with an invalid line range.
                # The no-eol marker is not counted in the line range, but I
                # guess there are diff(1) implementations out there which
                # behave differently.
                s = self.b[-1][:-1]
                self.b[-1] = s
                self.hunk[hunki - 1] = s
                continue
            if not l:
                # line deletions, new block is empty and we hit EOF
                lr.push(l)
                break
            s = l[2:]
            if l.startswith(b'+ ') or l.startswith(b'! '):
                u = b'+' + s
            elif l.startswith(b'  '):
                u = b' ' + s
            elif len(self.b) == 0:
                # line deletions, new block is empty
                lr.push(l)
                break
            else:
                raise PatchParseError(
                    _(b"bad hunk #%d new text line %d") % (self.number, x)
                )
            self.b.append(s)
            while True:
                if hunki >= len(self.hunk):
                    h = b""
                else:
                    h = self.hunk[hunki]
                hunki += 1
                if h == u:
                    break
                elif h.startswith(b'-'):
                    continue
                else:
                    self.hunk.insert(hunki - 1, u)
                    break

        if not self.a:
            # this happens when lines were only added to the hunk
            for x in self.hunk:
                if x.startswith(b'-') or x.startswith(b' '):
                    self.a.append(x)
        if not self.b:
            # this happens when lines were only deleted from the hunk
            for x in self.hunk:
                if x.startswith(b'+') or x.startswith(b' '):
                    self.b.append(x[1:])
        # @@ -start,len +start,len @@
        self.desc = b"@@ -%d,%d +%d,%d @@\n" % (
            self.starta,
            self.lena,
            self.startb,
            self.lenb,
        )
        self.hunk[0] = self.desc
        self._fixnewline(lr)

    def _fixnewline(self, lr):
        l = lr.readline()
        if l.startswith(br'\ '):
            diffhelper.fixnewline(self.hunk, self.a, self.b)
        else:
            lr.push(l)

    def complete(self):
        return len(self.a) == self.lena and len(self.b) == self.lenb

    def _fuzzit(self, old, new, fuzz, toponly):
        # this removes context lines from the top and bottom of the given
        # lists. It checks the hunk to make sure only context lines are
        # removed, and then returns a new shortened list of lines.
        fuzz = min(fuzz, len(old))
        if fuzz:
            top = 0
            bot = 0
            hlen = len(self.hunk)
            for x in pycompat.xrange(hlen - 1):
                # the hunk starts with the @@ line, so use x+1
                if self.hunk[x + 1].startswith(b' '):
                    top += 1
                else:
                    break
            if not toponly:
                for x in pycompat.xrange(hlen - 1):
                    if self.hunk[hlen - bot - 1].startswith(b' '):
                        bot += 1
                    else:
                        break

            bot = min(fuzz, bot)
            top = min(fuzz, top)
            return old[top : len(old) - bot], new[top : len(new) - bot], top
        return old, new, 0

    def fuzzit(self, fuzz, toponly):
        old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
        oldstart = self.starta + top
        newstart = self.startb + top
        # zero length hunk ranges already have their start decremented
        if self.lena and oldstart > 0:
            oldstart -= 1
        if self.lenb and newstart > 0:
            newstart -= 1
        return old, oldstart, new, newstart


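# Git binary patches carry either a full b'literal <size>' payload or a
# b'delta <size>' against the previous contents.  Each payload line starts
# with a letter encoding its decoded length (A-Z = 1-26, a-z = 27-52),
# followed by base85 data; the concatenated result is zlib-compressed,
# which is exactly what _read() below undoes.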
class binhunk(object):
    """A binary patch file."""

    def __init__(self, lr, fname):
        self.text = None
        self.delta = False
        self.hunk = [b'GIT binary patch\n']
        self._fname = fname
        self._read(lr)

    def complete(self):
        return self.text is not None

    def new(self, lines):
        if self.delta:
            return [applybindelta(self.text, b''.join(lines))]
        return [self.text]

    def _read(self, lr):
        def getline(lr, hunk):
            l = lr.readline()
            hunk.append(l)
            return l.rstrip(b'\r\n')

        while True:
            line = getline(lr, self.hunk)
            if not line:
                raise PatchParseError(
                    _(b'could not extract "%s" binary data') % self._fname
                )
            if line.startswith(b'literal '):
                size = int(line[8:].rstrip())
                break
            if line.startswith(b'delta '):
                size = int(line[6:].rstrip())
                self.delta = True
                break
        dec = []
        line = getline(lr, self.hunk)
        while len(line) > 1:
            l = line[0:1]
            if l <= b'Z' and l >= b'A':
                l = ord(l) - ord(b'A') + 1
            else:
                l = ord(l) - ord(b'a') + 27
            try:
                dec.append(util.b85decode(line[1:])[:l])
            except ValueError as e:
                raise PatchParseError(
                    _(b'could not decode "%s" binary patch: %s')
                    % (self._fname, stringutil.forcebytestr(e))
                )
            line = getline(lr, self.hunk)
        text = zlib.decompress(b''.join(dec))
        if len(text) != size:
            raise PatchParseError(
                _(b'"%s" length is %d bytes, should be %d')
                % (self._fname, len(text), size)
            )
        self.text = text


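# Worked example (illustrative input): parsefilename(b'--- a/foo.c\t2024')
# skips the 4-byte b'--- ' marker and returns b'a/foo.c', cutting at the
# first tab or, failing that, the first space.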
def parsefilename(str):
    # --- filename \t|space stuff
    s = str[4:].rstrip(b'\r\n')
    i = s.find(b'\t')
    if i < 0:
        i = s.find(b' ')
        if i < 0:
            return s
    return s[:i]


def reversehunks(hunks):
    '''reverse the signs in the hunks given as argument

    This function operates on hunks coming out of patch.filterpatch, that is
    a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:

    >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,7 +1,7 @@
    ... +firstline
    ...  c
    ...  1
    ...  2
    ... + 3
    ... -4
    ...  5
    ...  d
    ... +lastline"""
    >>> hunks = parsepatch([rawpatch])
    >>> hunkscomingfromfilterpatch = []
    >>> for h in hunks:
    ...     hunkscomingfromfilterpatch.append(h)
    ...     hunkscomingfromfilterpatch.extend(h.hunks)

    >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
    >>> from . import util
    >>> fp = util.stringio()
    >>> for c in reversedhunks:
    ...     c.write(fp)
    >>> fp.seek(0) or None
    >>> reversedpatch = fp.read()
    >>> print(pycompat.sysstr(reversedpatch))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -1,4 +1,3 @@
    -firstline
     c
     1
     2
    @@ -2,6 +1,6 @@
     c
     1
     2
    - 3
    +4
     5
     d
    @@ -6,3 +5,2 @@
     5
     d
    -lastline

    '''

    newhunks = []
    for c in hunks:
        if util.safehasattr(c, b'reversehunk'):
            c = c.reversehunk()
        newhunks.append(c)
    return newhunks


def parsepatch(originalchunks, maxcontext=None):
    """patch -> [] of headers -> [] of hunks

    If maxcontext is not None, trim context lines if necessary.

    >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
    ... --- a/folder1/g
    ... +++ b/folder1/g
    ... @@ -1,8 +1,10 @@
    ...  1
    ...  2
    ... -3
    ...  4
    ...  5
    ...  6
    ... +6.1
    ... +6.2
    ...  7
    ...  8
    ... +9'''
    >>> out = util.stringio()
    >>> headers = parsepatch([rawpatch], maxcontext=1)
    >>> for header in headers:
    ...     header.write(out)
    ...     for hunk in header.hunks:
    ...         hunk.write(out)
    >>> print(pycompat.sysstr(out.getvalue()))
    diff --git a/folder1/g b/folder1/g
    --- a/folder1/g
    +++ b/folder1/g
    @@ -2,3 +2,2 @@
     2
    -3
     4
    @@ -6,2 +5,4 @@
     6
    +6.1
    +6.2
     7
    @@ -8,1 +9,2 @@
     8
    +9
    """

    class parser(object):
        """patch parsing state machine"""

        def __init__(self):
            self.fromline = 0
            self.toline = 0
            self.proc = b''
            self.header = None
            self.context = []
            self.before = []
            self.hunk = []
            self.headers = []

        def addrange(self, limits):
            self.addcontext([])
            fromstart, fromend, tostart, toend, proc = limits
            self.fromline = int(fromstart)
            self.toline = int(tostart)
            self.proc = proc

        def addcontext(self, context):
            if self.hunk:
                h = recordhunk(
                    self.header,
                    self.fromline,
                    self.toline,
                    self.proc,
                    self.before,
                    self.hunk,
                    context,
                    maxcontext,
                )
                self.header.hunks.append(h)
                self.fromline += len(self.before) + h.removed
                self.toline += len(self.before) + h.added
                self.before = []
                self.hunk = []
            self.context = context

        def addhunk(self, hunk):
            if self.context:
                self.before = self.context
                self.context = []
            if self.hunk:
                self.addcontext([])
            self.hunk = hunk

        def newfile(self, hdr):
            self.addcontext([])
            h = header(hdr)
            self.headers.append(h)
            self.header = h

        def addother(self, line):
            pass  # 'other' lines are ignored

        def finished(self):
            self.addcontext([])
            return self.headers

        transitions = {
            b'file': {
                b'context': addcontext,
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
            },
            b'context': {
                b'file': newfile,
                b'hunk': addhunk,
                b'range': addrange,
                b'other': addother,
            },
            b'hunk': {
                b'context': addcontext,
                b'file': newfile,
                b'range': addrange,
            },
            b'range': {b'context': addcontext, b'hunk': addhunk},
            b'other': {b'other': addother},
        }

    p = parser()
    fp = stringio()
    fp.write(b''.join(originalchunks))
    fp.seek(0)

    state = b'context'
    for newstate, data in scanpatch(fp):
        try:
            p.transitions[state][newstate](p, data)
        except KeyError:
            raise PatchParseError(
                b'unhandled transition: %s -> %s' % (state, newstate)
            )
        state = newstate
    del fp
    return p.finished()


def pathtransform(path, strip, prefix):
    """turn a path from a patch into a path suitable for the repository

    prefix, if not empty, is expected to be normalized with a / at the end.

    Returns (stripped components, path in repository).

    >>> pathtransform(b'a/b/c', 0, b'')
    ('', 'a/b/c')
    >>> pathtransform(b'   a/b/c   ', 0, b'')
    ('', '   a/b/c')
    >>> pathtransform(b'   a/b/c   ', 2, b'')
    ('a/b/', 'c')
    >>> pathtransform(b'a/b/c', 0, b'd/e/')
    ('', 'd/e/a/b/c')
    >>> pathtransform(b'   a//b/c   ', 2, b'd/e/')
    ('a//b/', 'd/e/c')
    >>> pathtransform(b'a/b/c', 3, b'')
    Traceback (most recent call last):
    PatchApplicationError: unable to strip away 1 of 3 dirs from a/b/c
    """
    pathlen = len(path)
    i = 0
    if strip == 0:
        return b'', prefix + path.rstrip()
    count = strip
    while count > 0:
        i = path.find(b'/', i)
        if i == -1:
            raise PatchApplicationError(
                _(b"unable to strip away %d of %d dirs from %s")
                % (count, strip, path)
            )
        i += 1
        # consume '//' in the path
        while i < pathlen - 1 and path[i : i + 1] == b'/':
            i += 1
        count -= 1
    return path[:i].lstrip(), prefix + path[i:].rstrip()


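# Convention encoded by the checks below: a created file diffs from
# b'/dev/null' with a zero source range (-0,0), and a deleted file diffs to
# b'/dev/null' with a zero target range (+0,0).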
def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
    nulla = afile_orig == b"/dev/null"
    nullb = bfile_orig == b"/dev/null"
    create = nulla and hunk.starta == 0 and hunk.lena == 0
    remove = nullb and hunk.startb == 0 and hunk.lenb == 0
    abase, afile = pathtransform(afile_orig, strip, prefix)
    gooda = not nulla and backend.exists(afile)
    bbase, bfile = pathtransform(bfile_orig, strip, prefix)
    if afile == bfile:
        goodb = gooda
    else:
        goodb = not nullb and backend.exists(bfile)
    missing = not goodb and not gooda and not create

    # some diff programs apparently produce patches where the afile is
    # not /dev/null, but afile starts with bfile
    abasedir = afile[: afile.rfind(b'/') + 1]
    bbasedir = bfile[: bfile.rfind(b'/') + 1]
    if (
        missing
        and abasedir == bbasedir
        and afile.startswith(bfile)
        and hunk.starta == 0
        and hunk.lena == 0
    ):
        create = True
        missing = False

    # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
    # diff is between a file and its backup. In this case, the original
    # file should be patched (see original mpatch code).
    isbackup = abase == bbase and bfile.startswith(afile)
    fname = None
    if not missing:
        if gooda and goodb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif gooda:
            fname = afile

    if not fname:
        if not nullb:
            if isbackup:
                fname = afile
            else:
                fname = bfile
        elif not nulla:
            fname = afile
        else:
            raise PatchParseError(_(b"undefined source and destination files"))

    gp = patchmeta(fname)
    if create:
        gp.op = b'ADD'
    elif remove:
        gp.op = b'DELETE'
    return gp


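# Concrete example of the b'range' event: lines_re applied to
# b'@@ -1,7 +1,7 @@ def foo():' yields the groups
# (b'1', b'7', b'1', b'7', b'def foo():').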
def scanpatch(fp):
    """like patch.iterhunks, but yield different events

    - ('file', [header_lines + fromfile + tofile])
    - ('context', [context_lines])
    - ('hunk', [hunk_lines])
    - ('range', (-start,len, +start,len, proc))
    """
    lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
    lr = linereader(fp)

    def scanwhile(first, p):
        """scan lr while predicate holds"""
        lines = [first]
        for line in iter(lr.readline, b''):
            if p(line):
                lines.append(line)
            else:
                lr.push(line)
                break
        return lines

    for line in iter(lr.readline, b''):
        if line.startswith(b'diff --git a/') or line.startswith(b'diff -r '):

            def notheader(line):
                s = line.split(None, 1)
                return not s or s[0] not in (b'---', b'diff')

            header = scanwhile(line, notheader)
            fromfile = lr.readline()
            if fromfile.startswith(b'---'):
                tofile = lr.readline()
                header += [fromfile, tofile]
            else:
                lr.push(fromfile)
            yield b'file', header
        elif line.startswith(b' '):
            cs = (b' ', b'\\')
            yield b'context', scanwhile(line, lambda l: l.startswith(cs))
        elif line.startswith((b'-', b'+')):
            cs = (b'-', b'+', b'\\')
            yield b'hunk', scanwhile(line, lambda l: l.startswith(cs))
        else:
            m = lines_re.match(line)
            if m:
                yield b'range', m.groups()
            else:
                yield b'other', line


def scangitpatch(lr, firstline):
    """
    Git patches can emit:
    - rename a to b
    - change b
    - copy a to c
    - change c

    We cannot apply this sequence as-is, the renamed 'a' could not be
    found for it would have been renamed already. And we cannot copy
    from 'b' instead because 'b' would have been changed already. So
    we scan the git patch for copy and rename commands so we can
    perform the copies ahead of time.
    """
    pos = 0
    try:
        pos = lr.fp.tell()
        fp = lr.fp
    except IOError:
        fp = stringio(lr.fp.read())
    gitlr = linereader(fp)
    gitlr.push(firstline)
    gitpatches = readgitpatch(gitlr)
    fp.seek(pos)
    return gitpatches


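# For a simple one-file git patch, the event stream below is: one
# (b'git', [...]) event when the first b'diff --git' line is seen, then a
# (b'file', (afile, bfile, firsthunk, gp)) event once the file's first hunk
# arrives, then one (b'hunk', h) event per hunk.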
2041 def iterhunks(fp):
def iterhunks(fp):
    """Read a patch and yield the following events:
    - ("file", afile, bfile, firsthunk): select a new target file.
    - ("hunk", hunk): a new hunk is ready to be applied, follows a
    "file" event.
    - ("git", gitchanges): current diff is in git format, gitchanges
    maps filenames to gitpatch records. Unique event.
    """
    afile = b""
    bfile = b""
    state = None
    hunknum = 0
    emitfile = newfile = False
    gitpatches = None

    # our states
    BFILE = 1
    context = None
    lr = linereader(fp)

    for x in iter(lr.readline, b''):
        if state == BFILE and (
            (not context and x.startswith(b'@'))
            or (context is not False and x.startswith(b'***************'))
            or x.startswith(b'GIT binary patch')
        ):
            gp = None
            if gitpatches and gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
            if x.startswith(b'GIT binary patch'):
                h = binhunk(lr, gp.path)
            else:
                if context is None and x.startswith(b'***************'):
                    context = True
                h = hunk(x, hunknum + 1, lr, context)
            hunknum += 1
            if emitfile:
                emitfile = False
                yield b'file', (afile, bfile, h, gp and gp.copy() or None)
            yield b'hunk', h
        elif x.startswith(b'diff --git a/'):
            m = gitre.match(x.rstrip(b'\r\n'))
            if not m:
                continue
            if gitpatches is None:
                # scan whole input for git metadata
                gitpatches = scangitpatch(lr, x)
                yield b'git', [
                    g.copy() for g in gitpatches if g.op in (b'COPY', b'RENAME')
                ]
                gitpatches.reverse()
            afile = b'a/' + m.group(1)
            bfile = b'b/' + m.group(2)
            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
                gp = gitpatches.pop()
                yield b'file', (
                    b'a/' + gp.path,
                    b'b/' + gp.path,
                    None,
                    gp.copy(),
                )
            if not gitpatches:
                raise PatchParseError(
                    _(b'failed to synchronize metadata for "%s"') % afile[2:]
                )
            newfile = True
        elif x.startswith(b'---'):
            # check for a unified diff
            l2 = lr.readline()
            if not l2.startswith(b'+++'):
                lr.push(l2)
                continue
            newfile = True
            context = False
            afile = parsefilename(x)
            bfile = parsefilename(l2)
        elif x.startswith(b'***'):
            # check for a context diff
            l2 = lr.readline()
            if not l2.startswith(b'---'):
                lr.push(l2)
                continue
            l3 = lr.readline()
            lr.push(l3)
            if not l3.startswith(b"***************"):
                lr.push(l2)
                continue
            newfile = True
            context = True
            afile = parsefilename(x)
            bfile = parsefilename(l2)

        if newfile:
            newfile = False
            emitfile = True
            state = BFILE
            hunknum = 0

    while gitpatches:
        gp = gitpatches.pop()
        yield b'file', (b'a/' + gp.path, b'b/' + gp.path, None, gp.copy())


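# Illustrative note (not from the original source): for a minimal unified
# diff touching one file with a single hunk, iterhunks() yields an event
# stream shaped like
#
#   (b'file', (b'a/foo.txt', b'b/foo.txt', <first hunk>, None))
#   (b'hunk', <hunk>)
#
# with one extra (b'hunk', ...) event per additional hunk, and a leading
# (b'git', [...]) event only when the input is a git-format diff.
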
def applybindelta(binchunk, data):
    """Apply a binary delta hunk
    The algorithm used is the algorithm from git's patch-delta.c
    """

    def deltahead(binchunk):
        i = 0
        for c in pycompat.bytestr(binchunk):
            i += 1
            if not (ord(c) & 0x80):
                return i
        return i

    out = b""
    s = deltahead(binchunk)
    binchunk = binchunk[s:]
    s = deltahead(binchunk)
    binchunk = binchunk[s:]
    i = 0
    while i < len(binchunk):
        cmd = ord(binchunk[i : i + 1])
        i += 1
        if cmd & 0x80:
            offset = 0
            size = 0
            if cmd & 0x01:
                offset = ord(binchunk[i : i + 1])
                i += 1
            if cmd & 0x02:
                offset |= ord(binchunk[i : i + 1]) << 8
                i += 1
            if cmd & 0x04:
                offset |= ord(binchunk[i : i + 1]) << 16
                i += 1
            if cmd & 0x08:
                offset |= ord(binchunk[i : i + 1]) << 24
                i += 1
            if cmd & 0x10:
                size = ord(binchunk[i : i + 1])
                i += 1
            if cmd & 0x20:
                size |= ord(binchunk[i : i + 1]) << 8
                i += 1
            if cmd & 0x40:
                size |= ord(binchunk[i : i + 1]) << 16
                i += 1
            if size == 0:
                size = 0x10000
            offset_end = offset + size
            out += data[offset:offset_end]
        elif cmd != 0:
            offset_end = i + cmd
            out += binchunk[i:offset_end]
            i += cmd
        else:
            raise PatchApplicationError(_(b'unexpected delta opcode 0'))
    return out


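# Sketch of the delta format consumed above (values are hypothetical, not
# from the original source): the chunk starts with two little-endian
# base-128 varints (source and target sizes), followed by opcodes. A copy
# opcode has bit 0x80 set, with its low bits selecting which offset/size
# bytes follow; any other nonzero byte inserts that many literal bytes.
#
#   >>> # 0x0c/0x05: sizes; 0x91 = copy, offset byte 0x07, size byte 0x05
#   >>> applybindelta(b'\x0c\x05\x91\x07\x05', b'hello, world')
#   b'world'
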
def applydiff(ui, fp, backend, store, strip=1, prefix=b'', eolmode=b'strict'):
    """Reads a patch from fp and tries to apply it.

    Returns 0 for a clean patch, -1 if any rejects were found and 1 if
    there was any fuzz.

    If 'eolmode' is 'strict', the patch content and patched file are
    read in binary mode. Otherwise, line endings are ignored when
    patching then normalized according to 'eolmode'.
    """
    return _applydiff(
        ui,
        fp,
        patchfile,
        backend,
        store,
        strip=strip,
        prefix=prefix,
        eolmode=eolmode,
    )


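# Return-value convention (annotation): 0 means every hunk applied
# cleanly, 1 means at least one hunk applied with fuzz, and -1 means at
# least one hunk was rejected; patchbackend() below maps -1 to a
# PatchApplicationError and >0 to a "fuzz" result.
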
def _canonprefix(repo, prefix):
    if prefix:
        prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
        if prefix != b'':
            prefix += b'/'
    return prefix


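# Example (hypothetical paths): with repo root /repo and cwd /repo/src,
# _canonprefix(repo, b'lib') would give b'src/lib/', while an empty
# prefix passes through unchanged.
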
def _applydiff(
    ui, fp, patcher, backend, store, strip=1, prefix=b'', eolmode=b'strict'
):
    prefix = _canonprefix(backend.repo, prefix)

    def pstrip(p):
        return pathtransform(p, strip - 1, prefix)[1]

    rejects = 0
    err = 0
    current_file = None

    for state, values in iterhunks(fp):
        if state == b'hunk':
            if not current_file:
                continue
            ret = current_file.apply(values)
            if ret > 0:
                err = 1
        elif state == b'file':
            if current_file:
                rejects += current_file.close()
                current_file = None
            afile, bfile, first_hunk, gp = values
            if gp:
                gp.path = pstrip(gp.path)
                if gp.oldpath:
                    gp.oldpath = pstrip(gp.oldpath)
            else:
                gp = makepatchmeta(
                    backend, afile, bfile, first_hunk, strip, prefix
                )
            if gp.op == b'RENAME':
                backend.unlink(gp.oldpath)
            if not first_hunk:
                if gp.op == b'DELETE':
                    backend.unlink(gp.path)
                    continue
                data, mode = None, None
                if gp.op in (b'RENAME', b'COPY'):
                    data, mode = store.getfile(gp.oldpath)[:2]
                    if data is None:
                        # This means that the old path does not exist
                        raise PatchApplicationError(
                            _(b"source file '%s' does not exist") % gp.oldpath
                        )
                if gp.mode:
                    mode = gp.mode
                    if gp.op == b'ADD':
                        # Added files without content have no hunk and
                        # must be created
                        data = b''
                if data or mode:
                    if gp.op in (b'ADD', b'RENAME', b'COPY') and backend.exists(
                        gp.path
                    ):
                        raise PatchApplicationError(
                            _(
                                b"cannot create %s: destination "
                                b"already exists"
                            )
                            % gp.path
                        )
                    backend.setfile(gp.path, data, mode, gp.oldpath)
                continue
            try:
                current_file = patcher(ui, gp, backend, store, eolmode=eolmode)
            except PatchError as inst:
                ui.warn(stringutil.forcebytestr(inst) + b'\n')
                current_file = None
                rejects += 1
                continue
        elif state == b'git':
            for gp in values:
                path = pstrip(gp.oldpath)
                data, mode = backend.getfile(path)
                if data is None:
                    # The error ignored here will trigger a getfile()
                    # error in a place more appropriate for error
                    # handling, and will not interrupt the patching
                    # process.
                    pass
                else:
                    store.setfile(path, data, mode)
        else:
            raise error.Abort(_(b'unsupported parser state: %s') % state)

    if current_file:
        rejects += current_file.close()

    if rejects:
        return -1
    return err


def _externalpatch(ui, repo, patcher, patchname, strip, files, similarity):
    """use <patcher> to apply <patchname> to the working directory.
    returns whether patch was applied with fuzz factor."""

    fuzz = False
    args = []
    cwd = repo.root
    if cwd:
        args.append(b'-d %s' % procutil.shellquote(cwd))
    cmd = b'%s %s -p%d < %s' % (
        patcher,
        b' '.join(args),
        strip,
        procutil.shellquote(patchname),
    )
    ui.debug(b'Using external patch tool: %s\n' % cmd)
    fp = procutil.popen(cmd, b'rb')
    try:
        for line in fp:
            line = line.rstrip()
            ui.note(line + b'\n')
            if line.startswith(b'patching file '):
                pf = util.parsepatchoutput(line)
                printed_file = False
                files.add(pf)
            elif line.find(b'with fuzz') >= 0:
                fuzz = True
                if not printed_file:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
            elif line.find(b'saving rejects to file') >= 0:
                ui.warn(line + b'\n')
            elif line.find(b'FAILED') >= 0:
                if not printed_file:
                    ui.warn(pf + b'\n')
                    printed_file = True
                ui.warn(line + b'\n')
    finally:
        if files:
            scmutil.marktouched(repo, files, similarity)
    code = fp.close()
    if code:
        raise PatchApplicationError(
            _(b"patch command failed: %s") % procutil.explainexit(code)
        )
    return fuzz


def patchbackend(
    ui, backend, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    if files is None:
        files = set()
    if eolmode is None:
        eolmode = ui.config(b'patch', b'eol')
    if eolmode.lower() not in eolmodes:
        raise error.Abort(_(b'unsupported line endings type: %s') % eolmode)
    eolmode = eolmode.lower()

    store = filestore()
    try:
        fp = open(patchobj, b'rb')
    except TypeError:
        fp = patchobj
    try:
        ret = applydiff(
            ui, fp, backend, store, strip=strip, prefix=prefix, eolmode=eolmode
        )
    finally:
        if fp != patchobj:
            fp.close()
        files.update(backend.close())
        store.close()
    if ret < 0:
        raise PatchApplicationError(_(b'patch failed to apply'))
    return ret > 0


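# Note (annotation): patchobj may be a file name -- opened above in
# binary mode -- or any already-open object with a readline() method;
# the TypeError fallback is what lets callers hand in a file object
# directly.
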
def internalpatch(
    ui,
    repo,
    patchobj,
    strip,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """use builtin patch to apply <patchobj> to the working directory.
    returns whether patch was applied with fuzz factor."""
    backend = workingbackend(ui, repo, similarity)
    return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)


def patchrepo(
    ui, repo, ctx, store, patchobj, strip, prefix, files=None, eolmode=b'strict'
):
    backend = repobackend(ui, repo, ctx, store)
    return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)


def patch(
    ui,
    repo,
    patchname,
    strip=1,
    prefix=b'',
    files=None,
    eolmode=b'strict',
    similarity=0,
):
    """Apply <patchname> to the working directory.

    'eolmode' specifies how end of lines should be handled. It can be:
    - 'strict': inputs are read in binary mode, EOLs are preserved
    - 'crlf': EOLs are ignored when patching and reset to CRLF
    - 'lf': EOLs are ignored when patching and reset to LF
    - None: get it from user settings, default to 'strict'
    'eolmode' is ignored when using an external patcher program.

    Returns whether patch was applied with fuzz factor.
    """
    patcher = ui.config(b'ui', b'patch')
    if files is None:
        files = set()
    if patcher:
        return _externalpatch(
            ui, repo, patcher, patchname, strip, files, similarity
        )
    return internalpatch(
        ui, repo, patchname, strip, prefix, files, eolmode, similarity
    )


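# Usage sketch (hypothetical caller, names assumed): apply 'fix.patch'
# at -p0 with LF normalization, collecting the touched files:
#
#   files = set()
#   fuzz = patch(ui, repo, b'fix.patch', strip=0, files=files, eolmode=b'lf')
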
def changedfiles(ui, repo, patchpath, strip=1, prefix=b''):
    backend = fsbackend(ui, repo.root)
    prefix = _canonprefix(repo, prefix)
    with open(patchpath, b'rb') as fp:
        changed = set()
        for state, values in iterhunks(fp):
            if state == b'file':
                afile, bfile, first_hunk, gp = values
                if gp:
                    gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
                    if gp.oldpath:
                        gp.oldpath = pathtransform(
                            gp.oldpath, strip - 1, prefix
                        )[1]
                else:
                    gp = makepatchmeta(
                        backend, afile, bfile, first_hunk, strip, prefix
                    )
                changed.add(gp.path)
                if gp.op == b'RENAME':
                    changed.add(gp.oldpath)
            elif state not in (b'hunk', b'git'):
                raise error.Abort(_(b'unsupported parser state: %s') % state)
        return changed


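# Usage sketch (hypothetical): changedfiles(ui, repo, b'fix.patch')
# returns the set of repository-relative paths the patch would touch,
# including rename sources, without applying anything.
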
class GitDiffRequired(Exception):
    pass


diffopts = diffutil.diffallopts
diffallopts = diffutil.diffallopts
difffeatureopts = diffutil.difffeatureopts


def diff(
    repo,
    node1=None,
    node2=None,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
    hunksfilterfn=None,
):
    """yields diff of changes to files between two nodes, or node and
    working directory.

    if node1 is None, use first dirstate parent instead.
    if node2 is None, compare node1 with working directory.

    losedatafn(**kwarg) is a callable run when opts.upgrade=True and
    every time some change cannot be represented with the current
    patch format. Return False to upgrade to git patch format, True to
    accept the loss or raise an exception to abort the diff. It is
    called with the name of the current file being diffed as 'fn'. If set
    to None, patches will always be upgraded to git format when
    necessary.

    prefix is a filename prefix that is prepended to all filenames on
    display (used for subrepos).

    relroot, if not empty, must be normalized with a trailing /. Any match
    patterns that fall outside it will be ignored.

    copy, if not empty, should contain mappings {dst@y: src@x} of copy
    information.

    if copysourcematch is not None, then copy sources will be filtered by this
    matcher

    hunksfilterfn, if not None, should be a function taking a filectx and
    hunks generator that may yield filtered hunks.
    """
    if not node1 and not node2:
        node1 = repo.dirstate.p1()

    ctx1 = repo[node1]
    ctx2 = repo[node2]

    for fctx1, fctx2, hdr, hunks in diffhunks(
        repo,
        ctx1=ctx1,
        ctx2=ctx2,
        match=match,
        changes=changes,
        opts=opts,
        losedatafn=losedatafn,
        pathfn=pathfn,
        copy=copy,
        copysourcematch=copysourcematch,
    ):
        if hunksfilterfn is not None:
            # If the file has been removed, fctx2 is None; but this should
            # not occur here since we catch removed files early in
            # logcmdutil.getlinerangerevs() for 'hg log -L'.
            assert (
                fctx2 is not None
            ), b'fctx2 unexpectedly None in diff hunks filtering'
            hunks = hunksfilterfn(fctx2, hunks)
        text = b''.join(b''.join(hlines) for hrange, hlines in hunks)
        if hdr and (text or len(hdr) > 1):
            yield b'\n'.join(hdr) + b'\n'
        if text:
            yield text


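# Usage sketch (hypothetical): b''.join(diff(repo, node1, node2,
# opts=diffallopts(ui))) renders the full text diff between two
# changesets; diffui() below wraps the same stream in (output, label)
# pairs for colorized ui.write() calls.
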
def diffhunks(
    repo,
    ctx1,
    ctx2,
    match=None,
    changes=None,
    opts=None,
    losedatafn=None,
    pathfn=None,
    copy=None,
    copysourcematch=None,
):
    """Yield diff of changes to files in the form of (`fctx1`, `fctx2`,
    `header`, `hunks`) tuples, where `header` is a list of diff headers
    and `hunks` is an iterable of (`hunkrange`, `hunklines`) tuples.

    See diff() for the meaning of parameters.
    """

    if opts is None:
        opts = mdiff.defaultopts

    def lrugetfilectx():
        cache = {}
        order = collections.deque()

        def getfilectx(f, ctx):
            fctx = ctx.filectx(f, filelog=cache.get(f))
            if f not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[f] = fctx.filelog()
            else:
                order.remove(f)
            order.append(f)
            return fctx

        return getfilectx

    getfilectx = lrugetfilectx()

    if not changes:
        changes = ctx1.status(ctx2, match=match)
    if isinstance(changes, list):
        modified, added, removed = changes[:3]
    else:
        modified, added, removed = (
            changes.modified,
            changes.added,
            changes.removed,
        )

    if not modified and not added and not removed:
        return []

    if repo.ui.debugflag:
        hexfunc = hex
    else:
        hexfunc = short
    revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]

    if copy is None:
        copy = {}
        if opts.git or opts.upgrade:
            copy = copies.pathcopies(ctx1, ctx2, match=match)

    if copysourcematch:
        # filter out copies where source side isn't inside the matcher
        # (copies.pathcopies() already filtered out the destination)
        copy = {dst: src for dst, src in copy.items() if copysourcematch(src)}

    modifiedset = set(modified)
    addedset = set(added)
    removedset = set(removed)
    for f in modified:
        if f not in ctx1:
            # Fix up added, since merged-in additions appear as
            # modifications during merges
            modifiedset.remove(f)
            addedset.add(f)
    for f in removed:
        if f not in ctx1:
            # Merged-in additions that are then removed are reported as removed.
            # They are not in ctx1, so we don't want to show them in the diff.
            removedset.remove(f)
    modified = sorted(modifiedset)
    added = sorted(addedset)
    removed = sorted(removedset)
    for dst, src in list(copy.items()):
        if src not in ctx1:
            # Files merged in during a merge and then copied/renamed are
            # reported as copies. We want to show them in the diff as additions.
            del copy[dst]

    prefetchmatch = scmutil.matchfiles(
        repo, list(modifiedset | addedset | removedset)
    )
    revmatches = [
        (ctx1.rev(), prefetchmatch),
        (ctx2.rev(), prefetchmatch),
    ]
    scmutil.prefetchfiles(repo, revmatches)

    def difffn(opts, losedata):
        return trydiff(
            repo,
            revs,
            ctx1,
            ctx2,
            modified,
            added,
            removed,
            copy,
            getfilectx,
            opts,
            losedata,
            pathfn,
        )

    if opts.upgrade and not opts.git:
        try:

            def losedata(fn):
                if not losedatafn or not losedatafn(fn=fn):
                    raise GitDiffRequired

            # Buffer the whole output until we are sure it can be generated
            return list(difffn(opts.copy(git=False), losedata))
        except GitDiffRequired:
            return difffn(opts.copy(git=True), None)
    else:
        return difffn(opts, None)


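# Shape note (annotation): each item yielded above is
# (fctx1, fctx2, header, hunks), where header is a list of header lines
# without trailing newlines and hunks iterates (hunkrange, hunklines);
# diff() joins these back into plain diff text.
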
def diffsinglehunk(hunklines):
    """yield tokens for a list of lines in a single hunk"""
    for line in hunklines:
        # chomp
        chompline = line.rstrip(b'\r\n')
        # highlight tabs and trailing whitespace
        stripline = chompline.rstrip()
        if line.startswith(b'-'):
            label = b'diff.deleted'
        elif line.startswith(b'+'):
            label = b'diff.inserted'
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
        for token in tabsplitter.findall(stripline):
            if token.startswith(b'\t'):
                yield (token, b'diff.tab')
            else:
                yield (token, label)

        if chompline != stripline:
            yield (chompline[len(stripline) :], b'diff.trailingwhitespace')
        if chompline != line:
            yield (line[len(chompline) :], b'')


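# Token sketch (illustrative, assuming the module-level tabsplitter
# pattern): diffsinglehunk([b'+foo\tbar \n']) yields roughly
# (b'+foo', b'diff.inserted'), (b'\t', b'diff.tab'),
# (b'bar', b'diff.inserted'), (b' ', b'diff.trailingwhitespace'),
# (b'\n', b'').
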
def diffsinglehunkinline(hunklines):
    """yield tokens for a list of lines in a single hunk, with inline colors"""
    # prepare deleted, and inserted content
    a = bytearray()
    b = bytearray()
    for line in hunklines:
        if line[0:1] == b'-':
            a += line[1:]
        elif line[0:1] == b'+':
            b += line[1:]
        else:
            raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
    # fast path: if either side is empty, use diffsinglehunk
    if not a or not b:
        for t in diffsinglehunk(hunklines):
            yield t
        return
    # re-split the content into words
    al = wordsplitter.findall(bytes(a))
    bl = wordsplitter.findall(bytes(b))
    # re-arrange the words to lines since the diff algorithm is line-based
    aln = [s if s == b'\n' else s + b'\n' for s in al]
    bln = [s if s == b'\n' else s + b'\n' for s in bl]
    an = b''.join(aln)
    bn = b''.join(bln)
    # run the diff algorithm, prepare atokens and btokens
    atokens = []
    btokens = []
    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
    for (a1, a2, b1, b2), btype in blocks:
        changed = btype == b'!'
        for token in mdiff.splitnewlines(b''.join(al[a1:a2])):
            atokens.append((changed, token))
        for token in mdiff.splitnewlines(b''.join(bl[b1:b2])):
            btokens.append((changed, token))

    # yield deleted tokens, then inserted ones
    for prefix, label, tokens in [
        (b'-', b'diff.deleted', atokens),
        (b'+', b'diff.inserted', btokens),
    ]:
        nextisnewline = True
        for changed, token in tokens:
            if nextisnewline:
                yield (prefix, label)
                nextisnewline = False
            # special handling of line endings
            isendofline = token.endswith(b'\n')
            if isendofline:
                chomp = token[:-1]  # chomp
                if chomp.endswith(b'\r'):
                    chomp = chomp[:-1]
                endofline = token[len(chomp) :]
                token = chomp.rstrip()  # detect spaces at the end
                endspaces = chomp[len(token) :]
            # scan tabs
            for maybetab in tabsplitter.findall(token):
                if b'\t' == maybetab[0:1]:
                    currentlabel = b'diff.tab'
                else:
                    if changed:
                        currentlabel = label + b'.changed'
                    else:
                        currentlabel = label + b'.unchanged'
                yield (maybetab, currentlabel)
            if isendofline:
                if endspaces:
                    yield (endspaces, b'diff.trailingwhitespace')
                yield (endofline, b'')
                nextisnewline = True


def difflabel(func, *args, **kw):
    '''yields 2-tuples of (output, label) based on the output of func()'''
    if kw.get('opts') and kw['opts'].worddiff:
        dodiffhunk = diffsinglehunkinline
    else:
        dodiffhunk = diffsinglehunk
    headprefixes = [
        (b'diff', b'diff.diffline'),
        (b'copy', b'diff.extended'),
        (b'rename', b'diff.extended'),
        (b'old', b'diff.extended'),
        (b'new', b'diff.extended'),
        (b'deleted', b'diff.extended'),
        (b'index', b'diff.extended'),
        (b'similarity', b'diff.extended'),
        (b'---', b'diff.file_a'),
        (b'+++', b'diff.file_b'),
    ]
    textprefixes = [
        (b'@', b'diff.hunk'),
        # - and + are handled by diffsinglehunk
    ]
    head = False

    # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
    hunkbuffer = []

    def consumehunkbuffer():
        if hunkbuffer:
            for token in dodiffhunk(hunkbuffer):
                yield token
            hunkbuffer[:] = []

    for chunk in func(*args, **kw):
        lines = chunk.split(b'\n')
        linecount = len(lines)
        for i, line in enumerate(lines):
            if head:
                if line.startswith(b'@'):
                    head = False
            else:
                if line and not line.startswith(
                    (b' ', b'+', b'-', b'@', b'\\')
                ):
                    head = True
            diffline = False
            if not head and line and line.startswith((b'+', b'-')):
                diffline = True

            prefixes = textprefixes
            if head:
                prefixes = headprefixes
            if diffline:
                # buffered
                bufferedline = line
                if i + 1 < linecount:
                    bufferedline += b"\n"
                hunkbuffer.append(bufferedline)
            else:
                # unbuffered
                for token in consumehunkbuffer():
                    yield token
                stripline = line.rstrip()
                for prefix, label in prefixes:
                    if stripline.startswith(prefix):
                        yield (stripline, label)
                        if line != stripline:
                            yield (
                                line[len(stripline) :],
                                b'diff.trailingwhitespace',
                            )
                        break
                else:
                    yield (line, b'')
                if i + 1 < linecount:
                    yield (b'\n', b'')
    for token in consumehunkbuffer():
        yield token


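# Example (annotation): difflabel() maps raw diff output to 2-tuples such
# as (b'diff --git a/f b/f', b'diff.diffline') and (b'@@ -1 +1 @@',
# b'diff.hunk'), while runs of +/- lines are buffered and re-tokenized by
# the single-hunk helpers above.
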
def diffui(*args, **kw):
    '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
    return difflabel(diff, *args, **kw)


def _filepairs(modified, added, removed, copy, opts):
    """generates tuples (f1, f2, copyop), where f1 is the name of the file
    before and f2 is the name after. For added files, f1 will be None,
    and for removed files, f2 will be None. copyop may be set to None, 'copy'
    or 'rename' (the latter two only if opts.git is set)."""
    gone = set()

    copyto = {v: k for k, v in copy.items()}

    addedset, removedset = set(added), set(removed)

    for f in sorted(modified + added + removed):
        copyop = None
        f1, f2 = f, f
        if f in addedset:
            f1 = None
            if f in copy:
                if opts.git:
                    f1 = copy[f]
                    if f1 in removedset and f1 not in gone:
                        copyop = b'rename'
                        gone.add(f1)
                    else:
                        copyop = b'copy'
        elif f in removedset:
            f2 = None
            if opts.git:
                # have we already reported a copy above?
                if (
                    f in copyto
                    and copyto[f] in addedset
                    and copy[copyto[f]] == f
                ):
                    continue
        yield f1, f2, copyop


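# Example (illustrative values): with added=[b'b'], removed=[b'a'],
# copy={b'b': b'a'} and opts.git set, the standalone removal of b'a' is
# suppressed and a single (b'a', b'b', b'rename') pair is generated.
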
def _gitindex(text):
    if not text:
        text = b""
    l = len(text)
    s = hashutil.sha1(b'blob %d\0' % l)
    s.update(text)
    return hex(s.digest())


_gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}


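# _gitindex() reproduces git's blob object id: the sha1 of b'blob <len>\0'
# plus the content. For instance, the empty blob hashes to the well-known
# e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.
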
def trydiff(
    repo,
    revs,
    ctx1,
    ctx2,
    modified,
    added,
    removed,
    copy,
    getfilectx,
    opts,
    losedatafn,
    pathfn,
):
    """given input data, generate a diff and yield it in blocks

    If generating a diff would lose data like flags or binary data and
    losedatafn is not None, it will be called.

    pathfn is applied to every path in the diff output.
    """

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    def diffline(f, revs):
        revinfo = b' '.join([b"-r %s" % rev for rev in revs])
        return b'diff %s %s' % (revinfo, f)

    def isempty(fctx):
        return fctx is None or fctx.size() == 0

    date1 = dateutil.datestr(ctx1.date())
    date2 = dateutil.datestr(ctx2.date())

    if not pathfn:
        pathfn = lambda f: f

    for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
        content1 = None
        content2 = None
        fctx1 = None
        fctx2 = None
        flag1 = None
        flag2 = None
        if f1:
            fctx1 = getfilectx(f1, ctx1)
            if opts.git or losedatafn:
                flag1 = ctx1.flags(f1)
        if f2:
            fctx2 = getfilectx(f2, ctx2)
            if opts.git or losedatafn:
                flag2 = ctx2.flags(f2)
        # if binary is True, output "summary" or "base85", but not "text diff"
        if opts.text:
            binary = False
        else:
            binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)

        if losedatafn and not opts.git:
            if (
                binary
                or
                # copy/rename
                f2 in copy
                or
                # empty file creation
                (not f1 and isempty(fctx2))
                or
                # empty file deletion
                (isempty(fctx1) and not f2)
                or
                # create with flags
                (not f1 and flag2)
                or
                # change flags
                (f1 and f2 and flag1 != flag2)
            ):
                losedatafn(f2 or f1)

        path1 = pathfn(f1 or f2)
        path2 = pathfn(f2 or f1)
        header = []
        if opts.git:
            header.append(
                b'diff --git %s%s %s%s' % (aprefix, path1, bprefix, path2)
            )
            if not f1:  # added
                header.append(b'new file mode %s' % _gitmode[flag2])
            elif not f2:  # removed
                header.append(b'deleted file mode %s' % _gitmode[flag1])
            else:  # modified/copied/renamed
                mode1, mode2 = _gitmode[flag1], _gitmode[flag2]
                if mode1 != mode2:
                    header.append(b'old mode %s' % mode1)
                    header.append(b'new mode %s' % mode2)
                if copyop is not None:
                    if opts.showsimilarity:
                        sim = similar.score(ctx1[path1], ctx2[path2]) * 100
                        header.append(b'similarity index %d%%' % sim)
                    header.append(b'%s from %s' % (copyop, path1))
                    header.append(b'%s to %s' % (copyop, path2))
        elif revs:
            header.append(diffline(path1, revs))

        # fctx.is  | diffopts                | what to   | is fctx.data()
        # binary() | text nobinary git index | output?   | outputted?
        # ---------|-------------------------|-----------|----------------
        # yes      | no   no       no   *    | summary   | no
        # yes      | no   no       yes  *    | base85    | yes
        # yes      | no   yes      no   *    | summary   | no
        # yes      | no   yes      yes  0    | summary   | no
        # yes      | no   yes      yes  >0   | summary   | semi [1]
        # yes      | yes  *        *    *    | text diff | yes
        # no       | *    *        *    *    | text diff | yes
        # [1]: hash(fctx.data()) is output, so fctx.data() cannot be faked
        if binary and (
            not opts.git or (opts.git and opts.nobinary and not opts.index)
        ):
            # fast path: no binary content will be displayed, content1 and
            # content2 are only used for equivalent test. cmp() could have a
            # fast path.
            if fctx1 is not None:
                content1 = b'\0'
            if fctx2 is not None:
                if fctx1 is not None and not fctx1.cmp(fctx2):
                    content2 = b'\0'  # not different
                else:
                    content2 = b'\0\0'
        else:
            # normal path: load contents
            if fctx1 is not None:
                content1 = fctx1.data()
            if fctx2 is not None:
                content2 = fctx2.data()

        data1 = (ctx1, fctx1, path1, flag1, content1, date1)
        data2 = (ctx2, fctx2, path2, flag2, content2, date2)
        yield diffcontent(data1, data2, header, binary, opts)


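# Header sketch (illustrative): renaming file a to b with opts.git set
# emits header lines like
#
#   diff --git a/a b/b
#   rename from a
#   rename to b
#
# with optional mode and 'similarity index' lines when flags changed or
# opts.showsimilarity is set.
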
3082 def diffcontent(data1, data2, header, binary, opts):
3082 def diffcontent(data1, data2, header, binary, opts):
3083 """diffs two versions of a file.
3083 """diffs two versions of a file.
3084
3084
3085 data1 and data2 are tuples containg:
3085 data1 and data2 are tuples containg:
3086
3086
3087 * ctx: changeset for the file
3087 * ctx: changeset for the file
3088 * fctx: file context for that file
3088 * fctx: file context for that file
3089 * path1: name of the file
3089 * path1: name of the file
3090 * flag: flags of the file
3090 * flag: flags of the file
3091 * content: full content of the file (can be null in case of binary)
3091 * content: full content of the file (can be null in case of binary)
3092 * date: date of the changeset
3092 * date: date of the changeset
3093
3093
3094 header: the patch header
3094 header: the patch header
3095 binary: whether the any of the version of file is binary or not
3095 binary: whether the any of the version of file is binary or not
3096 opts: user passed options
3096 opts: user passed options
3097
3097
3098 It exists as a separate function so that extensions like extdiff can wrap
3098 It exists as a separate function so that extensions like extdiff can wrap
3099 it and use the file content directly.
3099 it and use the file content directly.
3100 """
3100 """
3101
3101
3102 ctx1, fctx1, path1, flag1, content1, date1 = data1
3102 ctx1, fctx1, path1, flag1, content1, date1 = data1
3103 ctx2, fctx2, path2, flag2, content2, date2 = data2
3103 ctx2, fctx2, path2, flag2, content2, date2 = data2
3104 index1 = _gitindex(content1) if path1 in ctx1 else sha1nodeconstants.nullhex
3104 index1 = _gitindex(content1) if path1 in ctx1 else sha1nodeconstants.nullhex
3105 index2 = _gitindex(content2) if path2 in ctx2 else sha1nodeconstants.nullhex
3105 index2 = _gitindex(content2) if path2 in ctx2 else sha1nodeconstants.nullhex
3106 if binary and opts.git and not opts.nobinary:
3106 if binary and opts.git and not opts.nobinary:
3107 text = mdiff.b85diff(content1, content2)
3107 text = mdiff.b85diff(content1, content2)
3108 if text:
3108 if text:
3109 header.append(b'index %s..%s' % (index1, index2))
3109 header.append(b'index %s..%s' % (index1, index2))
3110 hunks = ((None, [text]),)
3110 hunks = ((None, [text]),)
3111 else:
3111 else:
3112 if opts.git and opts.index > 0:
3112 if opts.git and opts.index > 0:
3113 flag = flag1
3113 flag = flag1
3114 if flag is None:
3114 if flag is None:
3115 flag = flag2
3115 flag = flag2
3116 header.append(
3116 header.append(
3117 b'index %s..%s %s'
3117 b'index %s..%s %s'
3118 % (
3118 % (
3119 index1[0 : opts.index],
3119 index1[0 : opts.index],
3120 index2[0 : opts.index],
3120 index2[0 : opts.index],
3121 _gitmode[flag],
3121 _gitmode[flag],
3122 )
3122 )
3123 )
3123 )
3124
3124
3125 uheaders, hunks = mdiff.unidiff(
3125 uheaders, hunks = mdiff.unidiff(
3126 content1,
3126 content1,
3127 date1,
3127 date1,
3128 content2,
3128 content2,
3129 date2,
3129 date2,
3130 path1,
3130 path1,
3131 path2,
3131 path2,
3132 binary=binary,
3132 binary=binary,
3133 opts=opts,
3133 opts=opts,
3134 )
3134 )
3135 header.extend(uheaders)
3135 header.extend(uheaders)
3136 return fctx1, fctx2, header, hunks
3136 return fctx1, fctx2, header, hunks
3137
3137
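
# A standalone sketch of the abbreviated "index" header assembled above,
# using made-up 40-character hashes and a hypothetical opts.index of 12
# (b'100644' stands in for the regular-file entry of _gitmode):
index1 = b'a' * 40
index2 = b'b' * 40
header_line = b'index %s..%s %s' % (index1[0:12], index2[0:12], b'100644')
assert header_line == b'index aaaaaaaaaaaa..bbbbbbbbbbbb 100644'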
3138
3138
3139 def diffstatsum(stats):
3139 def diffstatsum(stats):
3140 maxfile, maxtotal, addtotal, removetotal, binary = 0, 0, 0, 0, False
3140 maxfile, maxtotal, addtotal, removetotal, binary = 0, 0, 0, 0, False
3141 for f, a, r, b in stats:
3141 for f, a, r, b in stats:
3142 maxfile = max(maxfile, encoding.colwidth(f))
3142 maxfile = max(maxfile, encoding.colwidth(f))
3143 maxtotal = max(maxtotal, a + r)
3143 maxtotal = max(maxtotal, a + r)
3144 addtotal += a
3144 addtotal += a
3145 removetotal += r
3145 removetotal += r
3146 binary = binary or b
3146 binary = binary or b
3147
3147
3148 return maxfile, maxtotal, addtotal, removetotal, binary
3148 return maxfile, maxtotal, addtotal, removetotal, binary
3149
3149
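
# A standalone check of the aggregation above, with hypothetical
# (filename, adds, removes, isbinary) tuples; assumes a Mercurial
# checkout on sys.path so mercurial.patch is importable:
from mercurial.patch import diffstatsum
stats = [(b'a.txt', 3, 1, False), (b'img.png', 0, 0, True)]
assert diffstatsum(stats) == (7, 4, 3, 1, True)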
3150
3150
3151 def diffstatdata(lines):
3151 def diffstatdata(lines):
3152 diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')
3152 diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')
3153
3153
3154 results = []
3154 results = []
3155 filename, adds, removes, isbinary = None, 0, 0, False
3155 filename, adds, removes, isbinary = None, 0, 0, False
3156
3156
3157 def addresult():
3157 def addresult():
3158 if filename:
3158 if filename:
3159 results.append((filename, adds, removes, isbinary))
3159 results.append((filename, adds, removes, isbinary))
3160
3160
3161 # inheader is used to track if a line is in the
3161 # inheader is used to track if a line is in the
3162 # header portion of the diff. This helps properly account
3162 # header portion of the diff. This helps properly account
3163 # for lines that start with '--' or '++'
3163 # for lines that start with '--' or '++'
3164 inheader = False
3164 inheader = False
3165
3165
3166 for line in lines:
3166 for line in lines:
3167 if line.startswith(b'diff'):
3167 if line.startswith(b'diff'):
3168 addresult()
3168 addresult()
3169 # starting a new file diff
3169 # starting a new file diff
3170 # set numbers to 0 and reset inheader
3170 # set numbers to 0 and reset inheader
3171 inheader = True
3171 inheader = True
3172 adds, removes, isbinary = 0, 0, False
3172 adds, removes, isbinary = 0, 0, False
3173 if line.startswith(b'diff --git a/'):
3173 if line.startswith(b'diff --git a/'):
3174 filename = gitre.search(line).group(2)
3174 filename = gitre.search(line).group(2)
3175 elif line.startswith(b'diff -r'):
3175 elif line.startswith(b'diff -r'):
3176 # format: "diff -r ... -r ... filename"
3176 # format: "diff -r ... -r ... filename"
3177 filename = diffre.search(line).group(1)
3177 filename = diffre.search(line).group(1)
3178 elif line.startswith(b'@@'):
3178 elif line.startswith(b'@@'):
3179 inheader = False
3179 inheader = False
3180 elif line.startswith(b'+') and not inheader:
3180 elif line.startswith(b'+') and not inheader:
3181 adds += 1
3181 adds += 1
3182 elif line.startswith(b'-') and not inheader:
3182 elif line.startswith(b'-') and not inheader:
3183 removes += 1
3183 removes += 1
3184 elif line.startswith(b'GIT binary patch') or line.startswith(
3184 elif line.startswith(b'GIT binary patch') or line.startswith(
3185 b'Binary file'
3185 b'Binary file'
3186 ):
3186 ):
3187 isbinary = True
3187 isbinary = True
3188 elif line.startswith(b'rename from'):
3188 elif line.startswith(b'rename from'):
3189 filename = line[12:]
3189 filename = line[12:]
3190 elif line.startswith(b'rename to'):
3190 elif line.startswith(b'rename to'):
3191 filename += b' => %s' % line[10:]
3191 filename += b' => %s' % line[10:]
3192 addresult()
3192 addresult()
3193 return results
3193 return results
3194
3194
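
# A standalone sketch of the parser above on a minimal made-up "diff -r"
# patch (assumes a Mercurial checkout on sys.path):
from mercurial.patch import diffstatdata
lines = [
    b'diff -r abc123 -r def456 foo.txt',
    b'--- a/foo.txt',
    b'+++ b/foo.txt',
    b'@@ -1,1 +1,2 @@',
    b' unchanged',
    b'+added line',
]
assert diffstatdata(lines) == [(b'foo.txt', 1, 0, False)]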
3195
3195
3196 def diffstat(lines, width=80):
3196 def diffstat(lines, width=80):
3197 output = []
3197 output = []
3198 stats = diffstatdata(lines)
3198 stats = diffstatdata(lines)
3199 maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)
3199 maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)
3200
3200
3201 countwidth = len(str(maxtotal))
3201 countwidth = len(str(maxtotal))
3202 if hasbinary and countwidth < 3:
3202 if hasbinary and countwidth < 3:
3203 countwidth = 3
3203 countwidth = 3
3204 graphwidth = width - countwidth - maxname - 6
3204 graphwidth = width - countwidth - maxname - 6
3205 if graphwidth < 10:
3205 if graphwidth < 10:
3206 graphwidth = 10
3206 graphwidth = 10
3207
3207
3208 def scale(i):
3208 def scale(i):
3209 if maxtotal <= graphwidth:
3209 if maxtotal <= graphwidth:
3210 return i
3210 return i
3211 # If diffstat runs out of room it doesn't print anything,
3211 # If diffstat runs out of room it doesn't print anything,
3212 # which isn't very useful, so always print at least one + or -
3212 # which isn't very useful, so always print at least one + or -
3213 # if there were at least some changes.
3213 # if there were at least some changes.
3214 return max(i * graphwidth // maxtotal, int(bool(i)))
3214 return max(i * graphwidth // maxtotal, int(bool(i)))
3215
3215
3216 for filename, adds, removes, isbinary in stats:
3216 for filename, adds, removes, isbinary in stats:
3217 if isbinary:
3217 if isbinary:
3218 count = b'Bin'
3218 count = b'Bin'
3219 else:
3219 else:
3220 count = b'%d' % (adds + removes)
3220 count = b'%d' % (adds + removes)
3221 pluses = b'+' * scale(adds)
3221 pluses = b'+' * scale(adds)
3222 minuses = b'-' * scale(removes)
3222 minuses = b'-' * scale(removes)
3223 output.append(
3223 output.append(
3224 b' %s%s | %*s %s%s\n'
3224 b' %s%s | %*s %s%s\n'
3225 % (
3225 % (
3226 filename,
3226 filename,
3227 b' ' * (maxname - encoding.colwidth(filename)),
3227 b' ' * (maxname - encoding.colwidth(filename)),
3228 countwidth,
3228 countwidth,
3229 count,
3229 count,
3230 pluses,
3230 pluses,
3231 minuses,
3231 minuses,
3232 )
3232 )
3233 )
3233 )
3234
3234
3235 if stats:
3235 if stats:
3236 output.append(
3236 output.append(
3237 _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
3237 _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
3238 % (len(stats), totaladds, totalremoves)
3238 % (len(stats), totaladds, totalremoves)
3239 )
3239 )
3240
3240
3241 return b''.join(output)
3241 return b''.join(output)
3242
3242
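
# A quick standalone check of the scale() arithmetic above, with
# hypothetical graphwidth/maxtotal values: a single-line change still
# gets one '+' instead of rounding down to nothing.
graphwidth, maxtotal = 10, 100
scale = lambda i: max(i * graphwidth // maxtotal, int(bool(i)))
assert scale(0) == 0    # no change, no bar
assert scale(1) == 1    # rounded up to a single character
assert scale(50) == 5   # 50 * 10 // 100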
3243
3243
3244 def diffstatui(*args, **kw):
3244 def diffstatui(*args, **kw):
3245 """like diffstat(), but yields 2-tuples of (output, label) for
3245 """like diffstat(), but yields 2-tuples of (output, label) for
3246 ui.write()
3246 ui.write()
3247 """
3247 """
3248
3248
3249 for line in diffstat(*args, **kw).splitlines():
3249 for line in diffstat(*args, **kw).splitlines():
3250 if line and line[-1] in b'+-':
3250 if line and line[-1] in b'+-':
3251 name, graph = line.rsplit(b' ', 1)
3251 name, graph = line.rsplit(b' ', 1)
3252 yield (name + b' ', b'')
3252 yield (name + b' ', b'')
3253 m = re.search(br'\++', graph)
3253 m = re.search(br'\++', graph)
3254 if m:
3254 if m:
3255 yield (m.group(0), b'diffstat.inserted')
3255 yield (m.group(0), b'diffstat.inserted')
3256 m = re.search(br'-+', graph)
3256 m = re.search(br'-+', graph)
3257 if m:
3257 if m:
3258 yield (m.group(0), b'diffstat.deleted')
3258 yield (m.group(0), b'diffstat.deleted')
3259 else:
3259 else:
3260 yield (line, b'')
3260 yield (line, b'')
3261 yield (b'\n', b'')
3261 yield (b'\n', b'')
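
# The labelling above relies on rsplit plus two regex searches; a quick
# standalone check against a made-up diffstat row:
import re
line = b' foo.txt |  3 ++-'
name, graph = line.rsplit(b' ', 1)
assert (name, graph) == (b' foo.txt |  3', b'++-')
assert re.search(br'\++', graph).group(0) == b'++'
assert re.search(br'-+', graph).group(0) == b'-'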
@@ -1,849 +1,849 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import errno
9 import errno
10 import functools
10 import functools
11 import os
11 import os
12 import re
12 import re
13 import stat
13 import stat
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .node import hex
17 from .node import hex
18 from . import (
18 from . import (
19 changelog,
19 changelog,
20 error,
20 error,
21 manifest,
21 manifest,
22 policy,
22 policy,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 vfs as vfsmod,
25 vfs as vfsmod,
26 )
26 )
27 from .utils import hashutil
27 from .utils import hashutil
28
28
29 parsers = policy.importmod('parsers')
29 parsers = policy.importmod('parsers')
30 # how many bytes should be read from fncache in one read
30 # how many bytes should be read from fncache in one read
31 # It is done to prevent loading large fncache files into memory
31 # It is done to prevent loading large fncache files into memory
32 fncache_chunksize = 10 ** 6
32 fncache_chunksize = 10 ** 6
33
33
34
34
35 def _matchtrackedpath(path, matcher):
35 def _matchtrackedpath(path, matcher):
36 """parses a fncache entry and returns whether the entry is tracking a path
36 """parses a fncache entry and returns whether the entry is tracking a path
37 matched by matcher or not.
37 matched by matcher or not.
38
38
39 If matcher is None, returns True"""
39 If matcher is None, returns True"""
40
40
41 if matcher is None:
41 if matcher is None:
42 return True
42 return True
43 path = decodedir(path)
43 path = decodedir(path)
44 if path.startswith(b'data/'):
44 if path.startswith(b'data/'):
45 return matcher(path[len(b'data/') : -len(b'.i')])
45 return matcher(path[len(b'data/') : -len(b'.i')])
46 elif path.startswith(b'meta/'):
46 elif path.startswith(b'meta/'):
47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48
48
49 raise error.ProgrammingError(b"cannot decode path %s" % path)
49 raise error.ProgrammingError(b"cannot decode path %s" % path)
50
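
# A standalone sketch of the slicing above: a hypothetical filelog entry
# 'data/foo/bar.txt.i' is reduced to the tracked path handed to the matcher.
entry = b'data/foo/bar.txt.i'
assert entry[len(b'data/') : -len(b'.i')] == b'foo/bar.txt'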
50
51
51
52 # This avoids a collision between a file named foo and a dir named
52 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
53 # foo.i or foo.d
54 def _encodedir(path):
54 def _encodedir(path):
55 """
55 """
56 >>> _encodedir(b'data/foo.i')
56 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
57 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
58 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
59 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
61 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 """
64 """
65 return (
65 return (
66 path.replace(b".hg/", b".hg.hg/")
66 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
67 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
68 .replace(b".d/", b".d.hg/")
69 )
69 )
70
70
71
71
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73
73
74
74
75 def decodedir(path):
75 def decodedir(path):
76 """
76 """
77 >>> decodedir(b'data/foo.i')
77 >>> decodedir(b'data/foo.i')
78 'data/foo.i'
78 'data/foo.i'
79 >>> decodedir(b'data/foo.i.hg/bla.i')
79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 'data/foo.i/bla.i'
80 'data/foo.i/bla.i'
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 'data/foo.i.hg/bla.i'
82 'data/foo.i.hg/bla.i'
83 """
83 """
84 if b".hg/" not in path:
84 if b".hg/" not in path:
85 return path
85 return path
86 return (
86 return (
87 path.replace(b".d.hg/", b".d/")
87 path.replace(b".d.hg/", b".d/")
88 .replace(b".i.hg/", b".i/")
88 .replace(b".i.hg/", b".i/")
89 .replace(b".hg.hg/", b".hg/")
89 .replace(b".hg.hg/", b".hg/")
90 )
90 )
91
91
92
92
93 def _reserved():
93 def _reserved():
94 """characters that are problematic for filesystems
94 """characters that are problematic for filesystems
95
95
96 * ascii escapes (0..31)
96 * ascii escapes (0..31)
97 * ascii hi (126..255)
97 * ascii hi (126..255)
98 * windows specials
98 * windows specials
99
99
100 these characters will be escaped by encodefunctions
100 these characters will be escaped by encodefunctions
101 """
101 """
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
103 for x in range(32):
104 yield x
104 yield x
105 for x in range(126, 256):
105 for x in range(126, 256):
106 yield x
106 yield x
107 for x in winreserved:
107 for x in winreserved:
108 yield x
108 yield x
109
109
110
110
111 def _buildencodefun():
111 def _buildencodefun():
112 """
112 """
113 >>> enc, dec = _buildencodefun()
113 >>> enc, dec = _buildencodefun()
114
114
115 >>> enc(b'nothing/special.txt')
115 >>> enc(b'nothing/special.txt')
116 'nothing/special.txt'
116 'nothing/special.txt'
117 >>> dec(b'nothing/special.txt')
117 >>> dec(b'nothing/special.txt')
118 'nothing/special.txt'
118 'nothing/special.txt'
119
119
120 >>> enc(b'HELLO')
120 >>> enc(b'HELLO')
121 '_h_e_l_l_o'
121 '_h_e_l_l_o'
122 >>> dec(b'_h_e_l_l_o')
122 >>> dec(b'_h_e_l_l_o')
123 'HELLO'
123 'HELLO'
124
124
125 >>> enc(b'hello:world?')
125 >>> enc(b'hello:world?')
126 'hello~3aworld~3f'
126 'hello~3aworld~3f'
127 >>> dec(b'hello~3aworld~3f')
127 >>> dec(b'hello~3aworld~3f')
128 'hello:world?'
128 'hello:world?'
129
129
130 >>> enc(b'the\\x07quick\\xADshot')
130 >>> enc(b'the\\x07quick\\xADshot')
131 'the~07quick~adshot'
131 'the~07quick~adshot'
132 >>> dec(b'the~07quick~adshot')
132 >>> dec(b'the~07quick~adshot')
133 'the\\x07quick\\xadshot'
133 'the\\x07quick\\xadshot'
134 """
134 """
135 e = b'_'
135 e = b'_'
136 xchr = pycompat.bytechr
136 xchr = pycompat.bytechr
137 asciistr = list(map(xchr, range(127)))
137 asciistr = list(map(xchr, range(127)))
138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139
139
140 cmap = {x: x for x in asciistr}
140 cmap = {x: x for x in asciistr}
141 for x in _reserved():
141 for x in _reserved():
142 cmap[xchr(x)] = b"~%02x" % x
142 cmap[xchr(x)] = b"~%02x" % x
143 for x in capitals + [ord(e)]:
143 for x in capitals + [ord(e)]:
144 cmap[xchr(x)] = e + xchr(x).lower()
144 cmap[xchr(x)] = e + xchr(x).lower()
145
145
146 dmap = {}
146 dmap = {}
147 for k, v in cmap.items():
147 for k, v in cmap.items():
148 dmap[v] = k
148 dmap[v] = k
149
149
150 def decode(s):
150 def decode(s):
151 i = 0
151 i = 0
152 while i < len(s):
152 while i < len(s):
153 for l in pycompat.xrange(1, 4):
153 for l in pycompat.xrange(1, 4):
154 try:
154 try:
155 yield dmap[s[i : i + l]]
155 yield dmap[s[i : i + l]]
156 i += l
156 i += l
157 break
157 break
158 except KeyError:
158 except KeyError:
159 pass
159 pass
160 else:
160 else:
161 raise KeyError
161 raise KeyError
162
162
163 return (
163 return (
164 lambda s: b''.join(
164 lambda s: b''.join(
165 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
165 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
166 ),
166 ),
167 lambda s: b''.join(list(decode(s))),
167 lambda s: b''.join(list(decode(s))),
168 )
168 )
169
169
170
170
171 _encodefname, _decodefname = _buildencodefun()
171 _encodefname, _decodefname = _buildencodefun()
172
172
173
173
174 def encodefilename(s):
174 def encodefilename(s):
175 """
175 """
176 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
177 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
178 """
178 """
179 return _encodefname(encodedir(s))
179 return _encodefname(encodedir(s))
180
180
181
181
182 def decodefilename(s):
182 def decodefilename(s):
183 """
183 """
184 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
185 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
186 """
186 """
187 return decodedir(_decodefname(s))
187 return decodedir(_decodefname(s))
188
188
189
189
190 def _buildlowerencodefun():
190 def _buildlowerencodefun():
191 """
191 """
192 >>> f = _buildlowerencodefun()
192 >>> f = _buildlowerencodefun()
193 >>> f(b'nothing/special.txt')
193 >>> f(b'nothing/special.txt')
194 'nothing/special.txt'
194 'nothing/special.txt'
195 >>> f(b'HELLO')
195 >>> f(b'HELLO')
196 'hello'
196 'hello'
197 >>> f(b'hello:world?')
197 >>> f(b'hello:world?')
198 'hello~3aworld~3f'
198 'hello~3aworld~3f'
199 >>> f(b'the\\x07quick\\xADshot')
199 >>> f(b'the\\x07quick\\xADshot')
200 'the~07quick~adshot'
200 'the~07quick~adshot'
201 """
201 """
202 xchr = pycompat.bytechr
202 xchr = pycompat.bytechr
203 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
203 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
204 for x in _reserved():
204 for x in _reserved():
205 cmap[xchr(x)] = b"~%02x" % x
205 cmap[xchr(x)] = b"~%02x" % x
206 for x in range(ord(b"A"), ord(b"Z") + 1):
206 for x in range(ord(b"A"), ord(b"Z") + 1):
207 cmap[xchr(x)] = xchr(x).lower()
207 cmap[xchr(x)] = xchr(x).lower()
208
208
209 def lowerencode(s):
209 def lowerencode(s):
210 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
211
211
212 return lowerencode
212 return lowerencode
213
213
214
214
215 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
216
216
217 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
218 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
219 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
220
220
221
221
222 def _auxencode(path, dotencode):
222 def _auxencode(path, dotencode):
223 """
223 """
224 Encodes filenames containing names reserved by Windows or which end in
224 Encodes filenames containing names reserved by Windows or which end in
225 period or space. Does not touch other single reserved characters c.
225 period or space. Does not touch other single reserved characters c.
226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
227 Additionally encodes space or period at the beginning, if dotencode is
227 Additionally encodes space or period at the beginning, if dotencode is
228 True. Parameter path is assumed to be all lowercase.
228 True. Parameter path is assumed to be all lowercase.
229 A segment only needs encoding if a reserved name appears as a
229 A segment only needs encoding if a reserved name appears as a
230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
231 doesn't need encoding.
231 doesn't need encoding.
232
232
233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
234 >>> _auxencode(s.split(b'/'), True)
234 >>> _auxencode(s.split(b'/'), True)
235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
237 >>> _auxencode(s.split(b'/'), False)
237 >>> _auxencode(s.split(b'/'), False)
238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
239 >>> _auxencode([b'foo. '], True)
239 >>> _auxencode([b'foo. '], True)
240 ['foo.~20']
240 ['foo.~20']
241 >>> _auxencode([b' .foo'], True)
241 >>> _auxencode([b' .foo'], True)
242 ['~20.foo']
242 ['~20.foo']
243 """
243 """
244 for i, n in enumerate(path):
244 for i, n in enumerate(path):
245 if not n:
245 if not n:
246 continue
246 continue
247 if dotencode and n[0] in b'. ':
247 if dotencode and n[0] in b'. ':
248 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 n = b"~%02x" % ord(n[0:1]) + n[1:]
249 path[i] = n
249 path[i] = n
250 else:
250 else:
251 l = n.find(b'.')
251 l = n.find(b'.')
252 if l == -1:
252 if l == -1:
253 l = len(n)
253 l = len(n)
254 if (l == 3 and n[:3] in _winres3) or (
254 if (l == 3 and n[:3] in _winres3) or (
255 l == 4
255 l == 4
256 and n[3:4] <= b'9'
256 and n[3:4] <= b'9'
257 and n[3:4] >= b'1'
257 and n[3:4] >= b'1'
258 and n[:3] in _winres4
258 and n[:3] in _winres4
259 ):
259 ):
260 # encode third letter ('aux' -> 'au~78')
260 # encode third letter ('aux' -> 'au~78')
261 ec = b"~%02x" % ord(n[2:3])
261 ec = b"~%02x" % ord(n[2:3])
262 n = n[0:2] + ec + n[3:]
262 n = n[0:2] + ec + n[3:]
263 path[i] = n
263 path[i] = n
264 if n[-1] in b'. ':
264 if n[-1] in b'. ':
265 # encode last period or space ('foo...' -> 'foo..~2e')
265 # encode last period or space ('foo...' -> 'foo..~2e')
266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
267 return path
267 return path
268
268
269
269
270 _maxstorepathlen = 120
270 _maxstorepathlen = 120
271 _dirprefixlen = 8
271 _dirprefixlen = 8
272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
273
273
274
274
275 def _hashencode(path, dotencode):
275 def _hashencode(path, dotencode):
276 digest = hex(hashutil.sha1(path).digest())
276 digest = hex(hashutil.sha1(path).digest())
277 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
278 parts = _auxencode(le, dotencode)
278 parts = _auxencode(le, dotencode)
279 basename = parts[-1]
279 basename = parts[-1]
280 _root, ext = os.path.splitext(basename)
280 _root, ext = os.path.splitext(basename)
281 sdirs = []
281 sdirs = []
282 sdirslen = 0
282 sdirslen = 0
283 for p in parts[:-1]:
283 for p in parts[:-1]:
284 d = p[:_dirprefixlen]
284 d = p[:_dirprefixlen]
285 if d[-1] in b'. ':
285 if d[-1] in b'. ':
286 # Windows can't access dirs ending in period or space
286 # Windows can't access dirs ending in period or space
287 d = d[:-1] + b'_'
287 d = d[:-1] + b'_'
288 if sdirslen == 0:
288 if sdirslen == 0:
289 t = len(d)
289 t = len(d)
290 else:
290 else:
291 t = sdirslen + 1 + len(d)
291 t = sdirslen + 1 + len(d)
292 if t > _maxshortdirslen:
292 if t > _maxshortdirslen:
293 break
293 break
294 sdirs.append(d)
294 sdirs.append(d)
295 sdirslen = t
295 sdirslen = t
296 dirs = b'/'.join(sdirs)
296 dirs = b'/'.join(sdirs)
297 if len(dirs) > 0:
297 if len(dirs) > 0:
298 dirs += b'/'
298 dirs += b'/'
299 res = b'dh/' + dirs + digest + ext
299 res = b'dh/' + dirs + digest + ext
300 spaceleft = _maxstorepathlen - len(res)
300 spaceleft = _maxstorepathlen - len(res)
301 if spaceleft > 0:
301 if spaceleft > 0:
302 filler = basename[:spaceleft]
302 filler = basename[:spaceleft]
303 res = b'dh/' + dirs + filler + digest + ext
303 res = b'dh/' + dirs + filler + digest + ext
304 return res
304 return res
305
305
306
306
307 def _hybridencode(path, dotencode):
307 def _hybridencode(path, dotencode):
308 """encodes path with a length limit
308 """encodes path with a length limit
309
309
310 Encodes all paths that begin with 'data/', according to the following.
310 Encodes all paths that begin with 'data/', according to the following.
311
311
312 Default encoding (reversible):
312 Default encoding (reversible):
313
313
314 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
315 characters are encoded as '~xx', where xx is the two digit hex code
315 characters are encoded as '~xx', where xx is the two digit hex code
316 of the character (see encodefilename).
316 of the character (see encodefilename).
317 Relevant path components consisting of Windows reserved filenames are
317 Relevant path components consisting of Windows reserved filenames are
318 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
319
319
320 Hashed encoding (not reversible):
320 Hashed encoding (not reversible):
321
321
322 If the default-encoded path is longer than _maxstorepathlen, a
322 If the default-encoded path is longer than _maxstorepathlen, a
323 non-reversible hybrid hashing of the path is done instead.
323 non-reversible hybrid hashing of the path is done instead.
324 This encoding uses up to _dirprefixlen characters of all directory
324 This encoding uses up to _dirprefixlen characters of all directory
325 levels of the lowerencoded path, but not more levels than can fit into
325 levels of the lowerencoded path, but not more levels than can fit into
326 _maxshortdirslen.
326 _maxshortdirslen.
327 Then follows the filler followed by the sha digest of the full path.
327 Then follows the filler followed by the sha digest of the full path.
328 The filler is the beginning of the basename of the lowerencoded path
328 The filler is the beginning of the basename of the lowerencoded path
329 (the basename is everything after the last path separator). The filler
329 (the basename is everything after the last path separator). The filler
330 is as long as possible, filling in characters from the basename until
330 is as long as possible, filling in characters from the basename until
331 the encoded path has _maxstorepathlen characters (or all chars of the
331 the encoded path has _maxstorepathlen characters (or all chars of the
332 basename have been taken).
332 basename have been taken).
333 The extension (e.g. '.i' or '.d') is preserved.
333 The extension (e.g. '.i' or '.d') is preserved.
334
334
335 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
336 encoding was used.
336 encoding was used.
337 """
337 """
338 path = encodedir(path)
338 path = encodedir(path)
339 ef = _encodefname(path).split(b'/')
339 ef = _encodefname(path).split(b'/')
340 res = b'/'.join(_auxencode(ef, dotencode))
340 res = b'/'.join(_auxencode(ef, dotencode))
341 if len(res) > _maxstorepathlen:
341 if len(res) > _maxstorepathlen:
342 res = _hashencode(path, dotencode)
342 res = _hashencode(path, dotencode)
343 return res
343 return res
344
344
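
# A standalone sketch of the two regimes described in the docstring,
# with hypothetical paths (assumes a Mercurial checkout on sys.path):
from mercurial.store import _hybridencode
short = _hybridencode(b'data/FOO/bar.txt.i', True)
assert short == b'data/_f_o_o/bar.txt.i'  # reversible default encoding
long_ = _hybridencode(b'data/' + b'x' * 200 + b'.i', True)
assert long_.startswith(b'dh/') and len(long_) <= 120  # hashed, capped at _maxstorepathlen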
345
345
346 def _pathencode(path):
346 def _pathencode(path):
347 de = encodedir(path)
347 de = encodedir(path)
348 if len(path) > _maxstorepathlen:
348 if len(path) > _maxstorepathlen:
349 return _hashencode(de, True)
349 return _hashencode(de, True)
350 ef = _encodefname(de).split(b'/')
350 ef = _encodefname(de).split(b'/')
351 res = b'/'.join(_auxencode(ef, True))
351 res = b'/'.join(_auxencode(ef, True))
352 if len(res) > _maxstorepathlen:
352 if len(res) > _maxstorepathlen:
353 return _hashencode(de, True)
353 return _hashencode(de, True)
354 return res
354 return res
355
355
356
356
357 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 _pathencode = getattr(parsers, 'pathencode', _pathencode)
358
358
359
359
360 def _plainhybridencode(f):
360 def _plainhybridencode(f):
361 return _hybridencode(f, False)
361 return _hybridencode(f, False)
362
362
363
363
364 def _calcmode(vfs):
364 def _calcmode(vfs):
365 try:
365 try:
366 # files in .hg/ will be created using this mode
366 # files in .hg/ will be created using this mode
367 mode = vfs.stat().st_mode
367 mode = vfs.stat().st_mode
368 # avoid some useless chmods
368 # avoid some useless chmods
369 if (0o777 & ~util.umask) == (0o777 & mode):
369 if (0o777 & ~util.umask) == (0o777 & mode):
370 mode = None
370 mode = None
371 except OSError:
371 except OSError:
372 mode = None
372 mode = None
373 return mode
373 return mode
374
374
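
# A quick standalone check of the chmod shortcut above, with a
# hypothetical umask: when the store directory already has the mode a
# fresh file would get, createmode is left as None and chmods are skipped.
umask = 0o022
mode = 0o755
assert (0o777 & ~umask) == (0o777 & mode)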
375
375
376 _data = [
376 _data = [
377 b'bookmarks',
377 b'bookmarks',
378 b'narrowspec',
378 b'narrowspec',
379 b'data',
379 b'data',
380 b'meta',
380 b'meta',
381 b'00manifest.d',
381 b'00manifest.d',
382 b'00manifest.i',
382 b'00manifest.i',
383 b'00changelog.d',
383 b'00changelog.d',
384 b'00changelog.i',
384 b'00changelog.i',
385 b'phaseroots',
385 b'phaseroots',
386 b'obsstore',
386 b'obsstore',
387 b'requires',
387 b'requires',
388 ]
388 ]
389
389
390 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
390 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
391 REVLOG_FILES_OTHER_EXT = (
391 REVLOG_FILES_OTHER_EXT = (
392 b'.idx',
392 b'.idx',
393 b'.d',
393 b'.d',
394 b'.dat',
394 b'.dat',
395 b'.n',
395 b'.n',
396 b'.nd',
396 b'.nd',
397 b'.sda',
397 b'.sda',
398 b'd.tmpcensored',
398 b'd.tmpcensored',
399 )
399 )
400 # files that are "volatile" and might change between listing and streaming
400 # files that are "volatile" and might change between listing and streaming
401 #
401 #
402 # note: the ".nd" files are nodemap data and won't "change", but they might be
402 # note: the ".nd" files are nodemap data and won't "change", but they might be
403 # deleted.
403 # deleted.
404 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
404 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
405
405
406 # some exception to the above matching
406 # some exception to the above matching
407 #
407 #
408 # XXX This is currently not in use because of issue6542
408 # XXX This is currently not in use because of issue6542
409 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
409 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
410
410
411
411
412 def is_revlog(f, kind, st):
412 def is_revlog(f, kind, st):
413 if kind != stat.S_IFREG:
413 if kind != stat.S_IFREG:
414 return None
414 return None
415 return revlog_type(f)
415 return revlog_type(f)
416
416
417
417
418 def revlog_type(f):
418 def revlog_type(f):
419 # XXX we need to filter the `undo.` files created by the transaction here;
419 # XXX we need to filter the `undo.` files created by the transaction here;
420 # however, being naive about it also filters revlogs for `undo.*` files, leading to
420 # however, being naive about it also filters revlogs for `undo.*` files, leading to
421 # issue6542. So we no longer use EXCLUDED.
421 # issue6542. So we no longer use EXCLUDED.
422 if f.endswith(REVLOG_FILES_MAIN_EXT):
422 if f.endswith(REVLOG_FILES_MAIN_EXT):
423 return FILEFLAGS_REVLOG_MAIN
423 return FILEFLAGS_REVLOG_MAIN
424 elif f.endswith(REVLOG_FILES_OTHER_EXT):
424 elif f.endswith(REVLOG_FILES_OTHER_EXT):
425 t = FILETYPE_FILELOG_OTHER
425 t = FILETYPE_FILELOG_OTHER
426 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
426 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
427 t |= FILEFLAGS_VOLATILE
427 t |= FILEFLAGS_VOLATILE
428 return t
428 return t
429 return None
429 return None
430
430
431
431
432 # the file is part of changelog data
432 # the file is part of changelog data
433 FILEFLAGS_CHANGELOG = 1 << 13
433 FILEFLAGS_CHANGELOG = 1 << 13
434 # the file is part of manifest data
434 # the file is part of manifest data
435 FILEFLAGS_MANIFESTLOG = 1 << 12
435 FILEFLAGS_MANIFESTLOG = 1 << 12
436 # the file is part of filelog data
436 # the file is part of filelog data
437 FILEFLAGS_FILELOG = 1 << 11
437 FILEFLAGS_FILELOG = 1 << 11
438 # files that are not directly part of a revlog
438 # files that are not directly part of a revlog
439 FILEFLAGS_OTHER = 1 << 10
439 FILEFLAGS_OTHER = 1 << 10
440
440
441 # the main entry point for a revlog
441 # the main entry point for a revlog
442 FILEFLAGS_REVLOG_MAIN = 1 << 1
442 FILEFLAGS_REVLOG_MAIN = 1 << 1
443 # a secondary file for a revlog
443 # a secondary file for a revlog
444 FILEFLAGS_REVLOG_OTHER = 1 << 0
444 FILEFLAGS_REVLOG_OTHER = 1 << 0
445
445
446 # files that are "volatile" and might change between listing and streaming
446 # files that are "volatile" and might change between listing and streaming
447 FILEFLAGS_VOLATILE = 1 << 20
447 FILEFLAGS_VOLATILE = 1 << 20
448
448
449 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
449 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
450 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
450 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
451 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
451 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
452 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_OTHER = FILEFLAGS_OTHER
455 FILETYPE_OTHER = FILEFLAGS_OTHER
456
456
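
# A standalone sketch of how these bit flags compose and are tested;
# the constants are redefined locally so the snippet runs on its own:
FILEFLAGS_FILELOG = 1 << 11
FILEFLAGS_REVLOG_MAIN = 1 << 1
FILEFLAGS_VOLATILE = 1 << 20
t = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN  # e.g. a filelog ".i" file
assert t & FILEFLAGS_FILELOG
assert not t & FILEFLAGS_VOLATILE              # not a ".n"/".nd" file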
457
457
458 class basicstore(object):
458 class basicstore(object):
459 '''base class for local repository stores'''
459 '''base class for local repository stores'''
460
460
461 def __init__(self, path, vfstype):
461 def __init__(self, path, vfstype):
462 vfs = vfstype(path)
462 vfs = vfstype(path)
463 self.path = vfs.base
463 self.path = vfs.base
464 self.createmode = _calcmode(vfs)
464 self.createmode = _calcmode(vfs)
465 vfs.createmode = self.createmode
465 vfs.createmode = self.createmode
466 self.rawvfs = vfs
466 self.rawvfs = vfs
467 self.vfs = vfsmod.filtervfs(vfs, encodedir)
467 self.vfs = vfsmod.filtervfs(vfs, encodedir)
468 self.opener = self.vfs
468 self.opener = self.vfs
469
469
470 def join(self, f):
470 def join(self, f):
471 return self.path + b'/' + encodedir(f)
471 return self.path + b'/' + encodedir(f)
472
472
473 def _walk(self, relpath, recurse):
473 def _walk(self, relpath, recurse):
474 '''yields (revlog_type, unencoded, size)'''
474 '''yields (revlog_type, unencoded, size)'''
475 path = self.path
475 path = self.path
476 if relpath:
476 if relpath:
477 path += b'/' + relpath
477 path += b'/' + relpath
478 striplen = len(self.path) + 1
478 striplen = len(self.path) + 1
479 l = []
479 l = []
480 if self.rawvfs.isdir(path):
480 if self.rawvfs.isdir(path):
481 visit = [path]
481 visit = [path]
482 readdir = self.rawvfs.readdir
482 readdir = self.rawvfs.readdir
483 while visit:
483 while visit:
484 p = visit.pop()
484 p = visit.pop()
485 for f, kind, st in readdir(p, stat=True):
485 for f, kind, st in readdir(p, stat=True):
486 fp = p + b'/' + f
486 fp = p + b'/' + f
487 rl_type = is_revlog(f, kind, st)
487 rl_type = is_revlog(f, kind, st)
488 if rl_type is not None:
488 if rl_type is not None:
489 n = util.pconvert(fp[striplen:])
489 n = util.pconvert(fp[striplen:])
490 l.append((rl_type, decodedir(n), st.st_size))
490 l.append((rl_type, decodedir(n), st.st_size))
491 elif kind == stat.S_IFDIR and recurse:
491 elif kind == stat.S_IFDIR and recurse:
492 visit.append(fp)
492 visit.append(fp)
493 l.sort()
493 l.sort()
494 return l
494 return l
495
495
496 def changelog(self, trypending, concurrencychecker=None):
496 def changelog(self, trypending, concurrencychecker=None):
497 return changelog.changelog(
497 return changelog.changelog(
498 self.vfs,
498 self.vfs,
499 trypending=trypending,
499 trypending=trypending,
500 concurrencychecker=concurrencychecker,
500 concurrencychecker=concurrencychecker,
501 )
501 )
502
502
503 def manifestlog(self, repo, storenarrowmatch):
503 def manifestlog(self, repo, storenarrowmatch):
504 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
504 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
505 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
505 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
506
506
507 def datafiles(self, matcher=None, undecodable=None):
507 def datafiles(self, matcher=None, undecodable=None):
508 """Like walk, but excluding the changelog and root manifest.
508 """Like walk, but excluding the changelog and root manifest.
509
509
510 When [undecodable] is None, revlog names that can't be
510 When [undecodable] is None, revlog names that can't be
511 decoded cause an exception. When it is provided, it should
511 decoded cause an exception. When it is provided, it should
512 be a list and the filenames that can't be decoded are added
512 be a list and the filenames that can't be decoded are added
513 to it instead. This is very rarely needed."""
513 to it instead. This is very rarely needed."""
514 files = self._walk(b'data', True) + self._walk(b'meta', True)
514 files = self._walk(b'data', True) + self._walk(b'meta', True)
515 for (t, u, s) in files:
515 for (t, u, s) in files:
516 yield (FILEFLAGS_FILELOG | t, u, s)
516 yield (FILEFLAGS_FILELOG | t, u, s)
517
517
518 def topfiles(self):
518 def topfiles(self):
519 # yield manifest before changelog
519 # yield manifest before changelog
520 files = reversed(self._walk(b'', False))
520 files = reversed(self._walk(b'', False))
521 for (t, u, s) in files:
521 for (t, u, s) in files:
522 if u.startswith(b'00changelog'):
522 if u.startswith(b'00changelog'):
523 yield (FILEFLAGS_CHANGELOG | t, u, s)
523 yield (FILEFLAGS_CHANGELOG | t, u, s)
524 elif u.startswith(b'00manifest'):
524 elif u.startswith(b'00manifest'):
525 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
525 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
526 else:
526 else:
527 yield (FILETYPE_OTHER | t, u, s)
527 yield (FILETYPE_OTHER | t, u, s)
528
528
529 def walk(self, matcher=None):
529 def walk(self, matcher=None):
530 """return file related to data storage (ie: revlogs)
530 """return file related to data storage (ie: revlogs)
531
531
532 yields (file_type, unencoded, size)
532 yields (file_type, unencoded, size)
533
533
534 if a matcher is passed, only storage files for tracked paths that
534 if a matcher is passed, only storage files for tracked paths that
535 match the matcher are yielded
535 match the matcher are yielded
536 """
536 """
537 # yield data files first
537 # yield data files first
538 for x in self.datafiles(matcher):
538 for x in self.datafiles(matcher):
539 yield x
539 yield x
540 for x in self.topfiles():
540 for x in self.topfiles():
541 yield x
541 yield x
542
542
543 def copylist(self):
543 def copylist(self):
544 return _data
544 return _data
545
545
546 def write(self, tr):
546 def write(self, tr):
547 pass
547 pass
548
548
549 def invalidatecaches(self):
549 def invalidatecaches(self):
550 pass
550 pass
551
551
552 def markremoved(self, fn):
552 def markremoved(self, fn):
553 pass
553 pass
554
554
555 def __contains__(self, path):
555 def __contains__(self, path):
556 '''Checks if the store contains path'''
556 '''Checks if the store contains path'''
557 path = b"/".join((b"data", path))
557 path = b"/".join((b"data", path))
558 # file?
558 # file?
559 if self.vfs.exists(path + b".i"):
559 if self.vfs.exists(path + b".i"):
560 return True
560 return True
561 # dir?
561 # dir?
562 if not path.endswith(b"/"):
562 if not path.endswith(b"/"):
563 path = path + b"/"
563 path = path + b"/"
564 return self.vfs.exists(path)
564 return self.vfs.exists(path)
565
565
566
566
567 class encodedstore(basicstore):
567 class encodedstore(basicstore):
568 def __init__(self, path, vfstype):
568 def __init__(self, path, vfstype):
569 vfs = vfstype(path + b'/store')
569 vfs = vfstype(path + b'/store')
570 self.path = vfs.base
570 self.path = vfs.base
571 self.createmode = _calcmode(vfs)
571 self.createmode = _calcmode(vfs)
572 vfs.createmode = self.createmode
572 vfs.createmode = self.createmode
573 self.rawvfs = vfs
573 self.rawvfs = vfs
574 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
574 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
575 self.opener = self.vfs
575 self.opener = self.vfs
576
576
577 # note: topfiles would also need a decode phase. It is just that in
577 # note: topfiles would also need a decode phase. It is just that in
578 # practice we do not have any file outside of `data/` that needs encoding.
578 # practice we do not have any file outside of `data/` that needs encoding.
579 # However that might change, so we should probably add a test and encoding/
579 # However that might change, so we should probably add a test and encoding/
580 # decoding for it too. See issue6548.
580 # decoding for it too. See issue6548.
581
581
582 def datafiles(self, matcher=None, undecodable=None):
582 def datafiles(self, matcher=None, undecodable=None):
583 for t, f1, size in super(encodedstore, self).datafiles():
583 for t, f1, size in super(encodedstore, self).datafiles():
584 try:
584 try:
585 f2 = decodefilename(f1)
585 f2 = decodefilename(f1)
586 except KeyError:
586 except KeyError:
587 if undecodable is None:
587 if undecodable is None:
588 msg = _(b'undecodable revlog name %s') % f1
588 msg = _(b'undecodable revlog name %s') % f1
589 raise error.StorageError(msg)
589 raise error.StorageError(msg)
590 else:
590 else:
591 undecodable.append(f1)
591 undecodable.append(f1)
592 continue
592 continue
593 if not _matchtrackedpath(f2, matcher):
593 if not _matchtrackedpath(f2, matcher):
594 continue
594 continue
595 yield t, f2, size
595 yield t, f2, size
596
596
597 def join(self, f):
597 def join(self, f):
598 return self.path + b'/' + encodefilename(f)
598 return self.path + b'/' + encodefilename(f)
599
599
600 def copylist(self):
600 def copylist(self):
601 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
601 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
602
602
603
603
604 class fncache(object):
604 class fncache(object):
605 # the filename used to be partially encoded
605 # the filename used to be partially encoded
606 # hence the encodedir/decodedir dance
606 # hence the encodedir/decodedir dance
607 def __init__(self, vfs):
607 def __init__(self, vfs):
608 self.vfs = vfs
608 self.vfs = vfs
609 self.entries = None
609 self.entries = None
610 self._dirty = False
610 self._dirty = False
611 # set of new additions to fncache
611 # set of new additions to fncache
612 self.addls = set()
612 self.addls = set()
613
613
614 def ensureloaded(self, warn=None):
614 def ensureloaded(self, warn=None):
615 """read the fncache file if not already read.
615 """read the fncache file if not already read.
616
616
617 If the file on disk is corrupted, raise. If warn is provided,
617 If the file on disk is corrupted, raise. If warn is provided,
618 warn and keep going instead."""
618 warn and keep going instead."""
619 if self.entries is None:
619 if self.entries is None:
620 self._load(warn)
620 self._load(warn)
621
621
622 def _load(self, warn=None):
622 def _load(self, warn=None):
623 '''fill the entries from the fncache file'''
623 '''fill the entries from the fncache file'''
624 self._dirty = False
624 self._dirty = False
625 try:
625 try:
626 fp = self.vfs(b'fncache', mode=b'rb')
626 fp = self.vfs(b'fncache', mode=b'rb')
627 except IOError:
627 except IOError:
628 # skip nonexistent file
628 # skip nonexistent file
629 self.entries = set()
629 self.entries = set()
630 return
630 return
631
631
632 self.entries = set()
632 self.entries = set()
633 chunk = b''
633 chunk = b''
634 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
634 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
635 chunk += c
635 chunk += c
636 try:
636 try:
637 p = chunk.rindex(b'\n')
637 p = chunk.rindex(b'\n')
638 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
638 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
639 chunk = chunk[p + 1 :]
639 chunk = chunk[p + 1 :]
640 except ValueError:
640 except ValueError:
641 # substring '\n' not found, maybe the entry is bigger than the
641 # substring '\n' not found, maybe the entry is bigger than the
642 # chunksize, so let's keep iterating
642 # chunksize, so let's keep iterating
643 pass
643 pass
644
644
645 if chunk:
645 if chunk:
646 msg = _(b"fncache does not end with a newline")
646 msg = _(b"fncache does not end with a newline")
647 if warn:
647 if warn:
648 warn(msg + b'\n')
648 warn(msg + b'\n')
649 else:
649 else:
650 raise error.Abort(
650 raise error.Abort(
651 msg,
651 msg,
652 hint=_(
652 hint=_(
653 b"use 'hg debugrebuildfncache' to "
653 b"use 'hg debugrebuildfncache' to "
654 b"rebuild the fncache"
654 b"rebuild the fncache"
655 ),
655 ),
656 )
656 )
657 self._checkentries(fp, warn)
657 self._checkentries(fp, warn)
658 fp.close()
658 fp.close()
659
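
# The loop above uses the two-argument iter(callable, sentinel) idiom:
# fp.read(fncache_chunksize) is called repeatedly until it returns the
# sentinel b''. A standalone sketch with a hypothetical file:
import functools
with open('example.bin', 'rb') as fp:
    for chunk in iter(functools.partial(fp.read, 10 ** 6), b''):
        pass  # at most ~10 ** 6 bytes of this file held per iteration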
659
660 def _checkentries(self, fp, warn):
660 def _checkentries(self, fp, warn):
661 """make sure there is no empty string in entries"""
661 """make sure there is no empty string in entries"""
662 if b'' in self.entries:
662 if b'' in self.entries:
663 fp.seek(0)
663 fp.seek(0)
664 for n, line in enumerate(util.iterfile(fp)):
664 for n, line in enumerate(fp):
665 if not line.rstrip(b'\n'):
665 if not line.rstrip(b'\n'):
666 t = _(b'invalid entry in fncache, line %d') % (n + 1)
666 t = _(b'invalid entry in fncache, line %d') % (n + 1)
667 if warn:
667 if warn:
668 warn(t + b'\n')
668 warn(t + b'\n')
669 else:
669 else:
670 raise error.Abort(t)
670 raise error.Abort(t)
671
671
672 def write(self, tr):
672 def write(self, tr):
673 if self._dirty:
673 if self._dirty:
674 assert self.entries is not None
674 assert self.entries is not None
675 self.entries = self.entries | self.addls
675 self.entries = self.entries | self.addls
676 self.addls = set()
676 self.addls = set()
677 tr.addbackup(b'fncache')
677 tr.addbackup(b'fncache')
678 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
678 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
679 if self.entries:
679 if self.entries:
680 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
680 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
681 fp.close()
681 fp.close()
682 self._dirty = False
682 self._dirty = False
683 if self.addls:
683 if self.addls:
684 # if we have just new entries, let's append them to the fncache
684 # if we have just new entries, let's append them to the fncache
685 tr.addbackup(b'fncache')
685 tr.addbackup(b'fncache')
686 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
686 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
687 if self.addls:
687 if self.addls:
688 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
688 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
689 fp.close()
689 fp.close()
690 self.entries = None
690 self.entries = None
691 self.addls = set()
691 self.addls = set()
692
692
693 def add(self, fn):
693 def add(self, fn):
694 if self.entries is None:
694 if self.entries is None:
695 self._load()
695 self._load()
696 if fn not in self.entries:
696 if fn not in self.entries:
697 self.addls.add(fn)
697 self.addls.add(fn)
698
698
699 def remove(self, fn):
699 def remove(self, fn):
700 if self.entries is None:
700 if self.entries is None:
701 self._load()
701 self._load()
702 if fn in self.addls:
702 if fn in self.addls:
703 self.addls.remove(fn)
703 self.addls.remove(fn)
704 return
704 return
705 try:
705 try:
706 self.entries.remove(fn)
706 self.entries.remove(fn)
707 self._dirty = True
707 self._dirty = True
708 except KeyError:
708 except KeyError:
709 pass
709 pass
710
710
711 def __contains__(self, fn):
711 def __contains__(self, fn):
712 if fn in self.addls:
712 if fn in self.addls:
713 return True
713 return True
714 if self.entries is None:
714 if self.entries is None:
715 self._load()
715 self._load()
716 return fn in self.entries
716 return fn in self.entries
717
717
718 def __iter__(self):
718 def __iter__(self):
719 if self.entries is None:
719 if self.entries is None:
720 self._load()
720 self._load()
721 return iter(self.entries | self.addls)
721 return iter(self.entries | self.addls)
722
722
723
723
724 class _fncachevfs(vfsmod.proxyvfs):
724 class _fncachevfs(vfsmod.proxyvfs):
725 def __init__(self, vfs, fnc, encode):
725 def __init__(self, vfs, fnc, encode):
726 vfsmod.proxyvfs.__init__(self, vfs)
726 vfsmod.proxyvfs.__init__(self, vfs)
727 self.fncache = fnc
727 self.fncache = fnc
728 self.encode = encode
728 self.encode = encode
729
729
730 def __call__(self, path, mode=b'r', *args, **kw):
730 def __call__(self, path, mode=b'r', *args, **kw):
731 encoded = self.encode(path)
731 encoded = self.encode(path)
732 if mode not in (b'r', b'rb') and (
732 if mode not in (b'r', b'rb') and (
733 path.startswith(b'data/') or path.startswith(b'meta/')
733 path.startswith(b'data/') or path.startswith(b'meta/')
734 ):
734 ):
735 # do not trigger a fncache load when adding a file that already is
735 # do not trigger a fncache load when adding a file that already is
736 # known to exist.
736 # known to exist.
737 notload = self.fncache.entries is None and self.vfs.exists(encoded)
737 notload = self.fncache.entries is None and self.vfs.exists(encoded)
738 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
738 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
739 # when appending to an existing file, if the file has size zero,
739 # when appending to an existing file, if the file has size zero,
740 # it should be considered as missing. Such zero-size files are
740 # it should be considered as missing. Such zero-size files are
741 # the result of truncation when a transaction is aborted.
741 # the result of truncation when a transaction is aborted.
742 notload = False
742 notload = False
743 if not notload:
743 if not notload:
744 self.fncache.add(path)
744 self.fncache.add(path)
745 return self.vfs(encoded, mode, *args, **kw)
745 return self.vfs(encoded, mode, *args, **kw)
746
746
747 def join(self, path):
747 def join(self, path):
748 if path:
748 if path:
749 return self.vfs.join(self.encode(path))
749 return self.vfs.join(self.encode(path))
750 else:
750 else:
751 return self.vfs.join(path)
751 return self.vfs.join(path)
752
752
753 def register_file(self, path):
753 def register_file(self, path):
754 """generic hook point to lets fncache steer its stew"""
754 """generic hook point to lets fncache steer its stew"""
755 if path.startswith(b'data/') or path.startswith(b'meta/'):
755 if path.startswith(b'data/') or path.startswith(b'meta/'):
756 self.fncache.add(path)
756 self.fncache.add(path)
757
757
758
758
759 class fncachestore(basicstore):
759 class fncachestore(basicstore):
760 def __init__(self, path, vfstype, dotencode):
760 def __init__(self, path, vfstype, dotencode):
761 if dotencode:
761 if dotencode:
762 encode = _pathencode
762 encode = _pathencode
763 else:
763 else:
764 encode = _plainhybridencode
764 encode = _plainhybridencode
765 self.encode = encode
765 self.encode = encode
766 vfs = vfstype(path + b'/store')
766 vfs = vfstype(path + b'/store')
767 self.path = vfs.base
767 self.path = vfs.base
768 self.pathsep = self.path + b'/'
768 self.pathsep = self.path + b'/'
769 self.createmode = _calcmode(vfs)
769 self.createmode = _calcmode(vfs)
770 vfs.createmode = self.createmode
770 vfs.createmode = self.createmode
771 self.rawvfs = vfs
771 self.rawvfs = vfs
772 fnc = fncache(vfs)
772 fnc = fncache(vfs)
773 self.fncache = fnc
773 self.fncache = fnc
774 self.vfs = _fncachevfs(vfs, fnc, encode)
774 self.vfs = _fncachevfs(vfs, fnc, encode)
775 self.opener = self.vfs
775 self.opener = self.vfs
776
776
777 def join(self, f):
777 def join(self, f):
778 return self.pathsep + self.encode(f)
778 return self.pathsep + self.encode(f)
779
779
780 def getsize(self, path):
780 def getsize(self, path):
781 return self.rawvfs.stat(path).st_size
781 return self.rawvfs.stat(path).st_size
782
782
783 def datafiles(self, matcher=None, undecodable=None):
783 def datafiles(self, matcher=None, undecodable=None):
784 for f in sorted(self.fncache):
784 for f in sorted(self.fncache):
785 if not _matchtrackedpath(f, matcher):
785 if not _matchtrackedpath(f, matcher):
786 continue
786 continue
787 ef = self.encode(f)
787 ef = self.encode(f)
788 try:
788 try:
789 t = revlog_type(f)
789 t = revlog_type(f)
790 assert t is not None, f
790 assert t is not None, f
791 t |= FILEFLAGS_FILELOG
791 t |= FILEFLAGS_FILELOG
792 yield t, f, self.getsize(ef)
792 yield t, f, self.getsize(ef)
793 except OSError as err:
793 except OSError as err:
794 if err.errno != errno.ENOENT:
794 if err.errno != errno.ENOENT:
795 raise
795 raise
796
796
797 def copylist(self):
797 def copylist(self):
798 d = (
798 d = (
799 b'bookmarks',
799 b'bookmarks',
800 b'narrowspec',
800 b'narrowspec',
801 b'data',
801 b'data',
802 b'meta',
802 b'meta',
803 b'dh',
803 b'dh',
804 b'fncache',
804 b'fncache',
805 b'phaseroots',
805 b'phaseroots',
806 b'obsstore',
806 b'obsstore',
807 b'00manifest.d',
807 b'00manifest.d',
808 b'00manifest.i',
808 b'00manifest.i',
809 b'00changelog.d',
809 b'00changelog.d',
810 b'00changelog.i',
810 b'00changelog.i',
811 b'requires',
811 b'requires',
812 )
812 )
813 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
813 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
814
814
815 def write(self, tr):
815 def write(self, tr):
816 self.fncache.write(tr)
816 self.fncache.write(tr)
817
817
818 def invalidatecaches(self):
818 def invalidatecaches(self):
819 self.fncache.entries = None
819 self.fncache.entries = None
820 self.fncache.addls = set()
820 self.fncache.addls = set()
821
821
822 def markremoved(self, fn):
822 def markremoved(self, fn):
823 self.fncache.remove(fn)
823 self.fncache.remove(fn)
824
824
825 def _exists(self, f):
825 def _exists(self, f):
826 ef = self.encode(f)
826 ef = self.encode(f)
827 try:
827 try:
828 self.getsize(ef)
828 self.getsize(ef)
829 return True
829 return True
830 except OSError as err:
830 except OSError as err:
831 if err.errno != errno.ENOENT:
831 if err.errno != errno.ENOENT:
832 raise
832 raise
833 # nonexistent entry
833 # nonexistent entry
834 return False
834 return False
835
835
836 def __contains__(self, path):
836 def __contains__(self, path):
837 '''Checks if the store contains path'''
837 '''Checks if the store contains path'''
838 path = b"/".join((b"data", path))
838 path = b"/".join((b"data", path))
839 # check for files (exact match)
839 # check for files (exact match)
840 e = path + b'.i'
840 e = path + b'.i'
841 if e in self.fncache and self._exists(e):
841 if e in self.fncache and self._exists(e):
842 return True
842 return True
843 # now check for directories (prefix match)
843 # now check for directories (prefix match)
844 if not path.endswith(b'/'):
844 if not path.endswith(b'/'):
845 path += b'/'
845 path += b'/'
846 for e in self.fncache:
846 for e in self.fncache:
847 if e.startswith(path) and self._exists(e):
847 if e.startswith(path) and self._exists(e):
848 return True
848 return True
849 return False
849 return False