py3: stop using util.iterfile()...
Gregory Szorc
r49796:fd5b8e69 default
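The change is mechanical: each `util.iterfile(fp)` call below is replaced with the bare file object. On Python 3, file objects already iterate line by line, and util.iterfile() had become effectively an identity function there (it existed to work around Python 2 file-iteration behavior), so callers can enumerate the file directly. A minimal sketch of the resulting pattern, assuming that behavior; the helper name is hypothetical and not part of the commit:

    # Hypothetical helper illustrating the post-change pattern used by
    # mapfile._read() and parsesplicemap() below.
    def read_map_lines(path):
        with open(path, 'rb') as fp:
            # Before: for i, line in enumerate(util.iterfile(fp)):
            for i, line in enumerate(fp):  # py3 files iterate by line natively
                line = line.splitlines()[0].rstrip()
                if not line:
                    continue  # ignore blank lines, as the real code does
                yield i + 1, line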
@@ -1,597 +1,597 @@
1 1 # common.py - common code for the convert extension
2 2 #
3 3 # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import base64
9 9 import datetime
10 10 import errno
11 11 import os
12 12 import pickle
13 13 import re
14 14 import shlex
15 15 import subprocess
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.pycompat import open
19 19 from mercurial import (
20 20 encoding,
21 21 error,
22 22 phases,
23 23 pycompat,
24 24 util,
25 25 )
26 26 from mercurial.utils import procutil
27 27
28 28 propertycache = util.propertycache
29 29
30 30
31 31 def _encodeornone(d):
32 32 if d is None:
33 33 return
34 34 return d.encode('latin1')
35 35
36 36
37 37 class _shlexpy3proxy(object):
38 38 def __init__(self, l):
39 39 self._l = l
40 40
41 41 def __iter__(self):
42 42 return (_encodeornone(v) for v in self._l)
43 43
44 44 def get_token(self):
45 45 return _encodeornone(self._l.get_token())
46 46
47 47 @property
48 48 def infile(self):
49 49 return self._l.infile or b'<unknown>'
50 50
51 51 @property
52 52 def lineno(self):
53 53 return self._l.lineno
54 54
55 55
56 56 def shlexer(data=None, filepath=None, wordchars=None, whitespace=None):
57 57 if data is None:
58 58 if pycompat.ispy3:
59 59 data = open(filepath, b'r', encoding='latin1')
60 60 else:
61 61 data = open(filepath, b'r')
62 62 else:
63 63 if filepath is not None:
64 64 raise error.ProgrammingError(
65 65 b'shlexer only accepts data or filepath, not both'
66 66 )
67 67 if pycompat.ispy3:
68 68 data = data.decode('latin1')
69 69 l = shlex.shlex(data, infile=filepath, posix=True)
70 70 if whitespace is not None:
71 71 l.whitespace_split = True
72 72 if pycompat.ispy3:
73 73 l.whitespace += whitespace.decode('latin1')
74 74 else:
75 75 l.whitespace += whitespace
76 76 if wordchars is not None:
77 77 if pycompat.ispy3:
78 78 l.wordchars += wordchars.decode('latin1')
79 79 else:
80 80 l.wordchars += wordchars
81 81 if pycompat.ispy3:
82 82 return _shlexpy3proxy(l)
83 83 return l
84 84
85 85
86 86 if pycompat.ispy3:
87 87 base64_encodebytes = base64.encodebytes
88 88 base64_decodebytes = base64.decodebytes
89 89 else:
90 90 base64_encodebytes = base64.encodestring
91 91 base64_decodebytes = base64.decodestring
92 92
93 93
94 94 def encodeargs(args):
95 95 def encodearg(s):
96 96 lines = base64_encodebytes(s)
97 97 lines = [l.splitlines()[0] for l in pycompat.iterbytestr(lines)]
98 98 return b''.join(lines)
99 99
100 100 s = pickle.dumps(args)
101 101 return encodearg(s)
102 102
103 103
104 104 def decodeargs(s):
105 105 s = base64_decodebytes(s)
106 106 return pickle.loads(s)
107 107
108 108
109 109 class MissingTool(Exception):
110 110 pass
111 111
112 112
113 113 def checktool(exe, name=None, abort=True):
114 114 name = name or exe
115 115 if not procutil.findexe(exe):
116 116 if abort:
117 117 exc = error.Abort
118 118 else:
119 119 exc = MissingTool
120 120 raise exc(_(b'cannot find required "%s" tool') % name)
121 121
122 122
123 123 class NoRepo(Exception):
124 124 pass
125 125
126 126
127 127 SKIPREV = b'SKIP'
128 128
129 129
130 130 class commit(object):
131 131 def __init__(
132 132 self,
133 133 author,
134 134 date,
135 135 desc,
136 136 parents,
137 137 branch=None,
138 138 rev=None,
139 139 extra=None,
140 140 sortkey=None,
141 141 saverev=True,
142 142 phase=phases.draft,
143 143 optparents=None,
144 144 ctx=None,
145 145 ):
146 146 self.author = author or b'unknown'
147 147 self.date = date or b'0 0'
148 148 self.desc = desc
149 149 self.parents = parents # will be converted and used as parents
150 150 self.optparents = optparents or [] # will be used if already converted
151 151 self.branch = branch
152 152 self.rev = rev
153 153 self.extra = extra or {}
154 154 self.sortkey = sortkey
155 155 self.saverev = saverev
156 156 self.phase = phase
157 157 self.ctx = ctx # for hg to hg conversions
158 158
159 159
160 160 class converter_source(object):
161 161 """Conversion source interface"""
162 162
163 163 def __init__(self, ui, repotype, path=None, revs=None):
164 164 """Initialize conversion source (or raise NoRepo("message")
165 165 exception if path is not a valid repository)"""
166 166 self.ui = ui
167 167 self.path = path
168 168 self.revs = revs
169 169 self.repotype = repotype
170 170
171 171 self.encoding = b'utf-8'
172 172
173 173 def checkhexformat(self, revstr, mapname=b'splicemap'):
174 174 """fails if revstr is not a 40 byte hex. mercurial and git both uses
175 175 such format for their revision numbering
176 176 """
177 177 if not re.match(br'[0-9a-fA-F]{40,40}$', revstr):
178 178 raise error.Abort(
179 179 _(b'%s entry %s is not a valid revision identifier')
180 180 % (mapname, revstr)
181 181 )
182 182
183 183 def before(self):
184 184 pass
185 185
186 186 def after(self):
187 187 pass
188 188
189 189 def targetfilebelongstosource(self, targetfilename):
190 190 """Returns true if the given targetfile belongs to the source repo. This
191 191 is useful when only a subdirectory of the target belongs to the source
192 192 repo."""
193 193 # For normal full repo converts, this is always True.
194 194 return True
195 195
196 196 def setrevmap(self, revmap):
197 197 """set the map of already-converted revisions"""
198 198
199 199 def getheads(self):
200 200 """Return a list of this repository's heads"""
201 201 raise NotImplementedError
202 202
203 203 def getfile(self, name, rev):
204 204 """Return a pair (data, mode) where data is the file content
205 205 as a string and mode is one of '', 'x' or 'l'. rev is the
206 206 identifier returned by a previous call to getchanges().
207 207 Data is None if file is missing/deleted in rev.
208 208 """
209 209 raise NotImplementedError
210 210
211 211 def getchanges(self, version, full):
212 212 """Returns a tuple of (files, copies, cleanp2).
213 213
214 214 files is a sorted list of (filename, id) tuples for all files
215 215 changed between version and its first parent returned by
216 216 getcommit(). If full, all files in that revision are returned.
217 217 id is the source revision id of the file.
218 218
219 219 copies is a dictionary of dest: source
220 220
221 221 cleanp2 is the set of filenames that are clean against p2.
222 222 (Files that are clean against p1 are already not in files (unless
223 223 full). This makes it possible to handle p2 clean files similarly.)
224 224 """
225 225 raise NotImplementedError
226 226
227 227 def getcommit(self, version):
228 228 """Return the commit object for version"""
229 229 raise NotImplementedError
230 230
231 231 def numcommits(self):
232 232 """Return the number of commits in this source.
233 233
234 234 If unknown, return None.
235 235 """
236 236 return None
237 237
238 238 def gettags(self):
239 239 """Return the tags as a dictionary of name: revision
240 240
241 241 Tag names must be UTF-8 strings.
242 242 """
243 243 raise NotImplementedError
244 244
245 245 def recode(self, s, encoding=None):
246 246 if not encoding:
247 247 encoding = self.encoding or b'utf-8'
248 248
249 249 if isinstance(s, str):
250 250 return s.encode("utf-8")
251 251 try:
252 252 return s.decode(pycompat.sysstr(encoding)).encode("utf-8")
253 253 except UnicodeError:
254 254 try:
255 255 return s.decode("latin-1").encode("utf-8")
256 256 except UnicodeError:
257 257 return s.decode(pycompat.sysstr(encoding), "replace").encode(
258 258 "utf-8"
259 259 )
260 260
261 261 def getchangedfiles(self, rev, i):
262 262 """Return the files changed by rev compared to parent[i].
263 263
264 264 i is an index selecting one of the parents of rev. The return
265 265 value should be the list of files that are different in rev and
266 266 this parent.
267 267
268 268 If rev has no parents, i is None.
269 269
270 270 This function is only needed to support --filemap
271 271 """
272 272 raise NotImplementedError
273 273
274 274 def converted(self, rev, sinkrev):
275 275 '''Notify the source that a revision has been converted.'''
276 276
277 277 def hasnativeorder(self):
278 278 """Return true if this source has a meaningful, native revision
279 279 order. For instance, Mercurial revisions are stored sequentially
280 280 while there is no such global ordering with Darcs.
281 281 """
282 282 return False
283 283
284 284 def hasnativeclose(self):
285 285 """Return true if this source has ability to close branch."""
286 286 return False
287 287
288 288 def lookuprev(self, rev):
289 289 """If rev is a meaningful revision reference in source, return
290 290 the referenced identifier in the same format used by getcommit().
291 291 return None otherwise.
292 292 """
293 293 return None
294 294
295 295 def getbookmarks(self):
296 296 """Return the bookmarks as a dictionary of name: revision
297 297
298 298 Bookmark names are to be UTF-8 strings.
299 299 """
300 300 return {}
301 301
302 302 def checkrevformat(self, revstr, mapname=b'splicemap'):
303 303 """revstr is a string that describes a revision in the given
304 304 source control system. Return true if revstr has correct
305 305 format.
306 306 """
307 307 return True
308 308
309 309
310 310 class converter_sink(object):
311 311 """Conversion sink (target) interface"""
312 312
313 313 def __init__(self, ui, repotype, path):
314 314 """Initialize conversion sink (or raise NoRepo("message")
315 315 exception if path is not a valid repository)
316 316
317 317 created is a list of paths to remove if a fatal error occurs
318 318 later"""
319 319 self.ui = ui
320 320 self.path = path
321 321 self.created = []
322 322 self.repotype = repotype
323 323
324 324 def revmapfile(self):
325 325 """Path to a file that will contain lines
326 326 source_rev_id sink_rev_id
327 327 mapping equivalent revision identifiers for each system."""
328 328 raise NotImplementedError
329 329
330 330 def authorfile(self):
331 331 """Path to a file that will contain lines
332 332 srcauthor=dstauthor
333 333 mapping equivalent author identifiers for each system."""
334 334 return None
335 335
336 336 def putcommit(
337 337 self, files, copies, parents, commit, source, revmap, full, cleanp2
338 338 ):
339 339 """Create a revision with all changed files listed in 'files'
340 340 and having listed parents. 'commit' is a commit object
341 341 containing at a minimum the author, date, and message for this
342 342 changeset. 'files' is a list of (path, version) tuples,
343 343 'copies' is a dictionary mapping destinations to sources,
344 344 'source' is the source repository, and 'revmap' is a mapfile
345 345 of source revisions to converted revisions. Only getfile() and
346 346 lookuprev() should be called on 'source'. 'full' means that 'files'
347 347 is complete and all other files should be removed.
348 348 'cleanp2' is a set of the filenames that are unchanged from p2
349 349 (only in the common merge case where there are two parents).
350 350
351 351 Note that the sink repository is not told to update itself to
352 352 a particular revision (or even what that revision would be)
353 353 before it receives the file data.
354 354 """
355 355 raise NotImplementedError
356 356
357 357 def puttags(self, tags):
358 358 """Put tags into sink.
359 359
360 360 tags: {tagname: sink_rev_id, ...} where tagname is a UTF-8 string.
361 361 Return a pair (tag_revision, tag_parent_revision), or (None, None)
362 362 if nothing was changed.
363 363 """
364 364 raise NotImplementedError
365 365
366 366 def setbranch(self, branch, pbranches):
367 367 """Set the current branch name. Called before the first putcommit
368 368 on the branch.
369 369 branch: branch name for subsequent commits
370 370 pbranches: (converted parent revision, parent branch) tuples"""
371 371
372 372 def setfilemapmode(self, active):
373 373 """Tell the destination that we're using a filemap
374 374
375 375 Some converter_sources (svn in particular) can claim that a file
376 376 was changed in a revision, even if there was no change. This method
377 377 tells the destination that we're using a filemap and that it should
378 378 filter empty revisions.
379 379 """
380 380
381 381 def before(self):
382 382 pass
383 383
384 384 def after(self):
385 385 pass
386 386
387 387 def putbookmarks(self, bookmarks):
388 388 """Put bookmarks into sink.
389 389
390 390 bookmarks: {bookmarkname: sink_rev_id, ...}
391 391 where bookmarkname is a UTF-8 string.
392 392 """
393 393
394 394 def hascommitfrommap(self, rev):
395 395 """Return False if a rev mentioned in a filemap is known to not be
396 396 present."""
397 397 raise NotImplementedError
398 398
399 399 def hascommitforsplicemap(self, rev):
400 400 """This method is for the special needs for splicemap handling and not
401 401 for general use. Returns True if the sink contains rev, aborts on some
402 402 special cases."""
403 403 raise NotImplementedError
404 404
405 405
406 406 class commandline(object):
407 407 def __init__(self, ui, command):
408 408 self.ui = ui
409 409 self.command = command
410 410
411 411 def prerun(self):
412 412 pass
413 413
414 414 def postrun(self):
415 415 pass
416 416
417 417 def _cmdline(self, cmd, *args, **kwargs):
418 418 kwargs = pycompat.byteskwargs(kwargs)
419 419 cmdline = [self.command, cmd] + list(args)
420 420 for k, v in kwargs.items():
421 421 if len(k) == 1:
422 422 cmdline.append(b'-' + k)
423 423 else:
424 424 cmdline.append(b'--' + k.replace(b'_', b'-'))
425 425 try:
426 426 if len(k) == 1:
427 427 cmdline.append(b'' + v)
428 428 else:
429 429 cmdline[-1] += b'=' + v
430 430 except TypeError:
431 431 pass
432 432 cmdline = [procutil.shellquote(arg) for arg in cmdline]
433 433 if not self.ui.debugflag:
434 434 cmdline += [b'2>', pycompat.bytestr(os.devnull)]
435 435 cmdline = b' '.join(cmdline)
436 436 return cmdline
437 437
438 438 def _run(self, cmd, *args, **kwargs):
439 439 def popen(cmdline):
440 440 p = subprocess.Popen(
441 441 procutil.tonativestr(cmdline),
442 442 shell=True,
443 443 bufsize=-1,
444 444 close_fds=procutil.closefds,
445 445 stdout=subprocess.PIPE,
446 446 )
447 447 return p
448 448
449 449 return self._dorun(popen, cmd, *args, **kwargs)
450 450
451 451 def _run2(self, cmd, *args, **kwargs):
452 452 return self._dorun(procutil.popen2, cmd, *args, **kwargs)
453 453
454 454 def _run3(self, cmd, *args, **kwargs):
455 455 return self._dorun(procutil.popen3, cmd, *args, **kwargs)
456 456
457 457 def _dorun(self, openfunc, cmd, *args, **kwargs):
458 458 cmdline = self._cmdline(cmd, *args, **kwargs)
459 459 self.ui.debug(b'running: %s\n' % (cmdline,))
460 460 self.prerun()
461 461 try:
462 462 return openfunc(cmdline)
463 463 finally:
464 464 self.postrun()
465 465
466 466 def run(self, cmd, *args, **kwargs):
467 467 p = self._run(cmd, *args, **kwargs)
468 468 output = p.communicate()[0]
469 469 self.ui.debug(output)
470 470 return output, p.returncode
471 471
472 472 def runlines(self, cmd, *args, **kwargs):
473 473 p = self._run(cmd, *args, **kwargs)
474 474 output = p.stdout.readlines()
475 475 p.wait()
476 476 self.ui.debug(b''.join(output))
477 477 return output, p.returncode
478 478
479 479 def checkexit(self, status, output=b''):
480 480 if status:
481 481 if output:
482 482 self.ui.warn(_(b'%s error:\n') % self.command)
483 483 self.ui.warn(output)
484 484 msg = procutil.explainexit(status)
485 485 raise error.Abort(b'%s %s' % (self.command, msg))
486 486
487 487 def run0(self, cmd, *args, **kwargs):
488 488 output, status = self.run(cmd, *args, **kwargs)
489 489 self.checkexit(status, output)
490 490 return output
491 491
492 492 def runlines0(self, cmd, *args, **kwargs):
493 493 output, status = self.runlines(cmd, *args, **kwargs)
494 494 self.checkexit(status, b''.join(output))
495 495 return output
496 496
497 497 @propertycache
498 498 def argmax(self):
499 499 # POSIX requires at least 4096 bytes for ARG_MAX
500 500 argmax = 4096
501 501 try:
502 502 argmax = os.sysconf("SC_ARG_MAX")
503 503 except (AttributeError, ValueError):
504 504 pass
505 505
506 506 # Windows shells impose their own limits on command line length,
507 507 # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
508 508 # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
509 509 # details about cmd.exe limitations.
510 510
511 511 # Since ARG_MAX is for command line _and_ environment, lower our limit
512 512 # (and make Windows shells happy while doing this).
513 513 return argmax // 2 - 1
514 514
515 515 def _limit_arglist(self, arglist, cmd, *args, **kwargs):
516 516 cmdlen = len(self._cmdline(cmd, *args, **kwargs))
517 517 limit = self.argmax - cmdlen
518 518 numbytes = 0
519 519 fl = []
520 520 for fn in arglist:
521 521 b = len(fn) + 3
522 522 if numbytes + b < limit or len(fl) == 0:
523 523 fl.append(fn)
524 524 numbytes += b
525 525 else:
526 526 yield fl
527 527 fl = [fn]
528 528 numbytes = b
529 529 if fl:
530 530 yield fl
531 531
532 532 def xargs(self, arglist, cmd, *args, **kwargs):
533 533 for l in self._limit_arglist(arglist, cmd, *args, **kwargs):
534 534 self.run0(cmd, *(list(args) + l), **kwargs)
535 535
536 536
537 537 class mapfile(dict):
538 538 def __init__(self, ui, path):
539 539 super(mapfile, self).__init__()
540 540 self.ui = ui
541 541 self.path = path
542 542 self.fp = None
543 543 self.order = []
544 544 self._read()
545 545
546 546 def _read(self):
547 547 if not self.path:
548 548 return
549 549 try:
550 550 fp = open(self.path, b'rb')
551 551 except IOError as err:
552 552 if err.errno != errno.ENOENT:
553 553 raise
554 554 return
555 for i, line in enumerate(util.iterfile(fp)):
555 for i, line in enumerate(fp):
556 556 line = line.splitlines()[0].rstrip()
557 557 if not line:
558 558 # Ignore blank lines
559 559 continue
560 560 try:
561 561 key, value = line.rsplit(b' ', 1)
562 562 except ValueError:
563 563 raise error.Abort(
564 564 _(b'syntax error in %s(%d): key/value pair expected')
565 565 % (self.path, i + 1)
566 566 )
567 567 if key not in self:
568 568 self.order.append(key)
569 569 super(mapfile, self).__setitem__(key, value)
570 570 fp.close()
571 571
572 572 def __setitem__(self, key, value):
573 573 if self.fp is None:
574 574 try:
575 575 self.fp = open(self.path, b'ab')
576 576 except IOError as err:
577 577 raise error.Abort(
578 578 _(b'could not open map file %r: %s')
579 579 % (self.path, encoding.strtolocal(err.strerror))
580 580 )
581 581 self.fp.write(util.tonativeeol(b'%s %s\n' % (key, value)))
582 582 self.fp.flush()
583 583 super(mapfile, self).__setitem__(key, value)
584 584
585 585 def close(self):
586 586 if self.fp:
587 587 self.fp.close()
588 588 self.fp = None
589 589
590 590
591 591 def makedatetimestamp(t):
592 592 """Like dateutil.makedate() but for time t instead of current time"""
593 593 delta = datetime.datetime.utcfromtimestamp(
594 594 t
595 595 ) - datetime.datetime.fromtimestamp(t)
596 596 tz = delta.days * 86400 + delta.seconds
597 597 return t, tz
@@ -1,667 +1,667 @@
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import os
10 10 import shutil
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial.pycompat import open
14 14 from mercurial import (
15 15 encoding,
16 16 error,
17 17 hg,
18 18 pycompat,
19 19 scmutil,
20 20 util,
21 21 )
22 22 from mercurial.utils import dateutil
23 23
24 24 from . import (
25 25 bzr,
26 26 common,
27 27 cvs,
28 28 darcs,
29 29 filemap,
30 30 git,
31 31 gnuarch,
32 32 hg as hgconvert,
33 33 monotone,
34 34 p4,
35 35 subversion,
36 36 )
37 37
38 38 mapfile = common.mapfile
39 39 MissingTool = common.MissingTool
40 40 NoRepo = common.NoRepo
41 41 SKIPREV = common.SKIPREV
42 42
43 43 bzr_source = bzr.bzr_source
44 44 convert_cvs = cvs.convert_cvs
45 45 convert_git = git.convert_git
46 46 darcs_source = darcs.darcs_source
47 47 gnuarch_source = gnuarch.gnuarch_source
48 48 mercurial_sink = hgconvert.mercurial_sink
49 49 mercurial_source = hgconvert.mercurial_source
50 50 monotone_source = monotone.monotone_source
51 51 p4_source = p4.p4_source
52 52 svn_sink = subversion.svn_sink
53 53 svn_source = subversion.svn_source
54 54
55 55 orig_encoding = b'ascii'
56 56
57 57
58 58 def readauthormap(ui, authorfile, authors=None):
59 59 if authors is None:
60 60 authors = {}
61 61 with open(authorfile, b'rb') as afile:
62 62 for line in afile:
63 63
64 64 line = line.strip()
65 65 if not line or line.startswith(b'#'):
66 66 continue
67 67
68 68 try:
69 69 srcauthor, dstauthor = line.split(b'=', 1)
70 70 except ValueError:
71 71 msg = _(b'ignoring bad line in author map file %s: %s\n')
72 72 ui.warn(msg % (authorfile, line.rstrip()))
73 73 continue
74 74
75 75 srcauthor = srcauthor.strip()
76 76 dstauthor = dstauthor.strip()
77 77 if authors.get(srcauthor) in (None, dstauthor):
78 78 msg = _(b'mapping author %s to %s\n')
79 79 ui.debug(msg % (srcauthor, dstauthor))
80 80 authors[srcauthor] = dstauthor
81 81 continue
82 82
83 83 m = _(b'overriding mapping for author %s, was %s, will be %s\n')
84 84 ui.status(m % (srcauthor, authors[srcauthor], dstauthor))
85 85 return authors
86 86
87 87
88 88 def recode(s):
89 89 if isinstance(s, str):
90 90 return s.encode(pycompat.sysstr(orig_encoding), 'replace')
91 91 else:
92 92 return s.decode('utf-8').encode(
93 93 pycompat.sysstr(orig_encoding), 'replace'
94 94 )
95 95
96 96
97 97 def mapbranch(branch, branchmap):
98 98 """
99 99 >>> bmap = {b'default': b'branch1'}
100 100 >>> for i in [b'', None]:
101 101 ... mapbranch(i, bmap)
102 102 'branch1'
103 103 'branch1'
104 104 >>> bmap = {b'None': b'branch2'}
105 105 >>> for i in [b'', None]:
106 106 ... mapbranch(i, bmap)
107 107 'branch2'
108 108 'branch2'
109 109 >>> bmap = {b'None': b'branch3', b'default': b'branch4'}
110 110 >>> for i in [b'None', b'', None, b'default', b'branch5']:
111 111 ... mapbranch(i, bmap)
112 112 'branch3'
113 113 'branch4'
114 114 'branch4'
115 115 'branch4'
116 116 'branch5'
117 117 """
118 118 # If branch is None or empty, this commit is coming from the source
119 119 # repository's default branch and destined for the default branch in the
120 120 # destination repository. For such commits, using a literal "default"
121 121 # in branchmap below allows the user to map "default" to an alternate
122 122 # default branch in the destination repository.
123 123 branch = branchmap.get(branch or b'default', branch)
124 124 # At some point we used "None" literal to denote the default branch,
125 125 # attempt to use that for backward compatibility.
126 126 if not branch:
127 127 branch = branchmap.get(b'None', branch)
128 128 return branch
129 129
130 130
131 131 source_converters = [
132 132 (b'cvs', convert_cvs, b'branchsort'),
133 133 (b'git', convert_git, b'branchsort'),
134 134 (b'svn', svn_source, b'branchsort'),
135 135 (b'hg', mercurial_source, b'sourcesort'),
136 136 (b'darcs', darcs_source, b'branchsort'),
137 137 (b'mtn', monotone_source, b'branchsort'),
138 138 (b'gnuarch', gnuarch_source, b'branchsort'),
139 139 (b'bzr', bzr_source, b'branchsort'),
140 140 (b'p4', p4_source, b'branchsort'),
141 141 ]
142 142
143 143 sink_converters = [
144 144 (b'hg', mercurial_sink),
145 145 (b'svn', svn_sink),
146 146 ]
147 147
148 148
149 149 def convertsource(ui, path, type, revs):
150 150 exceptions = []
151 151 if type and type not in [s[0] for s in source_converters]:
152 152 raise error.Abort(_(b'%s: invalid source repository type') % type)
153 153 for name, source, sortmode in source_converters:
154 154 try:
155 155 if not type or name == type:
156 156 return source(ui, name, path, revs), sortmode
157 157 except (NoRepo, MissingTool) as inst:
158 158 exceptions.append(inst)
159 159 if not ui.quiet:
160 160 for inst in exceptions:
161 161 ui.write(b"%s\n" % pycompat.bytestr(inst.args[0]))
162 162 raise error.Abort(_(b'%s: missing or unsupported repository') % path)
163 163
164 164
165 165 def convertsink(ui, path, type):
166 166 if type and type not in [s[0] for s in sink_converters]:
167 167 raise error.Abort(_(b'%s: invalid destination repository type') % type)
168 168 for name, sink in sink_converters:
169 169 try:
170 170 if not type or name == type:
171 171 return sink(ui, name, path)
172 172 except NoRepo as inst:
173 173 ui.note(_(b"convert: %s\n") % inst)
174 174 except MissingTool as inst:
175 175 raise error.Abort(b'%s\n' % inst)
176 176 raise error.Abort(_(b'%s: unknown repository type') % path)
177 177
178 178
179 179 class progresssource(object):
180 180 def __init__(self, ui, source, filecount):
181 181 self.ui = ui
182 182 self.source = source
183 183 self.progress = ui.makeprogress(
184 184 _(b'getting files'), unit=_(b'files'), total=filecount
185 185 )
186 186
187 187 def getfile(self, file, rev):
188 188 self.progress.increment(item=file)
189 189 return self.source.getfile(file, rev)
190 190
191 191 def targetfilebelongstosource(self, targetfilename):
192 192 return self.source.targetfilebelongstosource(targetfilename)
193 193
194 194 def lookuprev(self, rev):
195 195 return self.source.lookuprev(rev)
196 196
197 197 def close(self):
198 198 self.progress.complete()
199 199
200 200
201 201 class converter(object):
202 202 def __init__(self, ui, source, dest, revmapfile, opts):
203 203
204 204 self.source = source
205 205 self.dest = dest
206 206 self.ui = ui
207 207 self.opts = opts
208 208 self.commitcache = {}
209 209 self.authors = {}
210 210 self.authorfile = None
211 211
212 212 # Record converted revisions persistently: maps source revision
213 213 # ID to target revision ID (both strings). (This is how
214 214 # incremental conversions work.)
215 215 self.map = mapfile(ui, revmapfile)
216 216
217 217 # Read first the dst author map if any
218 218 authorfile = self.dest.authorfile()
219 219 if authorfile and os.path.exists(authorfile):
220 220 self.readauthormap(authorfile)
221 221 # Extend/Override with new author map if necessary
222 222 if opts.get(b'authormap'):
223 223 self.readauthormap(opts.get(b'authormap'))
224 224 self.authorfile = self.dest.authorfile()
225 225
226 226 self.splicemap = self.parsesplicemap(opts.get(b'splicemap'))
227 227 self.branchmap = mapfile(ui, opts.get(b'branchmap'))
228 228
229 229 def parsesplicemap(self, path):
230 230 """check and validate the splicemap format and
231 231 return a child/parents dictionary.
232 232 Format checking has two parts.
233 233 1. generic format which is the same across all source types
234 234 2. specific format checking which may be different for
235 235 different source types. This logic is implemented in
236 236 checkrevformat function in source files like
237 237 hg.py, subversion.py etc.
238 238 """
239 239
240 240 if not path:
241 241 return {}
242 242 m = {}
243 243 try:
244 244 fp = open(path, b'rb')
245 for i, line in enumerate(util.iterfile(fp)):
245 for i, line in enumerate(fp):
246 246 line = line.splitlines()[0].rstrip()
247 247 if not line:
248 248 # Ignore blank lines
249 249 continue
250 250 # split line
251 251 lex = common.shlexer(data=line, whitespace=b',')
252 252 line = list(lex)
253 253 # check number of parents
254 254 if not (2 <= len(line) <= 3):
255 255 raise error.Abort(
256 256 _(
257 257 b'syntax error in %s(%d): child parent1'
258 258 b'[,parent2] expected'
259 259 )
260 260 % (path, i + 1)
261 261 )
262 262 for part in line:
263 263 self.source.checkrevformat(part)
264 264 child, p1, p2 = line[0], line[1:2], line[2:]
265 265 if p1 == p2:
266 266 m[child] = p1
267 267 else:
268 268 m[child] = p1 + p2
269 269 # if file does not exist or error reading, exit
270 270 except IOError:
271 271 raise error.Abort(
272 272 _(b'splicemap file not found or error reading %s:') % path
273 273 )
274 274 return m
275 275
276 276 def walktree(self, heads):
277 277 """Return a mapping that identifies the uncommitted parents of every
278 278 uncommitted changeset."""
279 279 visit = list(heads)
280 280 known = set()
281 281 parents = {}
282 282 numcommits = self.source.numcommits()
283 283 progress = self.ui.makeprogress(
284 284 _(b'scanning'), unit=_(b'revisions'), total=numcommits
285 285 )
286 286 while visit:
287 287 n = visit.pop(0)
288 288 if n in known:
289 289 continue
290 290 if n in self.map:
291 291 m = self.map[n]
292 292 if m == SKIPREV or self.dest.hascommitfrommap(m):
293 293 continue
294 294 known.add(n)
295 295 progress.update(len(known))
296 296 commit = self.cachecommit(n)
297 297 parents[n] = []
298 298 for p in commit.parents:
299 299 parents[n].append(p)
300 300 visit.append(p)
301 301 progress.complete()
302 302
303 303 return parents
304 304
305 305 def mergesplicemap(self, parents, splicemap):
306 306 """A splicemap redefines child/parent relationships. Check the
307 307 map contains valid revision identifiers and merge the new
308 308 links in the source graph.
309 309 """
310 310 for c in sorted(splicemap):
311 311 if c not in parents:
312 312 if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
313 313 # Could be in source but not converted during this run
314 314 self.ui.warn(
315 315 _(
316 316 b'splice map revision %s is not being '
317 317 b'converted, ignoring\n'
318 318 )
319 319 % c
320 320 )
321 321 continue
322 322 pc = []
323 323 for p in splicemap[c]:
324 324 # We do not have to wait for nodes already in dest.
325 325 if self.dest.hascommitforsplicemap(self.map.get(p, p)):
326 326 continue
327 327 # Parent is not in dest and not being converted, not good
328 328 if p not in parents:
329 329 raise error.Abort(_(b'unknown splice map parent: %s') % p)
330 330 pc.append(p)
331 331 parents[c] = pc
332 332
333 333 def toposort(self, parents, sortmode):
334 334 """Return an ordering such that every uncommitted changeset is
335 335 preceded by all its uncommitted ancestors."""
336 336
337 337 def mapchildren(parents):
338 338 """Return a (children, roots) tuple where 'children' maps parent
339 339 revision identifiers to their children, and 'roots' is the list of
340 340 revisions without parents. 'parents' must be a mapping of revision
341 341 identifier to its parents.
342 342 """
343 343 visit = collections.deque(sorted(parents))
344 344 seen = set()
345 345 children = {}
346 346 roots = []
347 347
348 348 while visit:
349 349 n = visit.popleft()
350 350 if n in seen:
351 351 continue
352 352 seen.add(n)
353 353 # Ensure that nodes without parents are present in the
354 354 # 'children' mapping.
355 355 children.setdefault(n, [])
356 356 hasparent = False
357 357 for p in parents[n]:
358 358 if p not in self.map:
359 359 visit.append(p)
360 360 hasparent = True
361 361 children.setdefault(p, []).append(n)
362 362 if not hasparent:
363 363 roots.append(n)
364 364
365 365 return children, roots
366 366
367 367 # Sort functions are supposed to take a list of revisions which
368 368 # can be converted immediately and pick one
369 369
370 370 def makebranchsorter():
371 371 """If the previously converted revision has a child in the
372 372 eligible revisions list, pick it. Return the list head
373 373 otherwise. Branch sort attempts to minimize branch
374 374 switching, which is harmful for Mercurial backend
375 375 compression.
376 376 """
377 377 prev = [None]
378 378
379 379 def picknext(nodes):
380 380 next = nodes[0]
381 381 for n in nodes:
382 382 if prev[0] in parents[n]:
383 383 next = n
384 384 break
385 385 prev[0] = next
386 386 return next
387 387
388 388 return picknext
389 389
390 390 def makesourcesorter():
391 391 """Source specific sort."""
392 392 keyfn = lambda n: self.commitcache[n].sortkey
393 393
394 394 def picknext(nodes):
395 395 return sorted(nodes, key=keyfn)[0]
396 396
397 397 return picknext
398 398
399 399 def makeclosesorter():
400 400 """Close order sort."""
401 401 keyfn = lambda n: (
402 402 b'close' not in self.commitcache[n].extra,
403 403 self.commitcache[n].sortkey,
404 404 )
405 405
406 406 def picknext(nodes):
407 407 return sorted(nodes, key=keyfn)[0]
408 408
409 409 return picknext
410 410
411 411 def makedatesorter():
412 412 """Sort revisions by date."""
413 413 dates = {}
414 414
415 415 def getdate(n):
416 416 if n not in dates:
417 417 dates[n] = dateutil.parsedate(self.commitcache[n].date)
418 418 return dates[n]
419 419
420 420 def picknext(nodes):
421 421 return min([(getdate(n), n) for n in nodes])[1]
422 422
423 423 return picknext
424 424
425 425 if sortmode == b'branchsort':
426 426 picknext = makebranchsorter()
427 427 elif sortmode == b'datesort':
428 428 picknext = makedatesorter()
429 429 elif sortmode == b'sourcesort':
430 430 picknext = makesourcesorter()
431 431 elif sortmode == b'closesort':
432 432 picknext = makeclosesorter()
433 433 else:
434 434 raise error.Abort(_(b'unknown sort mode: %s') % sortmode)
435 435
436 436 children, actives = mapchildren(parents)
437 437
438 438 s = []
439 439 pendings = {}
440 440 while actives:
441 441 n = picknext(actives)
442 442 actives.remove(n)
443 443 s.append(n)
444 444
445 445 # Update dependents list
446 446 for c in children.get(n, []):
447 447 if c not in pendings:
448 448 pendings[c] = [p for p in parents[c] if p not in self.map]
449 449 try:
450 450 pendings[c].remove(n)
451 451 except ValueError:
452 452 raise error.Abort(
453 453 _(b'cycle detected between %s and %s')
454 454 % (recode(c), recode(n))
455 455 )
456 456 if not pendings[c]:
457 457 # Parents are converted, node is eligible
458 458 actives.insert(0, c)
459 459 pendings[c] = None
460 460
461 461 if len(s) != len(parents):
462 462 raise error.Abort(_(b"not all revisions were sorted"))
463 463
464 464 return s
465 465
466 466 def writeauthormap(self):
467 467 authorfile = self.authorfile
468 468 if authorfile:
469 469 self.ui.status(_(b'writing author map file %s\n') % authorfile)
470 470 ofile = open(authorfile, b'wb+')
471 471 for author in self.authors:
472 472 ofile.write(
473 473 util.tonativeeol(
474 474 b"%s=%s\n" % (author, self.authors[author])
475 475 )
476 476 )
477 477 ofile.close()
478 478
479 479 def readauthormap(self, authorfile):
480 480 self.authors = readauthormap(self.ui, authorfile, self.authors)
481 481
482 482 def cachecommit(self, rev):
483 483 commit = self.source.getcommit(rev)
484 484 commit.author = self.authors.get(commit.author, commit.author)
485 485 commit.branch = mapbranch(commit.branch, self.branchmap)
486 486 self.commitcache[rev] = commit
487 487 return commit
488 488
489 489 def copy(self, rev):
490 490 commit = self.commitcache[rev]
491 491 full = self.opts.get(b'full')
492 492 changes = self.source.getchanges(rev, full)
493 493 if isinstance(changes, bytes):
494 494 if changes == SKIPREV:
495 495 dest = SKIPREV
496 496 else:
497 497 dest = self.map[changes]
498 498 self.map[rev] = dest
499 499 return
500 500 files, copies, cleanp2 = changes
501 501 pbranches = []
502 502 if commit.parents:
503 503 for prev in commit.parents:
504 504 if prev not in self.commitcache:
505 505 self.cachecommit(prev)
506 506 pbranches.append(
507 507 (self.map[prev], self.commitcache[prev].branch)
508 508 )
509 509 self.dest.setbranch(commit.branch, pbranches)
510 510 try:
511 511 parents = self.splicemap[rev]
512 512 self.ui.status(
513 513 _(b'spliced in %s as parents of %s\n')
514 514 % (_(b' and ').join(parents), rev)
515 515 )
516 516 parents = [self.map.get(p, p) for p in parents]
517 517 except KeyError:
518 518 parents = [b[0] for b in pbranches]
519 519 parents.extend(
520 520 self.map[x] for x in commit.optparents if x in self.map
521 521 )
522 522 if len(pbranches) != 2:
523 523 cleanp2 = set()
524 524 if len(parents) < 3:
525 525 source = progresssource(self.ui, self.source, len(files))
526 526 else:
527 527 # For an octopus merge, we end up traversing the list of
528 528 # changed files N-1 times. This tweak to the number of
529 529 # files makes it so the progress bar doesn't overflow
530 530 # itself.
531 531 source = progresssource(
532 532 self.ui, self.source, len(files) * (len(parents) - 1)
533 533 )
534 534 newnode = self.dest.putcommit(
535 535 files, copies, parents, commit, source, self.map, full, cleanp2
536 536 )
537 537 source.close()
538 538 self.source.converted(rev, newnode)
539 539 self.map[rev] = newnode
540 540
541 541 def convert(self, sortmode):
542 542 try:
543 543 self.source.before()
544 544 self.dest.before()
545 545 self.source.setrevmap(self.map)
546 546 self.ui.status(_(b"scanning source...\n"))
547 547 heads = self.source.getheads()
548 548 parents = self.walktree(heads)
549 549 self.mergesplicemap(parents, self.splicemap)
550 550 self.ui.status(_(b"sorting...\n"))
551 551 t = self.toposort(parents, sortmode)
552 552 num = len(t)
553 553 c = None
554 554
555 555 self.ui.status(_(b"converting...\n"))
556 556 progress = self.ui.makeprogress(
557 557 _(b'converting'), unit=_(b'revisions'), total=len(t)
558 558 )
559 559 for i, c in enumerate(t):
560 560 num -= 1
561 561 desc = self.commitcache[c].desc
562 562 if b"\n" in desc:
563 563 desc = desc.splitlines()[0]
564 564 # convert log message to local encoding without using
565 565 # tolocal() because the encoding.encoding convert()
566 566 # uses is 'utf-8'
567 567 self.ui.status(b"%d %s\n" % (num, recode(desc)))
568 568 self.ui.note(_(b"source: %s\n") % recode(c))
569 569 progress.update(i)
570 570 self.copy(c)
571 571 progress.complete()
572 572
573 573 if not self.ui.configbool(b'convert', b'skiptags'):
574 574 tags = self.source.gettags()
575 575 ctags = {}
576 576 for k in tags:
577 577 v = tags[k]
578 578 if self.map.get(v, SKIPREV) != SKIPREV:
579 579 ctags[k] = self.map[v]
580 580
581 581 if c and ctags:
582 582 nrev, tagsparent = self.dest.puttags(ctags)
583 583 if nrev and tagsparent:
584 584 # write another hash correspondence to override the
585 585 # previous one so we don't end up with extra tag heads
586 586 tagsparents = [
587 587 e for e in self.map.items() if e[1] == tagsparent
588 588 ]
589 589 if tagsparents:
590 590 self.map[tagsparents[0][0]] = nrev
591 591
592 592 bookmarks = self.source.getbookmarks()
593 593 cbookmarks = {}
594 594 for k in bookmarks:
595 595 v = bookmarks[k]
596 596 if self.map.get(v, SKIPREV) != SKIPREV:
597 597 cbookmarks[k] = self.map[v]
598 598
599 599 if c and cbookmarks:
600 600 self.dest.putbookmarks(cbookmarks)
601 601
602 602 self.writeauthormap()
603 603 finally:
604 604 self.cleanup()
605 605
606 606 def cleanup(self):
607 607 try:
608 608 self.dest.after()
609 609 finally:
610 610 self.source.after()
611 611 self.map.close()
612 612
613 613
614 614 def convert(ui, src, dest=None, revmapfile=None, **opts):
615 615 opts = pycompat.byteskwargs(opts)
616 616 global orig_encoding
617 617 orig_encoding = encoding.encoding
618 618 encoding.encoding = b'UTF-8'
619 619
620 620 # support --authors as an alias for --authormap
621 621 if not opts.get(b'authormap'):
622 622 opts[b'authormap'] = opts.get(b'authors')
623 623
624 624 if not dest:
625 625 dest = hg.defaultdest(src) + b"-hg"
626 626 ui.status(_(b"assuming destination %s\n") % dest)
627 627
628 628 destc = convertsink(ui, dest, opts.get(b'dest_type'))
629 629 destc = scmutil.wrapconvertsink(destc)
630 630
631 631 try:
632 632 srcc, defaultsort = convertsource(
633 633 ui, src, opts.get(b'source_type'), opts.get(b'rev')
634 634 )
635 635 except Exception:
636 636 for path in destc.created:
637 637 shutil.rmtree(path, True)
638 638 raise
639 639
640 640 sortmodes = (b'branchsort', b'datesort', b'sourcesort', b'closesort')
641 641 sortmode = [m for m in sortmodes if opts.get(m)]
642 642 if len(sortmode) > 1:
643 643 raise error.Abort(_(b'more than one sort mode specified'))
644 644 if sortmode:
645 645 sortmode = sortmode[0]
646 646 else:
647 647 sortmode = defaultsort
648 648
649 649 if sortmode == b'sourcesort' and not srcc.hasnativeorder():
650 650 raise error.Abort(
651 651 _(b'--sourcesort is not supported by this data source')
652 652 )
653 653 if sortmode == b'closesort' and not srcc.hasnativeclose():
654 654 raise error.Abort(
655 655 _(b'--closesort is not supported by this data source')
656 656 )
657 657
658 658 fmap = opts.get(b'filemap')
659 659 if fmap:
660 660 srcc = filemap.filemap_source(ui, srcc, fmap)
661 661 destc.setfilemapmode(True)
662 662
663 663 if not revmapfile:
664 664 revmapfile = destc.revmapfile()
665 665
666 666 c = converter(ui, srcc, destc, revmapfile, opts)
667 667 c.convert(sortmode)
@@ -1,1655 +1,1655 @@
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
7 7
8 8
9 9 import bisect
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
27 27 rustmod = policy.importrust('dirstate')
28 28
29 29 allpatternkinds = (
30 30 b're',
31 31 b'glob',
32 32 b'path',
33 33 b'relglob',
34 34 b'relpath',
35 35 b'relre',
36 36 b'rootglob',
37 37 b'listfile',
38 38 b'listfile0',
39 39 b'set',
40 40 b'include',
41 41 b'subinclude',
42 42 b'rootfilesin',
43 43 )
44 44 cwdrelativepatternkinds = (b'relpath', b'glob')
45 45
46 46 propertycache = util.propertycache
47 47
48 48
49 49 def _rematcher(regex):
50 50 """compile the regexp with the best available regexp engine and return a
51 51 matcher function"""
52 52 m = util.re.compile(regex)
53 53 try:
54 54 # slightly faster, provided by facebook's re2 bindings
55 55 return m.test_match
56 56 except AttributeError:
57 57 return m.match
58 58
59 59
60 60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 """Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it."""
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
119 119 def _buildkindpatsmatcher(
120 120 matchercls,
121 121 root,
122 122 cwd,
123 123 kindpats,
124 124 ctx=None,
125 125 listsubrepos=False,
126 126 badfn=None,
127 127 ):
128 128 matchers = []
129 129 fms, kindpats = _expandsets(
130 130 cwd,
131 131 kindpats,
132 132 ctx=ctx,
133 133 listsubrepos=listsubrepos,
134 134 badfn=badfn,
135 135 )
136 136 if kindpats:
137 137 m = matchercls(root, kindpats, badfn=badfn)
138 138 matchers.append(m)
139 139 if fms:
140 140 matchers.extend(fms)
141 141 if not matchers:
142 142 return nevermatcher(badfn=badfn)
143 143 if len(matchers) == 1:
144 144 return matchers[0]
145 145 return unionmatcher(matchers)
146 146
147 147
148 148 def match(
149 149 root,
150 150 cwd,
151 151 patterns=None,
152 152 include=None,
153 153 exclude=None,
154 154 default=b'glob',
155 155 auditor=None,
156 156 ctx=None,
157 157 listsubrepos=False,
158 158 warn=None,
159 159 badfn=None,
160 160 icasefs=False,
161 161 ):
162 162 r"""build an object to match a set of file patterns
163 163
164 164 arguments:
165 165 root - the canonical root of the tree you're matching against
166 166 cwd - the current working directory, if relevant
167 167 patterns - patterns to find
168 168 include - patterns to include (unless they are excluded)
169 169 exclude - patterns to exclude (even if they are included)
170 170 default - if a pattern in patterns has no explicit type, assume this one
171 171 auditor - optional path auditor
172 172 ctx - optional changecontext
173 173 listsubrepos - if True, recurse into subrepositories
174 174 warn - optional function used for printing warnings
175 175 badfn - optional bad() callback for this matcher instead of the default
176 176 icasefs - make a matcher for wdir on case insensitive filesystems, which
177 177 normalizes the given patterns to the case in the filesystem
178 178
179 179 a pattern is one of:
180 180 'glob:<glob>' - a glob relative to cwd
181 181 're:<regexp>' - a regular expression
182 182 'path:<path>' - a path relative to repository root, which is matched
183 183 recursively
184 184 'rootfilesin:<path>' - a path relative to repository root, which is
185 185 matched non-recursively (will not match subdirectories)
186 186 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
187 187 'relpath:<path>' - a path relative to cwd
188 188 'relre:<regexp>' - a regexp that needn't match the start of a name
189 189 'set:<fileset>' - a fileset expression
190 190 'include:<path>' - a file of patterns to read and include
191 191 'subinclude:<path>' - a file of patterns to match against files under
192 192 the same directory
193 193 '<something>' - a pattern of the specified default type
194 194
195 195 >>> def _match(root, *args, **kwargs):
196 196 ... return match(util.localpath(root), *args, **kwargs)
197 197
198 198 Usually a patternmatcher is returned:
199 199 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
200 200 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
201 201
202 202 Combining 'patterns' with 'include' (resp. 'exclude') gives an
203 203 intersectionmatcher (resp. a differencematcher):
204 204 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
205 205 <class 'mercurial.match.intersectionmatcher'>
206 206 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
207 207 <class 'mercurial.match.differencematcher'>
208 208
209 209 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
210 210 >>> _match(b'/foo', b'.', [])
211 211 <alwaysmatcher>
212 212
213 213 The 'default' argument determines which kind of pattern is assumed if a
214 214 pattern has no prefix:
215 215 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
216 216 <patternmatcher patterns='.*\\.c$'>
217 217 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
218 218 <patternmatcher patterns='main\\.py(?:/|$)'>
219 219 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
220 220 <patternmatcher patterns='main.py'>
221 221
222 222 The primary use of matchers is to check whether a value (usually a file
223 223 name) matches against one of the patterns given at initialization. There
224 224 are two ways of doing this check.
225 225
226 226 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])
227 227
228 228 1. Calling the matcher with a file name returns True if any pattern
229 229 matches that file name:
230 230 >>> m(b'a')
231 231 True
232 232 >>> m(b'main.c')
233 233 True
234 234 >>> m(b'test.py')
235 235 False
236 236
237 237 2. Using the exact() method only returns True if the file name matches one
238 238 of the exact patterns (i.e. not re: or glob: patterns):
239 239 >>> m.exact(b'a')
240 240 True
241 241 >>> m.exact(b'main.c')
242 242 False
243 243 """
244 244 assert os.path.isabs(root)
245 245 cwd = os.path.join(root, util.localpath(cwd))
246 246 normalize = _donormalize
247 247 if icasefs:
248 248 dirstate = ctx.repo().dirstate
249 249 dsnormalize = dirstate.normalize
250 250
251 251 def normalize(patterns, default, root, cwd, auditor, warn):
252 252 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
253 253 kindpats = []
254 254 for kind, pats, source in kp:
255 255 if kind not in (b're', b'relre'): # regex can't be normalized
256 256 p = pats
257 257 pats = dsnormalize(pats)
258 258
259 259 # Preserve the original to handle a case only rename.
260 260 if p != pats and p in dirstate:
261 261 kindpats.append((kind, p, source))
262 262
263 263 kindpats.append((kind, pats, source))
264 264 return kindpats
265 265
266 266 if patterns:
267 267 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
268 268 if _kindpatsalwaysmatch(kindpats):
269 269 m = alwaysmatcher(badfn)
270 270 else:
271 271 m = _buildkindpatsmatcher(
272 272 patternmatcher,
273 273 root,
274 274 cwd,
275 275 kindpats,
276 276 ctx=ctx,
277 277 listsubrepos=listsubrepos,
278 278 badfn=badfn,
279 279 )
280 280 else:
281 281 # It's a little strange that no patterns means to match everything.
282 282 # Consider changing this to match nothing (probably using nevermatcher).
283 283 m = alwaysmatcher(badfn)
284 284
285 285 if include:
286 286 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
287 287 im = _buildkindpatsmatcher(
288 288 includematcher,
289 289 root,
290 290 cwd,
291 291 kindpats,
292 292 ctx=ctx,
293 293 listsubrepos=listsubrepos,
294 294 badfn=None,
295 295 )
296 296 m = intersectmatchers(m, im)
297 297 if exclude:
298 298 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
299 299 em = _buildkindpatsmatcher(
300 300 includematcher,
301 301 root,
302 302 cwd,
303 303 kindpats,
304 304 ctx=ctx,
305 305 listsubrepos=listsubrepos,
306 306 badfn=None,
307 307 )
308 308 m = differencematcher(m, em)
309 309 return m
310 310
311 311
312 312 def exact(files, badfn=None):
313 313 return exactmatcher(files, badfn=badfn)
314 314
315 315
316 316 def always(badfn=None):
317 317 return alwaysmatcher(badfn)
318 318
319 319
320 320 def never(badfn=None):
321 321 return nevermatcher(badfn)
322 322
323 323
324 324 def badmatch(match, badfn):
325 325 """Make a copy of the given matcher, replacing its bad method with the given
326 326 one.
327 327 """
328 328 m = copy.copy(match)
329 329 m.bad = badfn
330 330 return m
331 331
332 332
333 333 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
334 334 """Convert 'kind:pat' from the patterns list to tuples with kind and
335 335 normalized and rooted patterns and with listfiles expanded."""
336 336 kindpats = []
337 337 for kind, pat in [_patsplit(p, default) for p in patterns]:
338 338 if kind in cwdrelativepatternkinds:
339 339 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
340 340 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
341 341 pat = util.normpath(pat)
342 342 elif kind in (b'listfile', b'listfile0'):
343 343 try:
344 344 files = util.readfile(pat)
345 345 if kind == b'listfile0':
346 346 files = files.split(b'\0')
347 347 else:
348 348 files = files.splitlines()
349 349 files = [f for f in files if f]
350 350 except EnvironmentError:
351 351 raise error.Abort(_(b"unable to read file list (%s)") % pat)
352 352 for k, p, source in _donormalize(
353 353 files, default, root, cwd, auditor, warn
354 354 ):
355 355 kindpats.append((k, p, pat))
356 356 continue
357 357 elif kind == b'include':
358 358 try:
359 359 fullpath = os.path.join(root, util.localpath(pat))
360 360 includepats = readpatternfile(fullpath, warn)
361 361 for k, p, source in _donormalize(
362 362 includepats, default, root, cwd, auditor, warn
363 363 ):
364 364 kindpats.append((k, p, source or pat))
365 365 except error.Abort as inst:
366 366 raise error.Abort(
367 367 b'%s: %s'
368 368 % (
369 369 pat,
370 370 inst.message,
371 371 ) # pytype: disable=unsupported-operands
372 372 )
373 373 except IOError as inst:
374 374 if warn:
375 375 warn(
376 376 _(b"skipping unreadable pattern file '%s': %s\n")
377 377 % (pat, stringutil.forcebytestr(inst.strerror))
378 378 )
379 379 continue
380 380 # else: re or relre - which cannot be normalized
381 381 kindpats.append((kind, pat, b''))
382 382 return kindpats
383 383
384 384
385 385 class basematcher(object):
386 386 def __init__(self, badfn=None):
387 387 if badfn is not None:
388 388 self.bad = badfn
389 389
390 390 def __call__(self, fn):
391 391 return self.matchfn(fn)
392 392
393 393 # Callbacks related to how the matcher is used by dirstate.walk.
394 394 # Subscribers to these events must monkeypatch the matcher object.
395 395 def bad(self, f, msg):
396 396 """Callback from dirstate.walk for each explicit file that can't be
397 397 found/accessed, with an error message."""
398 398
399 399 # If traversedir is set, it will be called when a directory discovered
400 400 # by recursive traversal is visited.
401 401 traversedir = None
402 402
403 403 @propertycache
404 404 def _files(self):
405 405 return []
406 406
407 407 def files(self):
408 408 """Explicitly listed files or patterns or roots:
409 409 if no patterns or .always(): empty list,
410 410 if exact: list exact files,
411 411 if not .anypats(): list all files and dirs,
412 412 else: optimal roots"""
413 413 return self._files
414 414
415 415 @propertycache
416 416 def _fileset(self):
417 417 return set(self._files)
418 418
419 419 def exact(self, f):
420 420 '''Returns True if f is in .files().'''
421 421 return f in self._fileset
422 422
423 423 def matchfn(self, f):
424 424 return False
425 425
426 426 def visitdir(self, dir):
427 427 """Decides whether a directory should be visited based on whether it
428 428 has potential matches in it or one of its subdirectories. This is
429 429 based on the match's primary, included, and excluded patterns.
430 430
431 431 Returns the string 'all' if the given directory and all subdirectories
432 432 should be visited. Otherwise returns True or False indicating whether
433 433 the given directory should be visited.
434 434 """
435 435 return True
436 436
437 437 def visitchildrenset(self, dir):
438 438 """Decides whether a directory should be visited based on whether it
439 439 has potential matches in it or one of its subdirectories, and
440 440 potentially lists which subdirectories of that directory should be
441 441 visited. This is based on the match's primary, included, and excluded
442 442 patterns.
443 443
444 444 This function is very similar to 'visitdir', and the following mapping
445 445 can be applied:
446 446
447 447 visitdir | visitchildrenset
448 448 ----------+-------------------
449 449 False | set()
450 450 'all' | 'all'
451 451 True | 'this' OR non-empty set of subdirs -or files- to visit
452 452
453 453 Example:
454 454 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
455 455 the following values (assuming the implementation of visitchildrenset
456 456 is capable of recognizing this; some implementations are not).
457 457
458 458 '' -> {'foo', 'qux'}
459 459 'baz' -> set()
460 460 'foo' -> {'bar'}
461 461 # Ideally this would be 'all', but since the prefix nature of matchers
462 462 # is applied to the entire matcher, we have to downgrade this to
463 463 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
464 464 # in.
465 465 'foo/bar' -> 'this'
466 466 'qux' -> 'this'
467 467
468 468 Important:
469 469 Most matchers do not know if they're representing files or
470 470 directories. They see ['path:dir/f'] and don't know whether 'f' is a
471 471 file or a directory, so visitchildrenset('dir') for most matchers will
472 472 return {'f'}, but if the matcher knows it's a file (like exactmatcher
473 473 does), it may return 'this'. Do not rely on the return being a set
474 474 indicating that there are no files in this dir to investigate (or
475 475 equivalently that if there are files to investigate in 'dir' that it
476 476 will always return 'this').
477 477 """
478 478 return b'this'
479 479
480 480 def always(self):
481 481 """Matcher will match everything and .files() will be empty --
482 482 optimization might be possible."""
483 483 return False
484 484
485 485 def isexact(self):
486 486 """Matcher will match exactly the list of files in .files() --
487 487 optimization might be possible."""
488 488 return False
489 489
490 490 def prefix(self):
491 491 """Matcher will match the paths in .files() recursively --
492 492 optimization might be possible."""
493 493 return False
494 494
495 495 def anypats(self):
496 496 """None of .always(), .isexact(), and .prefix() is true --
497 497 optimizations will be difficult."""
498 498 return not self.always() and not self.isexact() and not self.prefix()
499 499
500 500
501 501 class alwaysmatcher(basematcher):
502 502 '''Matches everything.'''
503 503
504 504 def __init__(self, badfn=None):
505 505 super(alwaysmatcher, self).__init__(badfn)
506 506
507 507 def always(self):
508 508 return True
509 509
510 510 def matchfn(self, f):
511 511 return True
512 512
513 513 def visitdir(self, dir):
514 514 return b'all'
515 515
516 516 def visitchildrenset(self, dir):
517 517 return b'all'
518 518
519 519 def __repr__(self):
520 520 return r'<alwaysmatcher>'
521 521
522 522
523 523 class nevermatcher(basematcher):
524 524 '''Matches nothing.'''
525 525
526 526 def __init__(self, badfn=None):
527 527 super(nevermatcher, self).__init__(badfn)
528 528
529 529 # It's a little weird to say that the nevermatcher is an exact matcher
530 530 # or a prefix matcher, but it seems to make sense to let callers take
531 531 # fast paths based on either. There will be no exact matches, nor any
532 532 # prefixes (files() returns []), so fast paths iterating over them should
533 533 # be efficient (and correct).
534 534 def isexact(self):
535 535 return True
536 536
537 537 def prefix(self):
538 538 return True
539 539
540 540 def visitdir(self, dir):
541 541 return False
542 542
543 543 def visitchildrenset(self, dir):
544 544 return set()
545 545
546 546 def __repr__(self):
547 547 return r'<nevermatcher>'
548 548
549 549
550 550 class predicatematcher(basematcher):
551 551 """A matcher adapter for a simple boolean function"""
552 552
553 553 def __init__(self, predfn, predrepr=None, badfn=None):
554 554 super(predicatematcher, self).__init__(badfn)
555 555 self.matchfn = predfn
556 556 self._predrepr = predrepr
557 557
558 558 @encoding.strmethod
559 559 def __repr__(self):
560 560 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
561 561 self.matchfn
562 562 )
563 563 return b'<predicatematcher pred=%s>' % s
564 564
565 565
566 566 def path_or_parents_in_set(path, prefix_set):
567 567 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
568 568 l = len(prefix_set)
569 569 if l == 0:
570 570 return False
571 571 if path in prefix_set:
572 572 return True
573 573 # If there are more than 5 paths in prefix_set, it's *probably* quicker to
574 574 # "walk up" the directory hierarchy instead, with the assumption that most
575 575 # directory hierarchies are relatively shallow and hash lookup is cheap.
576 576 if l > 5:
577 577 return any(
578 578 parentdir in prefix_set for parentdir in pathutil.finddirs(path)
579 579 )
580 580
581 581 # FIXME: Ideally we'd never get to this point if this is the case - we'd
582 582 # recognize ourselves as an 'always' matcher and skip this.
583 583 if b'' in prefix_set:
584 584 return True
585 585
586 586 sl = ord(b'/')
587 587
588 588 # We already checked that path isn't in prefix_set exactly, so
589 589 # `path[len(pf)]` should never raise IndexError.
590 590 return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
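# For example (illustrative, matching the semantics above):
#     path_or_parents_in_set(b'a/b/c', {b'a'})     -> True   (parent in set)
#     path_or_parents_in_set(b'a/b/c', {b'a/b/c'}) -> True   (exact match)
#     path_or_parents_in_set(b'a/bc', {b'a/b'})    -> False  (not a parent)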
591 591
592 592
593 593 class patternmatcher(basematcher):
594 594 r"""Matches a set of (kind, pat, source) against a 'root' directory.
595 595
596 596 >>> kindpats = [
597 597 ... (b're', br'.*\.c$', b''),
598 598 ... (b'path', b'foo/a', b''),
599 599 ... (b'relpath', b'b', b''),
600 600 ... (b'glob', b'*.h', b''),
601 601 ... ]
602 602 >>> m = patternmatcher(b'foo', kindpats)
603 603 >>> m(b'main.c') # matches re:.*\.c$
604 604 True
605 605 >>> m(b'b.txt')
606 606 False
607 607 >>> m(b'foo/a') # matches path:foo/a
608 608 True
609 609 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
610 610 False
611 611 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
612 612 True
613 613 >>> m(b'lib.h') # matches glob:*.h
614 614 True
615 615
616 616 >>> m.files()
617 617 ['', 'foo/a', 'b', '']
618 618 >>> m.exact(b'foo/a')
619 619 True
620 620 >>> m.exact(b'b')
621 621 True
622 622 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
623 623 False
624 624 """
625 625
626 626 def __init__(self, root, kindpats, badfn=None):
627 627 super(patternmatcher, self).__init__(badfn)
628 628
629 629 self._files = _explicitfiles(kindpats)
630 630 self._prefix = _prefix(kindpats)
631 631 self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
632 632
633 633 @propertycache
634 634 def _dirs(self):
635 635 return set(pathutil.dirs(self._fileset))
636 636
637 637 def visitdir(self, dir):
638 638 if self._prefix and dir in self._fileset:
639 639 return b'all'
640 640 return dir in self._dirs or path_or_parents_in_set(dir, self._fileset)
641 641
642 642 def visitchildrenset(self, dir):
643 643 ret = self.visitdir(dir)
644 644 if ret is True:
645 645 return b'this'
646 646 elif not ret:
647 647 return set()
648 648 assert ret == b'all'
649 649 return b'all'
650 650
651 651 def prefix(self):
652 652 return self._prefix
653 653
654 654 @encoding.strmethod
655 655 def __repr__(self):
656 656 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
657 657
658 658
659 659 # This is basically a reimplementation of pathutil.dirs that stores the
660 660 # children instead of just a count of them, plus a small optional optimization
661 661 # to avoid some directories we don't need.
662 662 class _dirchildren(object):
663 663 def __init__(self, paths, onlyinclude=None):
664 664 self._dirs = {}
665 665 self._onlyinclude = onlyinclude or []
666 666 addpath = self.addpath
667 667 for f in paths:
668 668 addpath(f)
669 669
670 670 def addpath(self, path):
671 671 if path == b'':
672 672 return
673 673 dirs = self._dirs
674 674 findsplitdirs = _dirchildren._findsplitdirs
675 675 for d, b in findsplitdirs(path):
676 676 if d not in self._onlyinclude:
677 677 continue
678 678 dirs.setdefault(d, set()).add(b)
679 679
680 680 @staticmethod
681 681 def _findsplitdirs(path):
682 682 # yields (dirname, basename) tuples, walking back to the root. This is
683 683 # very similar to pathutil.finddirs, except:
684 684 # - produces a (dirname, basename) tuple, not just 'dirname'
685 685 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
686 686 # slash.
687 687 oldpos = len(path)
688 688 pos = path.rfind(b'/')
689 689 while pos != -1:
690 690 yield path[:pos], path[pos + 1 : oldpos]
691 691 oldpos = pos
692 692 pos = path.rfind(b'/', 0, pos)
693 693 yield b'', path[:oldpos]
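# For example (illustrative), _findsplitdirs(b'a/b/c') yields
# (b'a/b', b'c'), then (b'a', b'b'), then (b'', b'a').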
694 694
695 695 def get(self, path):
696 696 return self._dirs.get(path, set())
697 697
698 698
699 699 class includematcher(basematcher):
700 700 def __init__(self, root, kindpats, badfn=None):
701 701 super(includematcher, self).__init__(badfn)
702 702 if rustmod is not None:
703 703 # We need to pass the patterns to Rust because they can contain
704 704 # patterns from the user interface
705 705 self._kindpats = kindpats
706 706 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
707 707 self._prefix = _prefix(kindpats)
708 708 roots, dirs, parents = _rootsdirsandparents(kindpats)
709 709 # roots are directories which are recursively included.
710 710 self._roots = set(roots)
711 711 # dirs are directories which are non-recursively included.
712 712 self._dirs = set(dirs)
713 713 # parents are directories which are non-recursively included because
714 714 # they are needed to get to items in _dirs or _roots.
715 715 self._parents = parents
716 716
717 717 def visitdir(self, dir):
718 718 if self._prefix and dir in self._roots:
719 719 return b'all'
720 720 return (
721 721 dir in self._dirs
722 722 or dir in self._parents
723 723 or path_or_parents_in_set(dir, self._roots)
724 724 )
725 725
726 726 @propertycache
727 727 def _allparentschildren(self):
728 728 # It may seem odd that we add dirs, roots, and parents, and then
729 729 # restrict to only parents. This is to catch the case of:
730 730 # dirs = ['foo/bar']
731 731 # parents = ['foo']
732 732 # if we asked for the children of 'foo', but had only added
733 733 # self._parents, we wouldn't be able to respond ['bar'].
734 734 return _dirchildren(
735 735 itertools.chain(self._dirs, self._roots, self._parents),
736 736 onlyinclude=self._parents,
737 737 )
738 738
739 739 def visitchildrenset(self, dir):
740 740 if self._prefix and dir in self._roots:
741 741 return b'all'
742 742 # Note: this does *not* include the 'dir in self._parents' case from
743 743 # visitdir, that's handled below.
744 744 if (
745 745 b'' in self._roots
746 746 or dir in self._dirs
747 747 or path_or_parents_in_set(dir, self._roots)
748 748 ):
749 749 return b'this'
750 750
751 751 if dir in self._parents:
752 752 return self._allparentschildren.get(dir) or set()
753 753 return set()
754 754
755 755 @encoding.strmethod
756 756 def __repr__(self):
757 757 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
758 758
759 759
760 760 class exactmatcher(basematcher):
761 761 r"""Matches the input files exactly. They are interpreted as paths, not
762 762 patterns (so no kind-prefixes).
763 763
764 764 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
765 765 >>> m(b'a.txt')
766 766 True
767 767 >>> m(b'b.txt')
768 768 False
769 769
770 770 Input files that would be matched are exactly those returned by .files()
771 771 >>> m.files()
772 772 ['a.txt', 're:.*\\.c$']
773 773
774 774 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
775 775 >>> m(b'main.c')
776 776 False
777 777 >>> m(br're:.*\.c$')
778 778 True
779 779 """
780 780
781 781 def __init__(self, files, badfn=None):
782 782 super(exactmatcher, self).__init__(badfn)
783 783
784 784 if isinstance(files, list):
785 785 self._files = files
786 786 else:
787 787 self._files = list(files)
788 788
789 789 matchfn = basematcher.exact
790 790
791 791 @propertycache
792 792 def _dirs(self):
793 793 return set(pathutil.dirs(self._fileset))
794 794
795 795 def visitdir(self, dir):
796 796 return dir in self._dirs
797 797
798 798 @propertycache
799 799 def _visitchildrenset_candidates(self):
800 800 """A memoized set of candidates for visitchildrenset."""
801 801 return self._fileset | self._dirs - {b''}
802 802
803 803 @propertycache
804 804 def _sorted_visitchildrenset_candidates(self):
805 805 """A memoized sorted list of candidates for visitchildrenset."""
806 806 return sorted(self._visitchildrenset_candidates)
807 807
808 808 def visitchildrenset(self, dir):
809 809 if not self._fileset or dir not in self._dirs:
810 810 return set()
811 811
812 812 if dir == b'':
813 813 candidates = self._visitchildrenset_candidates
814 814 else:
815 815 candidates = self._sorted_visitchildrenset_candidates
816 816 d = dir + b'/'
817 817 # Use bisect to find the first element potentially starting with d
818 818 # (i.e. >= d). This should always find at least one element (we'll
819 819 # assert later if this is not the case).
820 820 first = bisect.bisect_left(candidates, d)
821 821 # We need a representation of the first element that is > d that
822 822 # does not start with d, so since we added a `/` on the end of dir,
823 823 # we'll add whatever comes after slash (we could probably assume
824 824 # that `0` is after `/`, but let's not) to the end of dir instead.
825 825 dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
826 826 # Use bisect to find the first element >= d_next
827 827 last = bisect.bisect_left(candidates, dnext, lo=first)
828 828 dlen = len(d)
829 829 candidates = {c[dlen:] for c in candidates[first:last]}
830 830 # self._dirs includes all of the directories, recursively, so if
831 831 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
832 832 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
833 833 # '/' in it, indicating it's for a subdir-of-a-subdir; the
834 834 # immediate subdir will be in there without a slash.
835 835 ret = {c for c in candidates if b'/' not in c}
836 836 # We really do not expect ret to be empty, since that would imply that
837 837 # there's something in _dirs that didn't have a file in _fileset.
838 838 assert ret
839 839 return ret
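# A worked example (illustrative) of the bisect-based lookup above:
#     m = exactmatcher([b'a/b/c.txt', b'd.txt'])
#     m.visitchildrenset(b'')    -> {b'a', b'd.txt'}
#     m.visitchildrenset(b'a')   -> {b'b'}
#     m.visitchildrenset(b'a/b') -> {b'c.txt'}
#     m.visitchildrenset(b'x')   -> set()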
840 840
841 841 def isexact(self):
842 842 return True
843 843
844 844 @encoding.strmethod
845 845 def __repr__(self):
846 846 return b'<exactmatcher files=%r>' % self._files
847 847
848 848
849 849 class differencematcher(basematcher):
850 850 """Composes two matchers by matching if the first matches and the second
851 851 does not.
852 852
853 853 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
854 854 """
855 855
856 856 def __init__(self, m1, m2):
857 857 super(differencematcher, self).__init__()
858 858 self._m1 = m1
859 859 self._m2 = m2
860 860 self.bad = m1.bad
861 861 self.traversedir = m1.traversedir
862 862
863 863 def matchfn(self, f):
864 864 return self._m1(f) and not self._m2(f)
865 865
866 866 @propertycache
867 867 def _files(self):
868 868 if self.isexact():
869 869 return [f for f in self._m1.files() if self(f)]
870 870 # If m1 is not an exact matcher, we can't easily figure out the set of
871 871 # files, because its files() are not always files. For example, if
872 872 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
873 873 # want to remove "dir" from the set even though it would match m2,
874 874 # because the "dir" in m1 may not be a file.
875 875 return self._m1.files()
876 876
877 877 def visitdir(self, dir):
878 878 if self._m2.visitdir(dir) == b'all':
879 879 return False
880 880 elif not self._m2.visitdir(dir):
881 881 # m2 does not match dir, we can return 'all' here if possible
882 882 return self._m1.visitdir(dir)
883 883 return bool(self._m1.visitdir(dir))
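# Illustrative behaviour (assuming the patternmatcher semantics above):
#     m1 = 'path:dir', m2 matches nothing  -> visitdir(b'dir') == b'all'
#     m1 = 'path:dir', m2 = 'path:dir'     -> visitdir(b'dir') is False
#     m1 = 'path:dir', m2 = 'path:dir/sub' -> visitdir(b'dir') is True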
884 884
885 885 def visitchildrenset(self, dir):
886 886 m2_set = self._m2.visitchildrenset(dir)
887 887 if m2_set == b'all':
888 888 return set()
889 889 m1_set = self._m1.visitchildrenset(dir)
890 890 # Possible values for m1: 'all', 'this', set(...), set()
891 891 # Possible values for m2: 'this', set(...), set()
892 892 # If m2 has nothing under here that we care about, return m1, even if
893 893 # it's 'all'. This is a change in behavior from visitdir, which would
894 894 # return True, not 'all', for some reason.
895 895 if not m2_set:
896 896 return m1_set
897 897 if m1_set in [b'all', b'this']:
898 898 # Never return 'all' here if m2_set is any kind of non-empty (either
899 899 # 'this' or set(foo)), since m2 might return set() for a
900 900 # subdirectory.
901 901 return b'this'
902 902 # Possible values for m1: set(...), set()
903 903 # Possible values for m2: 'this', set(...)
904 904 # We ignore m2's set results. They're possibly incorrect:
905 905 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
906 906 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
907 907 # return set(), which is *not* correct, we still need to visit 'dir'!
908 908 return m1_set
909 909
910 910 def isexact(self):
911 911 return self._m1.isexact()
912 912
913 913 @encoding.strmethod
914 914 def __repr__(self):
915 915 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
916 916
917 917
918 918 def intersectmatchers(m1, m2):
919 919 """Composes two matchers by matching if both of them match.
920 920
921 921 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
922 922 """
923 923 if m1 is None or m2 is None:
924 924 return m1 or m2
925 925 if m1.always():
926 926 m = copy.copy(m2)
927 927 # TODO: Consider encapsulating these things in a class so there's only
928 928 # one thing to copy from m1.
929 929 m.bad = m1.bad
930 930 m.traversedir = m1.traversedir
931 931 return m
932 932 if m2.always():
933 933 m = copy.copy(m1)
934 934 return m
935 935 return intersectionmatcher(m1, m2)
936 936
937 937
938 938 class intersectionmatcher(basematcher):
939 939 def __init__(self, m1, m2):
940 940 super(intersectionmatcher, self).__init__()
941 941 self._m1 = m1
942 942 self._m2 = m2
943 943 self.bad = m1.bad
944 944 self.traversedir = m1.traversedir
945 945
946 946 @propertycache
947 947 def _files(self):
948 948 if self.isexact():
949 949 m1, m2 = self._m1, self._m2
950 950 if not m1.isexact():
951 951 m1, m2 = m2, m1
952 952 return [f for f in m1.files() if m2(f)]
953 953 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
954 954 # the set of files, because their files() are not always files. For
955 955 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
956 956 # "path:dir2", we don't want to remove "dir2" from the set.
957 957 return self._m1.files() + self._m2.files()
958 958
959 959 def matchfn(self, f):
960 960 return self._m1(f) and self._m2(f)
961 961
962 962 def visitdir(self, dir):
963 963 visit1 = self._m1.visitdir(dir)
964 964 if visit1 == b'all':
965 965 return self._m2.visitdir(dir)
966 966 # bool() because visit1=True + visit2='all' should not be 'all'
967 967 return bool(visit1 and self._m2.visitdir(dir))
968 968
969 969 def visitchildrenset(self, dir):
970 970 m1_set = self._m1.visitchildrenset(dir)
971 971 if not m1_set:
972 972 return set()
973 973 m2_set = self._m2.visitchildrenset(dir)
974 974 if not m2_set:
975 975 return set()
976 976
977 977 if m1_set == b'all':
978 978 return m2_set
979 979 elif m2_set == b'all':
980 980 return m1_set
981 981
982 982 if m1_set == b'this' or m2_set == b'this':
983 983 return b'this'
984 984
985 985 assert isinstance(m1_set, set) and isinstance(m2_set, set)
986 986 return m1_set.intersection(m2_set)
987 987
988 988 def always(self):
989 989 return self._m1.always() and self._m2.always()
990 990
991 991 def isexact(self):
992 992 return self._m1.isexact() or self._m2.isexact()
993 993
994 994 @encoding.strmethod
995 995 def __repr__(self):
996 996 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
997 997
998 998
999 999 class subdirmatcher(basematcher):
1000 1000 """Adapt a matcher to work on a subdirectory only.
1001 1001
1002 1002 The paths are remapped to remove/insert the path as needed:
1003 1003
1004 1004 >>> from . import pycompat
1005 1005 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1006 1006 >>> m2 = subdirmatcher(b'sub', m1)
1007 1007 >>> m2(b'a.txt')
1008 1008 False
1009 1009 >>> m2(b'b.txt')
1010 1010 True
1011 1011 >>> m2.matchfn(b'a.txt')
1012 1012 False
1013 1013 >>> m2.matchfn(b'b.txt')
1014 1014 True
1015 1015 >>> m2.files()
1016 1016 ['b.txt']
1017 1017 >>> m2.exact(b'b.txt')
1018 1018 True
1019 1019 >>> def bad(f, msg):
1020 1020 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1021 1021 >>> m1.bad = bad
1022 1022 >>> m2.bad(b'x.txt', b'No such file')
1023 1023 sub/x.txt: No such file
1024 1024 """
1025 1025
1026 1026 def __init__(self, path, matcher):
1027 1027 super(subdirmatcher, self).__init__()
1028 1028 self._path = path
1029 1029 self._matcher = matcher
1030 1030 self._always = matcher.always()
1031 1031
1032 1032 self._files = [
1033 1033 f[len(path) + 1 :]
1034 1034 for f in matcher._files
1035 1035 if f.startswith(path + b"/")
1036 1036 ]
1037 1037
1038 1038 # If the parent repo had a path to this subrepo and the matcher is
1039 1039 # a prefix matcher, this submatcher always matches.
1040 1040 if matcher.prefix():
1041 1041 self._always = any(f == path for f in matcher._files)
1042 1042
1043 1043 def bad(self, f, msg):
1044 1044 self._matcher.bad(self._path + b"/" + f, msg)
1045 1045
1046 1046 def matchfn(self, f):
1047 1047 # Some information is lost in the superclass's constructor, so we
1048 1048 # can not accurately create the matching function for the subdirectory
1049 1049 # from the inputs. Instead, we override matchfn() and visitdir() to
1050 1050 # call the original matcher with the subdirectory path prepended.
1051 1051 return self._matcher.matchfn(self._path + b"/" + f)
1052 1052
1053 1053 def visitdir(self, dir):
1054 1054 if dir == b'':
1055 1055 dir = self._path
1056 1056 else:
1057 1057 dir = self._path + b"/" + dir
1058 1058 return self._matcher.visitdir(dir)
1059 1059
1060 1060 def visitchildrenset(self, dir):
1061 1061 if dir == b'':
1062 1062 dir = self._path
1063 1063 else:
1064 1064 dir = self._path + b"/" + dir
1065 1065 return self._matcher.visitchildrenset(dir)
1066 1066
1067 1067 def always(self):
1068 1068 return self._always
1069 1069
1070 1070 def prefix(self):
1071 1071 return self._matcher.prefix() and not self._always
1072 1072
1073 1073 @encoding.strmethod
1074 1074 def __repr__(self):
1075 1075 return b'<subdirmatcher path=%r, matcher=%r>' % (
1076 1076 self._path,
1077 1077 self._matcher,
1078 1078 )
1079 1079
1080 1080
1081 1081 class prefixdirmatcher(basematcher):
1082 1082 """Adapt a matcher to work on a parent directory.
1083 1083
1084 1084 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1085 1085
1086 1086 The prefix path should usually be the relative path from the root of
1087 1087 this matcher to the root of the wrapped matcher.
1088 1088
1089 1089 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1090 1090 >>> m2 = prefixdirmatcher(b'd/e', m1)
1091 1091 >>> m2(b'a.txt')
1092 1092 False
1093 1093 >>> m2(b'd/e/a.txt')
1094 1094 True
1095 1095 >>> m2(b'd/e/b.txt')
1096 1096 False
1097 1097 >>> m2.files()
1098 1098 ['d/e/a.txt', 'd/e/f/b.txt']
1099 1099 >>> m2.exact(b'd/e/a.txt')
1100 1100 True
1101 1101 >>> m2.visitdir(b'd')
1102 1102 True
1103 1103 >>> m2.visitdir(b'd/e')
1104 1104 True
1105 1105 >>> m2.visitdir(b'd/e/f')
1106 1106 True
1107 1107 >>> m2.visitdir(b'd/e/g')
1108 1108 False
1109 1109 >>> m2.visitdir(b'd/ef')
1110 1110 False
1111 1111 """
1112 1112
1113 1113 def __init__(self, path, matcher, badfn=None):
1114 1114 super(prefixdirmatcher, self).__init__(badfn)
1115 1115 if not path:
1116 1116 raise error.ProgrammingError(b'prefix path must not be empty')
1117 1117 self._path = path
1118 1118 self._pathprefix = path + b'/'
1119 1119 self._matcher = matcher
1120 1120
1121 1121 @propertycache
1122 1122 def _files(self):
1123 1123 return [self._pathprefix + f for f in self._matcher._files]
1124 1124
1125 1125 def matchfn(self, f):
1126 1126 if not f.startswith(self._pathprefix):
1127 1127 return False
1128 1128 return self._matcher.matchfn(f[len(self._pathprefix) :])
1129 1129
1130 1130 @propertycache
1131 1131 def _pathdirs(self):
1132 1132 return set(pathutil.finddirs(self._path))
1133 1133
1134 1134 def visitdir(self, dir):
1135 1135 if dir == self._path:
1136 1136 return self._matcher.visitdir(b'')
1137 1137 if dir.startswith(self._pathprefix):
1138 1138 return self._matcher.visitdir(dir[len(self._pathprefix) :])
1139 1139 return dir in self._pathdirs
1140 1140
1141 1141 def visitchildrenset(self, dir):
1142 1142 if dir == self._path:
1143 1143 return self._matcher.visitchildrenset(b'')
1144 1144 if dir.startswith(self._pathprefix):
1145 1145 return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
1146 1146 if dir in self._pathdirs:
1147 1147 return b'this'
1148 1148 return set()
1149 1149
1150 1150 def isexact(self):
1151 1151 return self._matcher.isexact()
1152 1152
1153 1153 def prefix(self):
1154 1154 return self._matcher.prefix()
1155 1155
1156 1156 @encoding.strmethod
1157 1157 def __repr__(self):
1158 1158 return b'<prefixdirmatcher path=%r, matcher=%r>' % (
1159 1159 pycompat.bytestr(self._path),
1160 1160 self._matcher,
1161 1161 )
1162 1162
1163 1163
1164 1164 class unionmatcher(basematcher):
1165 1165 """A matcher that is the union of several matchers.
1166 1166
1167 1167 The non-matching-attributes (bad, traversedir) are taken from the first
1168 1168 matcher.
1169 1169 """
1170 1170
1171 1171 def __init__(self, matchers):
1172 1172 m1 = matchers[0]
1173 1173 super(unionmatcher, self).__init__()
1174 1174 self.traversedir = m1.traversedir
1175 1175 self._matchers = matchers
1176 1176
1177 1177 def matchfn(self, f):
1178 1178 for match in self._matchers:
1179 1179 if match(f):
1180 1180 return True
1181 1181 return False
1182 1182
1183 1183 def visitdir(self, dir):
1184 1184 r = False
1185 1185 for m in self._matchers:
1186 1186 v = m.visitdir(dir)
1187 1187 if v == b'all':
1188 1188 return v
1189 1189 r |= v
1190 1190 return r
1191 1191
1192 1192 def visitchildrenset(self, dir):
1193 1193 r = set()
1194 1194 this = False
1195 1195 for m in self._matchers:
1196 1196 v = m.visitchildrenset(dir)
1197 1197 if not v:
1198 1198 continue
1199 1199 if v == b'all':
1200 1200 return v
1201 1201 if this or v == b'this':
1202 1202 this = True
1203 1203 # don't break, we might have an 'all' in here.
1204 1204 continue
1205 1205 assert isinstance(v, set)
1206 1206 r = r.union(v)
1207 1207 if this:
1208 1208 return b'this'
1209 1209 return r
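# For example (illustrative): if the component matchers return {b'a'},
# b'this' and set(), the union is b'this'; if they return {b'a'} and
# {b'b'}, it is {b'a', b'b'}; any b'all' wins immediately.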
1210 1210
1211 1211 @encoding.strmethod
1212 1212 def __repr__(self):
1213 1213 return b'<unionmatcher matchers=%r>' % self._matchers
1214 1214
1215 1215
1216 1216 def patkind(pattern, default=None):
1217 1217 r"""If pattern is 'kind:pat' with a known kind, return kind.
1218 1218
1219 1219 >>> patkind(br're:.*\.c$')
1220 1220 're'
1221 1221 >>> patkind(b'glob:*.c')
1222 1222 'glob'
1223 1223 >>> patkind(b'relpath:test.py')
1224 1224 'relpath'
1225 1225 >>> patkind(b'main.py')
1226 1226 >>> patkind(b'main.py', default=b're')
1227 1227 're'
1228 1228 """
1229 1229 return _patsplit(pattern, default)[0]
1230 1230
1231 1231
1232 1232 def _patsplit(pattern, default):
1233 1233 """Split a string into the optional pattern kind prefix and the actual
1234 1234 pattern."""
1235 1235 if b':' in pattern:
1236 1236 kind, pat = pattern.split(b':', 1)
1237 1237 if kind in allpatternkinds:
1238 1238 return kind, pat
1239 1239 return default, pattern
1240 1240
1241 1241
1242 1242 def _globre(pat):
1243 1243 r"""Convert an extended glob string to a regexp string.
1244 1244
1245 1245 >>> from . import pycompat
1246 1246 >>> def bprint(s):
1247 1247 ... print(pycompat.sysstr(s))
1248 1248 >>> bprint(_globre(br'?'))
1249 1249 .
1250 1250 >>> bprint(_globre(br'*'))
1251 1251 [^/]*
1252 1252 >>> bprint(_globre(br'**'))
1253 1253 .*
1254 1254 >>> bprint(_globre(br'**/a'))
1255 1255 (?:.*/)?a
1256 1256 >>> bprint(_globre(br'a/**/b'))
1257 1257 a/(?:.*/)?b
1258 1258 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1259 1259 [a*?!^][\^b][^c]
1260 1260 >>> bprint(_globre(br'{a,b}'))
1261 1261 (?:a|b)
1262 1262 >>> bprint(_globre(br'.\*\?'))
1263 1263 \.\*\?
1264 1264 """
1265 1265 i, n = 0, len(pat)
1266 1266 res = b''
1267 1267 group = 0
1268 1268 escape = util.stringutil.regexbytesescapemap.get
1269 1269
1270 1270 def peek():
1271 1271 return i < n and pat[i : i + 1]
1272 1272
1273 1273 while i < n:
1274 1274 c = pat[i : i + 1]
1275 1275 i += 1
1276 1276 if c not in b'*?[{},\\':
1277 1277 res += escape(c, c)
1278 1278 elif c == b'*':
1279 1279 if peek() == b'*':
1280 1280 i += 1
1281 1281 if peek() == b'/':
1282 1282 i += 1
1283 1283 res += b'(?:.*/)?'
1284 1284 else:
1285 1285 res += b'.*'
1286 1286 else:
1287 1287 res += b'[^/]*'
1288 1288 elif c == b'?':
1289 1289 res += b'.'
1290 1290 elif c == b'[':
1291 1291 j = i
1292 1292 if j < n and pat[j : j + 1] in b'!]':
1293 1293 j += 1
1294 1294 while j < n and pat[j : j + 1] != b']':
1295 1295 j += 1
1296 1296 if j >= n:
1297 1297 res += b'\\['
1298 1298 else:
1299 1299 stuff = pat[i:j].replace(b'\\', b'\\\\')
1300 1300 i = j + 1
1301 1301 if stuff[0:1] == b'!':
1302 1302 stuff = b'^' + stuff[1:]
1303 1303 elif stuff[0:1] == b'^':
1304 1304 stuff = b'\\' + stuff
1305 1305 res = b'%s[%s]' % (res, stuff)
1306 1306 elif c == b'{':
1307 1307 group += 1
1308 1308 res += b'(?:'
1309 1309 elif c == b'}' and group:
1310 1310 res += b')'
1311 1311 group -= 1
1312 1312 elif c == b',' and group:
1313 1313 res += b'|'
1314 1314 elif c == b'\\':
1315 1315 p = peek()
1316 1316 if p:
1317 1317 i += 1
1318 1318 res += escape(p, p)
1319 1319 else:
1320 1320 res += escape(c, c)
1321 1321 else:
1322 1322 res += escape(c, c)
1323 1323 return res
1324 1324
1325 1325
1326 1326 def _regex(kind, pat, globsuffix):
1327 1327 """Convert a (normalized) pattern of any kind into a
1328 1328 regular expression.
1329 1329 globsuffix is appended to the regexp of globs."""
1330 1330 if not pat and kind in (b'glob', b'relpath'):
1331 1331 return b''
1332 1332 if kind == b're':
1333 1333 return pat
1334 1334 if kind in (b'path', b'relpath'):
1335 1335 if pat == b'.':
1336 1336 return b''
1337 1337 return util.stringutil.reescape(pat) + b'(?:/|$)'
1338 1338 if kind == b'rootfilesin':
1339 1339 if pat == b'.':
1340 1340 escaped = b''
1341 1341 else:
1342 1342 # Pattern is a directory name.
1343 1343 escaped = util.stringutil.reescape(pat) + b'/'
1344 1344 # Anything after the pattern must be a non-directory.
1345 1345 return escaped + b'[^/]+$'
1346 1346 if kind == b'relglob':
1347 1347 globre = _globre(pat)
1348 1348 if globre.startswith(b'[^/]*'):
1349 1349 # When pat has the form *XYZ (common), make the returned regex more
1350 1350 # legible by returning the regex for **XYZ instead of **/*XYZ.
1351 1351 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1352 1352 return b'(?:|.*/)' + globre + globsuffix
1353 1353 if kind == b'relre':
1354 1354 if pat.startswith(b'^'):
1355 1355 return pat
1356 1356 return b'.*' + pat
1357 1357 if kind in (b'glob', b'rootglob'):
1358 1358 return _globre(pat) + globsuffix
1359 1359 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
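# For example (illustrative, with globsuffix=b'$'):
#     _regex(b'path', b'foo/bar', b'$')    -> b'foo/bar(?:/|$)'
#     _regex(b'rootfilesin', b'foo', b'$') -> b'foo/[^/]+$'
#     _regex(b'relre', b'abc', b'$')       -> b'.*abc'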
1360 1360
1361 1361
1362 1362 def _buildmatch(kindpats, globsuffix, root):
1363 1363 """Return regexp string and a matcher function for kindpats.
1364 1364 globsuffix is appended to the regexp of globs."""
1365 1365 matchfuncs = []
1366 1366
1367 1367 subincludes, kindpats = _expandsubinclude(kindpats, root)
1368 1368 if subincludes:
1369 1369 submatchers = {}
1370 1370
1371 1371 def matchsubinclude(f):
1372 1372 for prefix, matcherargs in subincludes:
1373 1373 if f.startswith(prefix):
1374 1374 mf = submatchers.get(prefix)
1375 1375 if mf is None:
1376 1376 mf = match(*matcherargs)
1377 1377 submatchers[prefix] = mf
1378 1378
1379 1379 if mf(f[len(prefix) :]):
1380 1380 return True
1381 1381 return False
1382 1382
1383 1383 matchfuncs.append(matchsubinclude)
1384 1384
1385 1385 regex = b''
1386 1386 if kindpats:
1387 1387 if all(k == b'rootfilesin' for k, p, s in kindpats):
1388 1388 dirs = {p for k, p, s in kindpats}
1389 1389
1390 1390 def mf(f):
1391 1391 i = f.rfind(b'/')
1392 1392 if i >= 0:
1393 1393 dir = f[:i]
1394 1394 else:
1395 1395 dir = b'.'
1396 1396 return dir in dirs
1397 1397
1398 1398 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1399 1399 matchfuncs.append(mf)
1400 1400 else:
1401 1401 regex, mf = _buildregexmatch(kindpats, globsuffix)
1402 1402 matchfuncs.append(mf)
1403 1403
1404 1404 if len(matchfuncs) == 1:
1405 1405 return regex, matchfuncs[0]
1406 1406 else:
1407 1407 return regex, lambda f: any(mf(f) for mf in matchfuncs)
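# For example (illustrative): when every kindpat is 'rootfilesin', no regex
# is compiled at all; _buildmatch([(b'rootfilesin', b'foo', b'')], b'$', root)
# returns a matcher accepting b'foo/a' but rejecting b'foo/bar/a' and b'a',
# since only files directly inside b'foo' qualify.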
1408 1408
1409 1409
1410 1410 MAX_RE_SIZE = 20000
1411 1411
1412 1412
1413 1413 def _joinregexes(regexps):
1414 1414 """gather multiple regular expressions into a single one"""
1415 1415 return b'|'.join(regexps)
1416 1416
1417 1417
1418 1418 def _buildregexmatch(kindpats, globsuffix):
1419 1419 """Build a match function from a list of kinds and kindpats,
1420 1420 return regexp string and a matcher function.
1421 1421
1422 1422 Test too large input
1423 1423 >>> _buildregexmatch([
1424 1424 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1425 1425 ... ], b'$')
1426 1426 Traceback (most recent call last):
1427 1427 ...
1428 1428 Abort: matcher pattern is too long (20009 bytes)
1429 1429 """
1430 1430 try:
1431 1431 allgroups = []
1432 1432 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1433 1433 fullregexp = _joinregexes(regexps)
1434 1434
1435 1435 startidx = 0
1436 1436 groupsize = 0
1437 1437 for idx, r in enumerate(regexps):
1438 1438 piecesize = len(r)
1439 1439 if piecesize > MAX_RE_SIZE:
1440 1440 msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
1441 1441 raise error.Abort(msg)
1442 1442 elif (groupsize + piecesize) > MAX_RE_SIZE:
1443 1443 group = regexps[startidx:idx]
1444 1444 allgroups.append(_joinregexes(group))
1445 1445 startidx = idx
1446 1446 groupsize = 0
1447 1447 groupsize += piecesize + 1
1448 1448
1449 1449 if startidx == 0:
1450 1450 matcher = _rematcher(fullregexp)
1451 1451 func = lambda s: bool(matcher(s))
1452 1452 else:
1453 1453 group = regexps[startidx:]
1454 1454 allgroups.append(_joinregexes(group))
1455 1455 allmatchers = [_rematcher(g) for g in allgroups]
1456 1456 func = lambda s: any(m(s) for m in allmatchers)
1457 1457 return fullregexp, func
1458 1458 except re.error:
1459 1459 for k, p, s in kindpats:
1460 1460 try:
1461 1461 _rematcher(_regex(k, p, globsuffix))
1462 1462 except re.error:
1463 1463 if s:
1464 1464 raise error.Abort(
1465 1465 _(b"%s: invalid pattern (%s): %s") % (s, k, p)
1466 1466 )
1467 1467 else:
1468 1468 raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
1469 1469 raise error.Abort(_(b"invalid pattern"))
1470 1470
1471 1471
1472 1472 def _patternrootsanddirs(kindpats):
1473 1473 """Returns roots and directories corresponding to each pattern.
1474 1474
1475 1475 This calculates the roots and directories exactly matching the patterns and
1476 1476 returns a tuple of (roots, dirs). It does not return other
1477 1477 directories which may also need to be considered, like the parent
1478 1478 directories.
1479 1479 """
1480 1480 r = []
1481 1481 d = []
1482 1482 for kind, pat, source in kindpats:
1483 1483 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1484 1484 root = []
1485 1485 for p in pat.split(b'/'):
1486 1486 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1487 1487 break
1488 1488 root.append(p)
1489 1489 r.append(b'/'.join(root))
1490 1490 elif kind in (b'relpath', b'path'):
1491 1491 if pat == b'.':
1492 1492 pat = b''
1493 1493 r.append(pat)
1494 1494 elif kind in (b'rootfilesin',):
1495 1495 if pat == b'.':
1496 1496 pat = b''
1497 1497 d.append(pat)
1498 1498 else: # relglob, re, relre
1499 1499 r.append(b'')
1500 1500 return r, d
1501 1501
1502 1502
1503 1503 def _roots(kindpats):
1504 1504 '''Returns root directories to match recursively from the given patterns.'''
1505 1505 roots, dirs = _patternrootsanddirs(kindpats)
1506 1506 return roots
1507 1507
1508 1508
1509 1509 def _rootsdirsandparents(kindpats):
1510 1510 """Returns roots and exact directories from patterns.
1511 1511
1512 1512 `roots` are directories to match recursively, `dirs` should
1513 1513 be matched non-recursively, and `parents` are the implicitly required
1514 1514 directories to walk to items in either roots or dirs.
1515 1515
1516 1516 Returns a tuple of (roots, dirs, parents).
1517 1517
1518 1518 >>> r = _rootsdirsandparents(
1519 1519 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1520 1520 ... (b'glob', b'g*', b'')])
1521 1521 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1522 1522 (['g/h', 'g/h', ''], []) ['', 'g']
1523 1523 >>> r = _rootsdirsandparents(
1524 1524 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1525 1525 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1526 1526 ([], ['g/h', '']) ['', 'g']
1527 1527 >>> r = _rootsdirsandparents(
1528 1528 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1529 1529 ... (b'path', b'', b'')])
1530 1530 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1531 1531 (['r', 'p/p', ''], []) ['', 'p']
1532 1532 >>> r = _rootsdirsandparents(
1533 1533 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1534 1534 ... (b'relre', b'rr', b'')])
1535 1535 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1536 1536 (['', '', ''], []) ['']
1537 1537 """
1538 1538 r, d = _patternrootsanddirs(kindpats)
1539 1539
1540 1540 p = set()
1541 1541 # Add the parents as non-recursive/exact directories, since they must be
1542 1542 # scanned to get to either the roots or the other exact directories.
1543 1543 p.update(pathutil.dirs(d))
1544 1544 p.update(pathutil.dirs(r))
1545 1545
1546 1546 # FIXME: all uses of this function convert these to sets, do so before
1547 1547 # returning.
1548 1548 # FIXME: all uses of this function do not need anything in 'roots' and
1549 1549 # 'dirs' to also be in 'parents', consider removing them before returning.
1550 1550 return r, d, p
1551 1551
1552 1552
1553 1553 def _explicitfiles(kindpats):
1554 1554 """Returns the potential explicit filenames from the patterns.
1555 1555
1556 1556 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1557 1557 ['foo/bar']
1558 1558 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1559 1559 []
1560 1560 """
1561 1561 # Keep only the pattern kinds where one can specify filenames (vs only
1562 1562 # directory names).
1563 1563 filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
1564 1564 return _roots(filable)
1565 1565
1566 1566
1567 1567 def _prefix(kindpats):
1568 1568 '''Whether all the patterns match a prefix (i.e. recursively)'''
1569 1569 for kind, pat, source in kindpats:
1570 1570 if kind not in (b'path', b'relpath'):
1571 1571 return False
1572 1572 return True
1573 1573
1574 1574
1575 1575 _commentre = None
1576 1576
1577 1577
1578 1578 def readpatternfile(filepath, warn, sourceinfo=False):
1579 1579 """parse a pattern file, returning a list of
1580 1580 patterns. These patterns should be given to compile()
1581 1581 to be validated and converted into a match function.
1582 1582
1583 1583 trailing white space is dropped.
1584 1584 the escape character is backslash.
1585 1585 comments start with #.
1586 1586 empty lines are skipped.
1587 1587
1588 1588 lines can be of the following formats:
1589 1589
1590 1590 syntax: regexp # defaults following lines to non-rooted regexps
1591 1591 syntax: glob # defaults following lines to non-rooted globs
1592 1592 re:pattern # non-rooted regular expression
1593 1593 glob:pattern # non-rooted glob
1594 1594 rootglob:pat # rooted glob (same root as ^ in regexps)
1595 1595 pattern # pattern of the current default type
1596 1596
1597 1597 if sourceinfo is set, returns a list of tuples:
1598 1598 (pattern, lineno, originalline).
1599 1599 This is useful to debug ignore patterns.
1600 1600 """
1601 1601
1602 1602 syntaxes = {
1603 1603 b're': b'relre:',
1604 1604 b'regexp': b'relre:',
1605 1605 b'glob': b'relglob:',
1606 1606 b'rootglob': b'rootglob:',
1607 1607 b'include': b'include',
1608 1608 b'subinclude': b'subinclude',
1609 1609 }
1610 1610 syntax = b'relre:'
1611 1611 patterns = []
1612 1612
1613 1613 fp = open(filepath, b'rb')
1614 for lineno, line in enumerate(util.iterfile(fp), start=1):
1614 for lineno, line in enumerate(fp, start=1):
1615 1615 if b"#" in line:
1616 1616 global _commentre
1617 1617 if not _commentre:
1618 1618 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1619 1619 # remove comments prefixed by an even number of escapes
1620 1620 m = _commentre.search(line)
1621 1621 if m:
1622 1622 line = line[: m.end(1)]
1623 1623 # fixup properly escaped comments that survived the above
1624 1624 line = line.replace(b"\\#", b"#")
1625 1625 line = line.rstrip()
1626 1626 if not line:
1627 1627 continue
1628 1628
1629 1629 if line.startswith(b'syntax:'):
1630 1630 s = line[7:].strip()
1631 1631 try:
1632 1632 syntax = syntaxes[s]
1633 1633 except KeyError:
1634 1634 if warn:
1635 1635 warn(
1636 1636 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1637 1637 )
1638 1638 continue
1639 1639
1640 1640 linesyntax = syntax
1641 1641 for s, rels in syntaxes.items():
1642 1642 if line.startswith(rels):
1643 1643 linesyntax = rels
1644 1644 line = line[len(rels) :]
1645 1645 break
1646 1646 elif line.startswith(s + b':'):
1647 1647 linesyntax = rels
1648 1648 line = line[len(s) + 1 :]
1649 1649 break
1650 1650 if sourceinfo:
1651 1651 patterns.append((linesyntax + line, lineno, line))
1652 1652 else:
1653 1653 patterns.append(linesyntax + line)
1654 1654 fp.close()
1655 1655 return patterns
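# For example (illustrative), a pattern file containing the three lines
#
#     syntax: glob
#     *.o
#     re:^build/
#
# produces [b'relglob:*.o', b'relre:^build/']; the 'syntax:' line only
# changes the default applied to the lines that follow it.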
@@ -1,3261 +1,3261 b''
1 1 # patch.py - patch file parsing routines
2 2 #
3 3 # Copyright 2006 Brendan Cully <brendan@kublai.com>
4 4 # Copyright 2007 Chris Mason <chris.mason@oracle.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 import collections
11 11 import contextlib
12 12 import copy
13 13 import errno
14 14 import os
15 15 import re
16 16 import shutil
17 17 import zlib
18 18
19 19 from .i18n import _
20 20 from .node import (
21 21 hex,
22 22 sha1nodeconstants,
23 23 short,
24 24 )
25 25 from .pycompat import open
26 26 from . import (
27 27 copies,
28 28 diffhelper,
29 29 diffutil,
30 30 encoding,
31 31 error,
32 32 mail,
33 33 mdiff,
34 34 pathutil,
35 35 pycompat,
36 36 scmutil,
37 37 similar,
38 38 util,
39 39 vfs as vfsmod,
40 40 )
41 41 from .utils import (
42 42 dateutil,
43 43 hashutil,
44 44 procutil,
45 45 stringutil,
46 46 )
47 47
48 48 stringio = util.stringio
49 49
50 50 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
51 51 tabsplitter = re.compile(br'(\t+|[^\t]+)')
52 52 wordsplitter = re.compile(
53 53 br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|[^ \ta-zA-Z0-9_\x80-\xff])'
54 54 )
55 55
56 56 PatchError = error.PatchError
57 57 PatchParseError = error.PatchParseError
58 58 PatchApplicationError = error.PatchApplicationError
59 59
60 60 # public functions
61 61
62 62
63 63 def split(stream):
64 64 '''return an iterator of individual patches from a stream'''
65 65
66 66 def isheader(line, inheader):
67 67 if inheader and line.startswith((b' ', b'\t')):
68 68 # continuation
69 69 return True
70 70 if line.startswith((b' ', b'-', b'+')):
71 71 # diff line - don't check for header pattern in there
72 72 return False
73 73 l = line.split(b': ', 1)
74 74 return len(l) == 2 and b' ' not in l[0]
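# For example (illustrative): isheader(b'Subject: foo', False) is True,
# isheader(b' continued', True) is True (header continuation), and
# isheader(b'+added line', False) is False (diff content).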
75 75
76 76 def chunk(lines):
77 77 return stringio(b''.join(lines))
78 78
79 79 def hgsplit(stream, cur):
80 80 inheader = True
81 81
82 82 for line in stream:
83 83 if not line.strip():
84 84 inheader = False
85 85 if not inheader and line.startswith(b'# HG changeset patch'):
86 86 yield chunk(cur)
87 87 cur = []
88 88 inheader = True
89 89
90 90 cur.append(line)
91 91
92 92 if cur:
93 93 yield chunk(cur)
94 94
95 95 def mboxsplit(stream, cur):
96 96 for line in stream:
97 97 if line.startswith(b'From '):
98 98 for c in split(chunk(cur[1:])):
99 99 yield c
100 100 cur = []
101 101
102 102 cur.append(line)
103 103
104 104 if cur:
105 105 for c in split(chunk(cur[1:])):
106 106 yield c
107 107
108 108 def mimesplit(stream, cur):
109 109 def msgfp(m):
110 110 fp = stringio()
111 111 # pytype: disable=wrong-arg-types
112 112 g = mail.Generator(fp, mangle_from_=False)
113 113 # pytype: enable=wrong-arg-types
114 114 g.flatten(m)
115 115 fp.seek(0)
116 116 return fp
117 117
118 118 for line in stream:
119 119 cur.append(line)
120 120 c = chunk(cur)
121 121
122 122 m = mail.parse(c)
123 123 if not m.is_multipart():
124 124 yield msgfp(m)
125 125 else:
126 126 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
127 127 for part in m.walk():
128 128 ct = part.get_content_type()
129 129 if ct not in ok_types:
130 130 continue
131 131 yield msgfp(part)
132 132
133 133 def headersplit(stream, cur):
134 134 inheader = False
135 135
136 136 for line in stream:
137 137 if not inheader and isheader(line, inheader):
138 138 yield chunk(cur)
139 139 cur = []
140 140 inheader = True
141 141 if inheader and not isheader(line, inheader):
142 142 inheader = False
143 143
144 144 cur.append(line)
145 145
146 146 if cur:
147 147 yield chunk(cur)
148 148
149 149 def remainder(cur):
150 150 yield chunk(cur)
151 151
152 152 class fiter(object):
153 153 def __init__(self, fp):
154 154 self.fp = fp
155 155
156 156 def __iter__(self):
157 157 return self
158 158
159 159 def next(self):
160 160 l = self.fp.readline()
161 161 if not l:
162 162 raise StopIteration
163 163 return l
164 164
165 165 __next__ = next
166 166
167 167 inheader = False
168 168 cur = []
169 169
170 170 mimeheaders = [b'content-type']
171 171
172 172 if not util.safehasattr(stream, b'next'):
173 173 # http responses, for example, have readline but not next
174 174 stream = fiter(stream)
175 175
176 176 for line in stream:
177 177 cur.append(line)
178 178 if line.startswith(b'# HG changeset patch'):
179 179 return hgsplit(stream, cur)
180 180 elif line.startswith(b'From '):
181 181 return mboxsplit(stream, cur)
182 182 elif isheader(line, inheader):
183 183 inheader = True
184 184 if line.split(b':', 1)[0].lower() in mimeheaders:
185 185 # let email parser handle this
186 186 return mimesplit(stream, cur)
187 187 elif line.startswith(b'--- ') and inheader:
188 188 # No evil headers seen before the diff started, split by hand
189 189 return headersplit(stream, cur)
190 190 # Not enough info, keep reading
191 191
192 192 # if we are here, we have a very plain patch
193 193 return remainder(cur)
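# Illustrative usage sketch (assumes 'fp' is a binary file-like object
# holding one or more patches, e.g. an mbox; 'process' is a hypothetical
# consumer):
#
#     for patchfp in split(fp):
#         process(patchfp.read())  # one patch per chunk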
194 194
195 195
196 196 ## Some facility for extensible patch parsing:
197 197 # list of pairs ("header to match", "data key")
198 198 patchheadermap = [
199 199 (b'Date', b'date'),
200 200 (b'Branch', b'branch'),
201 201 (b'Node ID', b'nodeid'),
202 202 ]
203 203
204 204
205 205 @contextlib.contextmanager
206 206 def extract(ui, fileobj):
207 207 """extract patch from data read from fileobj.
208 208
209 209 patch can be a normal patch or contained in an email message.
210 210
211 211 return a dictionary. Standard keys are:
212 212 - filename,
213 213 - message,
214 214 - user,
215 215 - date,
216 216 - branch,
217 217 - node,
218 218 - p1,
219 219 - p2.
220 220 Any item can be missing from the dictionary. If filename is missing,
221 221 fileobj did not contain a patch. Caller must unlink filename when done."""
222 222
223 223 fd, tmpname = pycompat.mkstemp(prefix=b'hg-patch-')
224 224 tmpfp = os.fdopen(fd, 'wb')
225 225 try:
226 226 yield _extract(ui, fileobj, tmpname, tmpfp)
227 227 finally:
228 228 tmpfp.close()
229 229 os.unlink(tmpname)
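# Illustrative usage sketch (assumes a 'ui' object and an open binary
# patch file 'fileobj'):
#
#     with extract(ui, fileobj) as data:
#         if b'filename' in data:
#             ...  # apply the patch that was written to data[b'filename']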
230 230
231 231
232 232 def _extract(ui, fileobj, tmpname, tmpfp):
233 233
234 234 # attempt to detect the start of a patch
235 235 # (this heuristic is borrowed from quilt)
236 236 diffre = re.compile(
237 237 br'^(?:Index:[ \t]|diff[ \t]-|RCS file: |'
238 238 br'retrieving revision [0-9]+(\.[0-9]+)*$|'
239 239 br'---[ \t].*?^\+\+\+[ \t]|'
240 240 br'\*\*\*[ \t].*?^---[ \t])',
241 241 re.MULTILINE | re.DOTALL,
242 242 )
243 243
244 244 data = {}
245 245
246 246 msg = mail.parse(fileobj)
247 247
248 248 subject = msg['Subject'] and mail.headdecode(msg['Subject'])
249 249 data[b'user'] = msg['From'] and mail.headdecode(msg['From'])
250 250 if not subject and not data[b'user']:
251 251 # Not an email, restore parsed headers if any
252 252 subject = (
253 253 b'\n'.join(
254 254 b': '.join(map(encoding.strtolocal, h)) for h in msg.items()
255 255 )
256 256 + b'\n'
257 257 )
258 258
259 259 # should try to parse msg['Date']
260 260 parents = []
261 261
262 262 nodeid = msg['X-Mercurial-Node']
263 263 if nodeid:
264 264 data[b'nodeid'] = nodeid = mail.headdecode(nodeid)
265 265 ui.debug(b'Node ID: %s\n' % nodeid)
266 266
267 267 if subject:
268 268 if subject.startswith(b'[PATCH'):
269 269 pend = subject.find(b']')
270 270 if pend >= 0:
271 271 subject = subject[pend + 1 :].lstrip()
272 272 subject = re.sub(br'\n[ \t]+', b' ', subject)
273 273 ui.debug(b'Subject: %s\n' % subject)
274 274 if data[b'user']:
275 275 ui.debug(b'From: %s\n' % data[b'user'])
276 276 diffs_seen = 0
277 277 ok_types = (b'text/plain', b'text/x-diff', b'text/x-patch')
278 278 message = b''
279 279 for part in msg.walk():
280 280 content_type = pycompat.bytestr(part.get_content_type())
281 281 ui.debug(b'Content-Type: %s\n' % content_type)
282 282 if content_type not in ok_types:
283 283 continue
284 284 payload = part.get_payload(decode=True)
285 285 m = diffre.search(payload)
286 286 if m:
287 287 hgpatch = False
288 288 hgpatchheader = False
289 289 ignoretext = False
290 290
291 291 ui.debug(b'found patch at byte %d\n' % m.start(0))
292 292 diffs_seen += 1
293 293 cfp = stringio()
294 294 for line in payload[: m.start(0)].splitlines():
295 295 if line.startswith(b'# HG changeset patch') and not hgpatch:
296 296 ui.debug(b'patch generated by hg export\n')
297 297 hgpatch = True
298 298 hgpatchheader = True
299 299 # drop earlier commit message content
300 300 cfp.seek(0)
301 301 cfp.truncate()
302 302 subject = None
303 303 elif hgpatchheader:
304 304 if line.startswith(b'# User '):
305 305 data[b'user'] = line[7:]
306 306 ui.debug(b'From: %s\n' % data[b'user'])
307 307 elif line.startswith(b"# Parent "):
308 308 parents.append(line[9:].lstrip())
309 309 elif line.startswith(b"# "):
310 310 for header, key in patchheadermap:
311 311 prefix = b'# %s ' % header
312 312 if line.startswith(prefix):
313 313 data[key] = line[len(prefix) :]
314 314 ui.debug(b'%s: %s\n' % (header, data[key]))
315 315 else:
316 316 hgpatchheader = False
317 317 elif line == b'---':
318 318 ignoretext = True
319 319 if not hgpatchheader and not ignoretext:
320 320 cfp.write(line)
321 321 cfp.write(b'\n')
322 322 message = cfp.getvalue()
323 323 if tmpfp:
324 324 tmpfp.write(payload)
325 325 if not payload.endswith(b'\n'):
326 326 tmpfp.write(b'\n')
327 327 elif not diffs_seen and message and content_type == b'text/plain':
328 328 message += b'\n' + payload
329 329
330 330 if subject and not message.startswith(subject):
331 331 message = b'%s\n%s' % (subject, message)
332 332 data[b'message'] = message
333 333 tmpfp.close()
334 334 if parents:
335 335 data[b'p1'] = parents.pop(0)
336 336 if parents:
337 337 data[b'p2'] = parents.pop(0)
338 338
339 339 if diffs_seen:
340 340 data[b'filename'] = tmpname
341 341
342 342 return data
343 343
344 344
345 345 class patchmeta(object):
346 346 """Patched file metadata
347 347
348 348 'op' is the performed operation within ADD, DELETE, RENAME, MODIFY
349 349 or COPY. 'path' is patched file path. 'oldpath' is set to the
350 350 origin file when 'op' is either COPY or RENAME, None otherwise. If
351 351 file mode is changed, 'mode' is a tuple (islink, isexec) where
352 352 'islink' is True if the file is a symlink and 'isexec' is True if
353 353 the file is executable. Otherwise, 'mode' is None.
354 354 """
355 355
356 356 def __init__(self, path):
357 357 self.path = path
358 358 self.oldpath = None
359 359 self.mode = None
360 360 self.op = b'MODIFY'
361 361 self.binary = False
362 362
363 363 def setmode(self, mode):
364 364 islink = mode & 0o20000
365 365 isexec = mode & 0o100
366 366 self.mode = (islink, isexec)
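# For example (illustrative): setmode(0o120000) records a truthy 'islink'
# flag (symlink), while setmode(0o100755) records a truthy 'isexec' flag
# (executable file); the flags are raw masked bits, not bools.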
367 367
368 368 def copy(self):
369 369 other = patchmeta(self.path)
370 370 other.oldpath = self.oldpath
371 371 other.mode = self.mode
372 372 other.op = self.op
373 373 other.binary = self.binary
374 374 return other
375 375
376 376 def _ispatchinga(self, afile):
377 377 if afile == b'/dev/null':
378 378 return self.op == b'ADD'
379 379 return afile == b'a/' + (self.oldpath or self.path)
380 380
381 381 def _ispatchingb(self, bfile):
382 382 if bfile == b'/dev/null':
383 383 return self.op == b'DELETE'
384 384 return bfile == b'b/' + self.path
385 385
386 386 def ispatching(self, afile, bfile):
387 387 return self._ispatchinga(afile) and self._ispatchingb(bfile)
388 388
389 389 def __repr__(self):
390 390 return "<patchmeta %s %r>" % (self.op, self.path)
391 391
392 392
393 393 def readgitpatch(lr):
394 394 """extract git-style metadata about patches from <patchname>"""
395 395
396 396 # Filter patch for git information
397 397 gp = None
398 398 gitpatches = []
399 399 for line in lr:
400 400 line = line.rstrip(b'\r\n')
401 401 if line.startswith(b'diff --git a/'):
402 402 m = gitre.match(line)
403 403 if m:
404 404 if gp:
405 405 gitpatches.append(gp)
406 406 dst = m.group(2)
407 407 gp = patchmeta(dst)
408 408 elif gp:
409 409 if line.startswith(b'--- '):
410 410 gitpatches.append(gp)
411 411 gp = None
412 412 continue
413 413 if line.startswith(b'rename from '):
414 414 gp.op = b'RENAME'
415 415 gp.oldpath = line[12:]
416 416 elif line.startswith(b'rename to '):
417 417 gp.path = line[10:]
418 418 elif line.startswith(b'copy from '):
419 419 gp.op = b'COPY'
420 420 gp.oldpath = line[10:]
421 421 elif line.startswith(b'copy to '):
422 422 gp.path = line[8:]
423 423 elif line.startswith(b'deleted file'):
424 424 gp.op = b'DELETE'
425 425 elif line.startswith(b'new file mode '):
426 426 gp.op = b'ADD'
427 427 gp.setmode(int(line[-6:], 8))
428 428 elif line.startswith(b'new mode '):
429 429 gp.setmode(int(line[-6:], 8))
430 430 elif line.startswith(b'GIT binary patch'):
431 431 gp.binary = True
432 432 if gp:
433 433 gitpatches.append(gp)
434 434
435 435 return gitpatches
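# Illustrative sketch: fed the lines
#     diff --git a/old.txt b/new.txt
#     rename from old.txt
#     rename to new.txt
# readgitpatch returns a single patchmeta with op 'RENAME',
# path 'new.txt' and oldpath 'old.txt'.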
436 436
437 437
438 438 class linereader(object):
439 439 # simple class to allow pushing lines back into the input stream
440 440 def __init__(self, fp):
441 441 self.fp = fp
442 442 self.buf = []
443 443
444 444 def push(self, line):
445 445 if line is not None:
446 446 self.buf.append(line)
447 447
448 448 def readline(self):
449 449 if self.buf:
450 450 l = self.buf[0]
451 451 del self.buf[0]
452 452 return l
453 453 return self.fp.readline()
454 454
455 455 def __iter__(self):
456 456 return iter(self.readline, b'')
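# Note: iter(self.readline, b'') is the two-argument iter() sentinel
# form; readline() is called repeatedly until it returns b'' (EOF), so
# lines push()ed back are transparently re-read before the stream.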
457 457
458 458
459 459 class abstractbackend(object):
460 460 def __init__(self, ui):
461 461 self.ui = ui
462 462
463 463 def getfile(self, fname):
464 464 """Return target file data and flags as a (data, (islink,
465 465 isexec)) tuple. Data is None if file is missing/deleted.
466 466 """
467 467 raise NotImplementedError
468 468
469 469 def setfile(self, fname, data, mode, copysource):
470 470 """Write data to target file fname and set its mode. mode is a
471 471 (islink, isexec) tuple. If data is None, the file content should
472 472 be left unchanged. If the file is modified after being copied,
473 473 copysource is set to the original file name.
474 474 """
475 475 raise NotImplementedError
476 476
477 477 def unlink(self, fname):
478 478 """Unlink target file."""
479 479 raise NotImplementedError
480 480
481 481 def writerej(self, fname, failed, total, lines):
482 482 """Write rejected lines for fname. total is the number of hunks
483 483 which failed to apply and total the total number of hunks for this
484 484 files.
485 485 """
486 486
487 487 def exists(self, fname):
488 488 raise NotImplementedError
489 489
490 490 def close(self):
491 491 raise NotImplementedError
492 492
493 493
494 494 class fsbackend(abstractbackend):
495 495 def __init__(self, ui, basedir):
496 496 super(fsbackend, self).__init__(ui)
497 497 self.opener = vfsmod.vfs(basedir)
498 498
499 499 def getfile(self, fname):
500 500 if self.opener.islink(fname):
501 501 return (self.opener.readlink(fname), (True, False))
502 502
503 503 isexec = False
504 504 try:
505 505 isexec = self.opener.lstat(fname).st_mode & 0o100 != 0
506 506 except OSError as e:
507 507 if e.errno != errno.ENOENT:
508 508 raise
509 509 try:
510 510 return (self.opener.read(fname), (False, isexec))
511 511 except IOError as e:
512 512 if e.errno != errno.ENOENT:
513 513 raise
514 514 return None, None
515 515
516 516 def setfile(self, fname, data, mode, copysource):
517 517 islink, isexec = mode
518 518 if data is None:
519 519 self.opener.setflags(fname, islink, isexec)
520 520 return
521 521 if islink:
522 522 self.opener.symlink(data, fname)
523 523 else:
524 524 self.opener.write(fname, data)
525 525 if isexec:
526 526 self.opener.setflags(fname, False, True)
527 527
528 528 def unlink(self, fname):
529 529 rmdir = self.ui.configbool(b'experimental', b'removeemptydirs')
530 530 self.opener.unlinkpath(fname, ignoremissing=True, rmdir=rmdir)
531 531
532 532 def writerej(self, fname, failed, total, lines):
533 533 fname = fname + b".rej"
534 534 self.ui.warn(
535 535 _(b"%d out of %d hunks FAILED -- saving rejects to file %s\n")
536 536 % (failed, total, fname)
537 537 )
538 538 fp = self.opener(fname, b'w')
539 539 fp.writelines(lines)
540 540 fp.close()
541 541
542 542 def exists(self, fname):
543 543 return self.opener.lexists(fname)
544 544
545 545
546 546 class workingbackend(fsbackend):
547 547 def __init__(self, ui, repo, similarity):
548 548 super(workingbackend, self).__init__(ui, repo.root)
549 549 self.repo = repo
550 550 self.similarity = similarity
551 551 self.removed = set()
552 552 self.changed = set()
553 553 self.copied = []
554 554
555 555 def _checkknown(self, fname):
556 556 if not self.repo.dirstate.get_entry(fname).any_tracked and self.exists(
557 557 fname
558 558 ):
559 559 raise PatchApplicationError(
560 560 _(b'cannot patch %s: file is not tracked') % fname
561 561 )
562 562
563 563 def setfile(self, fname, data, mode, copysource):
564 564 self._checkknown(fname)
565 565 super(workingbackend, self).setfile(fname, data, mode, copysource)
566 566 if copysource is not None:
567 567 self.copied.append((copysource, fname))
568 568 self.changed.add(fname)
569 569
570 570 def unlink(self, fname):
571 571 self._checkknown(fname)
572 572 super(workingbackend, self).unlink(fname)
573 573 self.removed.add(fname)
574 574 self.changed.add(fname)
575 575
576 576 def close(self):
577 577 wctx = self.repo[None]
578 578 changed = set(self.changed)
579 579 for src, dst in self.copied:
580 580 scmutil.dirstatecopy(self.ui, self.repo, wctx, src, dst)
581 581 if self.removed:
582 582 wctx.forget(sorted(self.removed))
583 583 for f in self.removed:
584 584 if f not in self.repo.dirstate:
585 585 # File was deleted and no longer belongs to the
586 586 # dirstate, it was probably marked added then
587 587 # deleted, and should not be considered by
588 588 # marktouched().
589 589 changed.discard(f)
590 590 if changed:
591 591 scmutil.marktouched(self.repo, changed, self.similarity)
592 592 return sorted(self.changed)
593 593
594 594
595 595 class filestore(object):
596 596 def __init__(self, maxsize=None):
597 597 self.opener = None
598 598 self.files = {}
599 599 self.created = 0
600 600 self.maxsize = maxsize
601 601 if self.maxsize is None:
602 602 self.maxsize = 4 * (2 ** 20)
603 603 self.size = 0
604 604 self.data = {}
605 605
606 606 def setfile(self, fname, data, mode, copied=None):
607 607 if self.maxsize < 0 or (len(data) + self.size) <= self.maxsize:
608 608 self.data[fname] = (data, mode, copied)
609 609 self.size += len(data)
610 610 else:
611 611 if self.opener is None:
612 612 root = pycompat.mkdtemp(prefix=b'hg-patch-')
613 613 self.opener = vfsmod.vfs(root)
614 614 # Avoid filename issues with these simple names
615 615 fn = b'%d' % self.created
616 616 self.opener.write(fn, data)
617 617 self.created += 1
618 618 self.files[fname] = (fn, mode, copied)
619 619
620 620 def getfile(self, fname):
621 621 if fname in self.data:
622 622 return self.data[fname]
623 623 if not self.opener or fname not in self.files:
624 624 return None, None, None
625 625 fn, mode, copied = self.files[fname]
626 626 return self.opener.read(fn), mode, copied
627 627
628 628 def close(self):
629 629 if self.opener:
630 630 shutil.rmtree(self.opener.base)
631 631
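# Usage sketch (illustrative): payloads that fit under maxsize stay in
# memory in self.data; anything that would push the running total past
# maxsize is spilled to a temporary directory (a negative maxsize keeps
# everything in memory).
#
#   store = filestore(maxsize=1024)
#   store.setfile(b'small', b'x' * 10, (False, False))    # kept in memory
#   store.setfile(b'large', b'x' * 4096, (False, False))  # spilled to disk
#   data, mode, copied = store.getfile(b'large')
#   store.close()  # removes the temporary directory, if one was created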
632 632
633 633 class repobackend(abstractbackend):
634 634 def __init__(self, ui, repo, ctx, store):
635 635 super(repobackend, self).__init__(ui)
636 636 self.repo = repo
637 637 self.ctx = ctx
638 638 self.store = store
639 639 self.changed = set()
640 640 self.removed = set()
641 641 self.copied = {}
642 642
643 643 def _checkknown(self, fname):
644 644 if fname not in self.ctx:
645 645 raise PatchApplicationError(
646 646 _(b'cannot patch %s: file is not tracked') % fname
647 647 )
648 648
649 649 def getfile(self, fname):
650 650 try:
651 651 fctx = self.ctx[fname]
652 652 except error.LookupError:
653 653 return None, None
654 654 flags = fctx.flags()
655 655 return fctx.data(), (b'l' in flags, b'x' in flags)
656 656
657 657 def setfile(self, fname, data, mode, copysource):
658 658 if copysource:
659 659 self._checkknown(copysource)
660 660 if data is None:
661 661 data = self.ctx[fname].data()
662 662 self.store.setfile(fname, data, mode, copysource)
663 663 self.changed.add(fname)
664 664 if copysource:
665 665 self.copied[fname] = copysource
666 666
667 667 def unlink(self, fname):
668 668 self._checkknown(fname)
669 669 self.removed.add(fname)
670 670
671 671 def exists(self, fname):
672 672 return fname in self.ctx
673 673
674 674 def close(self):
675 675 return self.changed | self.removed
676 676
677 677
678 678 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
679 679 unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
680 680 contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
681 681 eolmodes = [b'strict', b'crlf', b'lf', b'auto']
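
# For example (illustrative), unidesc matches b'@@ -1,7 +1,7 @@' with
# groups (b'1', b'7', b'1', b'7') and b'@@ -5 +5 @@' with
# (b'5', None, b'5', None); missing lengths default to 1 in
# read_unified_hunk().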
682 682
683 683
684 684 class patchfile(object):
685 685 def __init__(self, ui, gp, backend, store, eolmode=b'strict'):
686 686 self.fname = gp.path
687 687 self.eolmode = eolmode
688 688 self.eol = None
689 689 self.backend = backend
690 690 self.ui = ui
691 691 self.lines = []
692 692 self.exists = False
693 693 self.missing = True
694 694 self.mode = gp.mode
695 695 self.copysource = gp.oldpath
696 696 self.create = gp.op in (b'ADD', b'COPY', b'RENAME')
697 697 self.remove = gp.op == b'DELETE'
698 698 if self.copysource is None:
699 699 data, mode = backend.getfile(self.fname)
700 700 else:
701 701 data, mode = store.getfile(self.copysource)[:2]
702 702 if data is not None:
703 703 self.exists = self.copysource is None or backend.exists(self.fname)
704 704 self.missing = False
705 705 if data:
706 706 self.lines = mdiff.splitnewlines(data)
707 707 if self.mode is None:
708 708 self.mode = mode
709 709 if self.lines:
710 710 # Normalize line endings
711 711 if self.lines[0].endswith(b'\r\n'):
712 712 self.eol = b'\r\n'
713 713 elif self.lines[0].endswith(b'\n'):
714 714 self.eol = b'\n'
715 715 if eolmode != b'strict':
716 716 nlines = []
717 717 for l in self.lines:
718 718 if l.endswith(b'\r\n'):
719 719 l = l[:-2] + b'\n'
720 720 nlines.append(l)
721 721 self.lines = nlines
722 722 else:
723 723 if self.create:
724 724 self.missing = False
725 725 if self.mode is None:
726 726 self.mode = (False, False)
727 727 if self.missing:
728 728 self.ui.warn(_(b"unable to find '%s' for patching\n") % self.fname)
729 729 self.ui.warn(
730 730 _(
731 731 b"(use '--prefix' to apply patch relative to the "
732 732 b"current directory)\n"
733 733 )
734 734 )
735 735
736 736 self.hash = {}
737 737 self.dirty = 0
738 738 self.offset = 0
739 739 self.skew = 0
740 740 self.rej = []
741 741 self.fileprinted = False
742 742 self.printfile(False)
743 743 self.hunks = 0
744 744
745 745 def writelines(self, fname, lines, mode):
746 746 if self.eolmode == b'auto':
747 747 eol = self.eol
748 748 elif self.eolmode == b'crlf':
749 749 eol = b'\r\n'
750 750 else:
751 751 eol = b'\n'
752 752
753 753 if self.eolmode != b'strict' and eol and eol != b'\n':
754 754 rawlines = []
755 755 for l in lines:
756 756 if l and l.endswith(b'\n'):
757 757 l = l[:-1] + eol
758 758 rawlines.append(l)
759 759 lines = rawlines
760 760
761 761 self.backend.setfile(fname, b''.join(lines), mode, self.copysource)
762 762
763 763 def printfile(self, warn):
764 764 if self.fileprinted:
765 765 return
766 766 if warn or self.ui.verbose:
767 767 self.fileprinted = True
768 768 s = _(b"patching file %s\n") % self.fname
769 769 if warn:
770 770 self.ui.warn(s)
771 771 else:
772 772 self.ui.note(s)
773 773
774 774 def findlines(self, l, linenum):
775 775 # looks through the hash and finds candidate lines. The
776 776 # result is a list of line numbers sorted based on distance
777 777 # from linenum
778 778
779 779 cand = self.hash.get(l, [])
780 780 if len(cand) > 1:
781 781             # sort our list of candidate lines by distance from linenum
782 782 cand.sort(key=lambda x: abs(x - linenum))
783 783 return cand
784 784
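    # For example (illustrative): if self.hash maps a line to
    # occurrences at [3, 40, 10], then findlines(line, 12) returns
    # [10, 3, 40], i.e. the candidates ordered by distance from line 12.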
785 785 def write_rej(self):
786 786 # our rejects are a little different from patch(1). This always
787 787 # creates rejects in the same form as the original patch. A file
788 788 # header is inserted so that you can run the reject through patch again
789 789 # without having to type the filename.
790 790 if not self.rej:
791 791 return
792 792 base = os.path.basename(self.fname)
793 793 lines = [b"--- %s\n+++ %s\n" % (base, base)]
794 794 for x in self.rej:
795 795 for l in x.hunk:
796 796 lines.append(l)
797 797 if l[-1:] != b'\n':
798 798 lines.append(b'\n' + diffhelper.MISSING_NEWLINE_MARKER)
799 799 self.backend.writerej(self.fname, len(self.rej), self.hunks, lines)
800 800
801 801 def apply(self, h):
802 802 if not h.complete():
803 803 raise PatchParseError(
804 804 _(b"bad hunk #%d %s (%d %d %d %d)")
805 805 % (h.number, h.desc, len(h.a), h.lena, len(h.b), h.lenb)
806 806 )
807 807
808 808 self.hunks += 1
809 809
810 810 if self.missing:
811 811 self.rej.append(h)
812 812 return -1
813 813
814 814 if self.exists and self.create:
815 815 if self.copysource:
816 816 self.ui.warn(
817 817 _(b"cannot create %s: destination already exists\n")
818 818 % self.fname
819 819 )
820 820 else:
821 821 self.ui.warn(_(b"file %s already exists\n") % self.fname)
822 822 self.rej.append(h)
823 823 return -1
824 824
825 825 if isinstance(h, binhunk):
826 826 if self.remove:
827 827 self.backend.unlink(self.fname)
828 828 else:
829 829 l = h.new(self.lines)
830 830 self.lines[:] = l
831 831 self.offset += len(l)
832 832 self.dirty = True
833 833 return 0
834 834
835 835 horig = h
836 836 if (
837 837 self.eolmode in (b'crlf', b'lf')
838 838 or self.eolmode == b'auto'
839 839 and self.eol
840 840 ):
841 841 # If new eols are going to be normalized, then normalize
842 842 # hunk data before patching. Otherwise, preserve input
843 843 # line-endings.
844 844 h = h.getnormalized()
845 845
846 846 # fast case first, no offsets, no fuzz
847 847 old, oldstart, new, newstart = h.fuzzit(0, False)
848 848 oldstart += self.offset
849 849 orig_start = oldstart
850 850 # if there's skew we want to emit the "(offset %d lines)" even
851 851 # when the hunk cleanly applies at start + skew, so skip the
852 852 # fast case code
853 853 if self.skew == 0 and diffhelper.testhunk(old, self.lines, oldstart):
854 854 if self.remove:
855 855 self.backend.unlink(self.fname)
856 856 else:
857 857 self.lines[oldstart : oldstart + len(old)] = new
858 858 self.offset += len(new) - len(old)
859 859 self.dirty = True
860 860 return 0
861 861
862 862         # ok, we couldn't match the hunk. Let's look for offsets and fuzz it
863 863 self.hash = {}
864 864 for x, s in enumerate(self.lines):
865 865 self.hash.setdefault(s, []).append(x)
866 866
867 867 for fuzzlen in pycompat.xrange(
868 868 self.ui.configint(b"patch", b"fuzz") + 1
869 869 ):
870 870 for toponly in [True, False]:
871 871 old, oldstart, new, newstart = h.fuzzit(fuzzlen, toponly)
872 872 oldstart = oldstart + self.offset + self.skew
873 873 oldstart = min(oldstart, len(self.lines))
874 874 if old:
875 875 cand = self.findlines(old[0][1:], oldstart)
876 876 else:
877 877 # Only adding lines with no or fuzzed context, just
878 878                     # take the skew into account
879 879 cand = [oldstart]
880 880
881 881 for l in cand:
882 882 if not old or diffhelper.testhunk(old, self.lines, l):
883 883 self.lines[l : l + len(old)] = new
884 884 self.offset += len(new) - len(old)
885 885 self.skew = l - orig_start
886 886 self.dirty = True
887 887 offset = l - orig_start - fuzzlen
888 888 if fuzzlen:
889 889 msg = _(
890 890 b"Hunk #%d succeeded at %d "
891 891 b"with fuzz %d "
892 892 b"(offset %d lines).\n"
893 893 )
894 894 self.printfile(True)
895 895 self.ui.warn(
896 896 msg % (h.number, l + 1, fuzzlen, offset)
897 897 )
898 898 else:
899 899 msg = _(
900 900 b"Hunk #%d succeeded at %d "
901 901 b"(offset %d lines).\n"
902 902 )
903 903 self.ui.note(msg % (h.number, l + 1, offset))
904 904 return fuzzlen
905 905 self.printfile(True)
906 906 self.ui.warn(_(b"Hunk #%d FAILED at %d\n") % (h.number, orig_start))
907 907 self.rej.append(horig)
908 908 return -1
909 909
910 910 def close(self):
911 911 if self.dirty:
912 912 self.writelines(self.fname, self.lines, self.mode)
913 913 self.write_rej()
914 914 return len(self.rej)
915 915
916 916
917 917 class header(object):
918 918 """patch header"""
919 919
920 920 diffgit_re = re.compile(b'diff --git a/(.*) b/(.*)$')
921 921 diff_re = re.compile(b'diff -r .* (.*)$')
922 922 allhunks_re = re.compile(b'(?:index|deleted file) ')
923 923 pretty_re = re.compile(b'(?:new file|deleted file) ')
924 924 special_re = re.compile(b'(?:index|deleted|copy|rename|new mode) ')
925 925 newfile_re = re.compile(b'(?:new file|copy to|rename to)')
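
    # For example (illustrative), diffgit_re matches
    # b'diff --git a/foo/bar.c b/foo/bar.c' with groups
    # (b'foo/bar.c', b'foo/bar.c'); files() collapses the pair to a
    # single entry when both sides name the same path.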
926 926
927 927 def __init__(self, header):
928 928 self.header = header
929 929 self.hunks = []
930 930
931 931 def binary(self):
932 932 return any(h.startswith(b'index ') for h in self.header)
933 933
934 934 def pretty(self, fp):
935 935 for h in self.header:
936 936 if h.startswith(b'index '):
937 937 fp.write(_(b'this modifies a binary file (all or nothing)\n'))
938 938 break
939 939 if self.pretty_re.match(h):
940 940 fp.write(h)
941 941 if self.binary():
942 942 fp.write(_(b'this is a binary file\n'))
943 943 break
944 944 if h.startswith(b'---'):
945 945 fp.write(
946 946 _(b'%d hunks, %d lines changed\n')
947 947 % (
948 948 len(self.hunks),
949 949 sum([max(h.added, h.removed) for h in self.hunks]),
950 950 )
951 951 )
952 952 break
953 953 fp.write(h)
954 954
955 955 def write(self, fp):
956 956 fp.write(b''.join(self.header))
957 957
958 958 def allhunks(self):
959 959 return any(self.allhunks_re.match(h) for h in self.header)
960 960
961 961 def files(self):
962 962 match = self.diffgit_re.match(self.header[0])
963 963 if match:
964 964 fromfile, tofile = match.groups()
965 965 if fromfile == tofile:
966 966 return [fromfile]
967 967 return [fromfile, tofile]
968 968 else:
969 969 return self.diff_re.match(self.header[0]).groups()
970 970
971 971 def filename(self):
972 972 return self.files()[-1]
973 973
974 974 def __repr__(self):
975 975 return '<header %s>' % (
976 976 ' '.join(pycompat.rapply(pycompat.fsdecode, self.files()))
977 977 )
978 978
979 979 def isnewfile(self):
980 980 return any(self.newfile_re.match(h) for h in self.header)
981 981
982 982 def special(self):
983 983         # Special files are shown only at the header level and not at the
984 984         # hunk level; for example, a file that has been deleted is a
985 985         # special file. The user cannot change the content of the
986 986         # operation: in the case of a deleted file, the deletion is taken
987 987         # or not taken as a whole, never partially.
988 988         # Newly added files are special if they are empty; they are not
989 989         # special if they have some content, as we want to be able to change it
990 990 nocontent = len(self.header) == 2
991 991 emptynewfile = self.isnewfile() and nocontent
992 992 return emptynewfile or any(
993 993 self.special_re.match(h) for h in self.header
994 994 )
995 995
996 996
997 997 class recordhunk(object):
998 998 """patch hunk
999 999
1000 1000 XXX shouldn't we merge this with the other hunk class?
1001 1001 """
1002 1002
1003 1003 def __init__(
1004 1004 self,
1005 1005 header,
1006 1006 fromline,
1007 1007 toline,
1008 1008 proc,
1009 1009 before,
1010 1010 hunk,
1011 1011 after,
1012 1012 maxcontext=None,
1013 1013 ):
1014 1014 def trimcontext(lines, reverse=False):
1015 1015 if maxcontext is not None:
1016 1016 delta = len(lines) - maxcontext
1017 1017 if delta > 0:
1018 1018 if reverse:
1019 1019 return delta, lines[delta:]
1020 1020 else:
1021 1021 return delta, lines[:maxcontext]
1022 1022 return 0, lines
1023 1023
1024 1024 self.header = header
1025 1025         trimmedbefore, self.before = trimcontext(before, True)
1026 1026         self.fromline = fromline + trimmedbefore
1027 1027         self.toline = toline + trimmedbefore
1028 1028         _trimmedafter, self.after = trimcontext(after, False)
1029 1029 self.proc = proc
1030 1030 self.hunk = hunk
1031 1031 self.added, self.removed = self.countchanges(self.hunk)
1032 1032
1033 1033 def __eq__(self, v):
1034 1034 if not isinstance(v, recordhunk):
1035 1035 return False
1036 1036
1037 1037 return (
1038 1038 (v.hunk == self.hunk)
1039 1039 and (v.proc == self.proc)
1040 1040 and (self.fromline == v.fromline)
1041 1041 and (self.header.files() == v.header.files())
1042 1042 )
1043 1043
1044 1044 def __hash__(self):
1045 1045 return hash(
1046 1046 (
1047 1047 tuple(self.hunk),
1048 1048 tuple(self.header.files()),
1049 1049 self.fromline,
1050 1050 self.proc,
1051 1051 )
1052 1052 )
1053 1053
1054 1054 def countchanges(self, hunk):
1055 1055 """hunk -> (n+,n-)"""
1056 1056 add = len([h for h in hunk if h.startswith(b'+')])
1057 1057 rem = len([h for h in hunk if h.startswith(b'-')])
1058 1058 return add, rem
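
    # For example (illustrative), countchanges([b'+new\n', b' ctx\n',
    # b'-old1\n', b'-old2\n']) returns (1, 2): one added, two removed.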
1059 1059
1060 1060 def reversehunk(self):
1061 1061 """return another recordhunk which is the reverse of the hunk
1062 1062
1063 1063 If this hunk is diff(A, B), the returned hunk is diff(B, A). To do
1064 1064 that, swap fromline/toline and +/- signs while keep other things
1065 1065         that, swap fromline/toline and +/- signs while keeping other things
1066 1066 """
1067 1067 m = {b'+': b'-', b'-': b'+', b'\\': b'\\'}
1068 1068 hunk = [b'%s%s' % (m[l[0:1]], l[1:]) for l in self.hunk]
1069 1069 return recordhunk(
1070 1070 self.header,
1071 1071 self.toline,
1072 1072 self.fromline,
1073 1073 self.proc,
1074 1074 self.before,
1075 1075 hunk,
1076 1076 self.after,
1077 1077 )
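
    # For example (illustrative), a hunk line b'+added\n' becomes
    # b'-added\n' in the reversed hunk (and vice versa), while
    # b'\ No newline at end of file' markers keep their backslash.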
1078 1078
1079 1079 def write(self, fp):
1080 1080 delta = len(self.before) + len(self.after)
1081 1081 if self.after and self.after[-1] == diffhelper.MISSING_NEWLINE_MARKER:
1082 1082 delta -= 1
1083 1083 fromlen = delta + self.removed
1084 1084 tolen = delta + self.added
1085 1085 fp.write(
1086 1086 b'@@ -%d,%d +%d,%d @@%s\n'
1087 1087 % (
1088 1088 self.fromline,
1089 1089 fromlen,
1090 1090 self.toline,
1091 1091 tolen,
1092 1092 self.proc and (b' ' + self.proc),
1093 1093 )
1094 1094 )
1095 1095 fp.write(b''.join(self.before + self.hunk + self.after))
1096 1096
1097 1097 pretty = write
1098 1098
1099 1099 def filename(self):
1100 1100 return self.header.filename()
1101 1101
1102 1102 @encoding.strmethod
1103 1103 def __repr__(self):
1104 1104 return b'<hunk %r@%d>' % (self.filename(), self.fromline)
1105 1105
1106 1106
1107 1107 def getmessages():
1108 1108 return {
1109 1109 b'multiple': {
1110 1110 b'apply': _(b"apply change %d/%d to '%s'?"),
1111 1111 b'discard': _(b"discard change %d/%d to '%s'?"),
1112 1112 b'keep': _(b"keep change %d/%d to '%s'?"),
1113 1113 b'record': _(b"record change %d/%d to '%s'?"),
1114 1114 },
1115 1115 b'single': {
1116 1116 b'apply': _(b"apply this change to '%s'?"),
1117 1117 b'discard': _(b"discard this change to '%s'?"),
1118 1118 b'keep': _(b"keep this change to '%s'?"),
1119 1119 b'record': _(b"record this change to '%s'?"),
1120 1120 },
1121 1121 b'help': {
1122 1122 b'apply': _(
1123 1123 b'[Ynesfdaq?]'
1124 1124 b'$$ &Yes, apply this change'
1125 1125 b'$$ &No, skip this change'
1126 1126 b'$$ &Edit this change manually'
1127 1127 b'$$ &Skip remaining changes to this file'
1128 1128 b'$$ Apply remaining changes to this &file'
1129 1129 b'$$ &Done, skip remaining changes and files'
1130 1130 b'$$ Apply &all changes to all remaining files'
1131 1131 b'$$ &Quit, applying no changes'
1132 1132 b'$$ &? (display help)'
1133 1133 ),
1134 1134 b'discard': _(
1135 1135 b'[Ynesfdaq?]'
1136 1136 b'$$ &Yes, discard this change'
1137 1137 b'$$ &No, skip this change'
1138 1138 b'$$ &Edit this change manually'
1139 1139 b'$$ &Skip remaining changes to this file'
1140 1140 b'$$ Discard remaining changes to this &file'
1141 1141 b'$$ &Done, skip remaining changes and files'
1142 1142 b'$$ Discard &all changes to all remaining files'
1143 1143 b'$$ &Quit, discarding no changes'
1144 1144 b'$$ &? (display help)'
1145 1145 ),
1146 1146 b'keep': _(
1147 1147 b'[Ynesfdaq?]'
1148 1148 b'$$ &Yes, keep this change'
1149 1149 b'$$ &No, skip this change'
1150 1150 b'$$ &Edit this change manually'
1151 1151 b'$$ &Skip remaining changes to this file'
1152 1152 b'$$ Keep remaining changes to this &file'
1153 1153 b'$$ &Done, skip remaining changes and files'
1154 1154 b'$$ Keep &all changes to all remaining files'
1155 1155 b'$$ &Quit, keeping all changes'
1156 1156 b'$$ &? (display help)'
1157 1157 ),
1158 1158 b'record': _(
1159 1159 b'[Ynesfdaq?]'
1160 1160 b'$$ &Yes, record this change'
1161 1161 b'$$ &No, skip this change'
1162 1162 b'$$ &Edit this change manually'
1163 1163 b'$$ &Skip remaining changes to this file'
1164 1164 b'$$ Record remaining changes to this &file'
1165 1165 b'$$ &Done, skip remaining changes and files'
1166 1166 b'$$ Record &all changes to all remaining files'
1167 1167 b'$$ &Quit, recording no changes'
1168 1168 b'$$ &? (display help)'
1169 1169 ),
1170 1170 },
1171 1171 }
1172 1172
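# In the prompt strings above, b'$$' separates the individual choices
# passed to ui.promptchoice() and b'&' marks each choice's response
# key; for example, b'&Yes, apply this change' is selected by typing
# 'y'.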
1173 1173
1174 1174 def filterpatch(ui, headers, match, operation=None):
1175 1175 """Interactively filter patch chunks into applied-only chunks"""
1176 1176 messages = getmessages()
1177 1177
1178 1178 if operation is None:
1179 1179 operation = b'record'
1180 1180
1181 1181 def prompt(skipfile, skipall, query, chunk):
1182 1182 """prompt query, and process base inputs
1183 1183
1184 1184         - y/n for the rest of the file
1185 1185         - y/n for the rest of the changes
1186 1186 - ? (help)
1187 1187 - q (quit)
1188 1188
1189 1189 Return True/False and possibly updated skipfile and skipall.
1190 1190 """
1191 1191 newpatches = None
1192 1192 if skipall is not None:
1193 1193 return skipall, skipfile, skipall, newpatches
1194 1194 if skipfile is not None:
1195 1195 return skipfile, skipfile, skipall, newpatches
1196 1196 while True:
1197 1197 resps = messages[b'help'][operation]
1198 1198             # IMPORTANT: keep the last line of this prompt short (<40 English
1199 1199 # chars is a good target) because of issue6158.
1200 1200 r = ui.promptchoice(b"%s\n(enter ? for help) %s" % (query, resps))
1201 1201 ui.write(b"\n")
1202 1202 if r == 8: # ?
1203 1203 for c, t in ui.extractchoices(resps)[1]:
1204 1204 ui.write(b'%s - %s\n' % (c, encoding.lower(t)))
1205 1205 continue
1206 1206 elif r == 0: # yes
1207 1207 ret = True
1208 1208 elif r == 1: # no
1209 1209 ret = False
1210 1210 elif r == 2: # Edit patch
1211 1211 if chunk is None:
1212 1212 ui.write(_(b'cannot edit patch for whole file'))
1213 1213 ui.write(b"\n")
1214 1214 continue
1215 1215 if chunk.header.binary():
1216 1216 ui.write(_(b'cannot edit patch for binary file'))
1217 1217 ui.write(b"\n")
1218 1218 continue
1219 1219 # Patch comment based on the Git one (based on comment at end of
1220 1220 # https://mercurial-scm.org/wiki/RecordExtension)
1221 1221 phelp = b'---' + _(
1222 1222 b"""
1223 1223 To remove '-' lines, make them ' ' lines (context).
1224 1224 To remove '+' lines, delete them.
1225 1225 Lines starting with # will be removed from the patch.
1226 1226
1227 1227 If the patch applies cleanly, the edited hunk will immediately be
1228 1228 added to the record list. If it does not apply cleanly, a rejects
1229 1229 file will be generated: you can use that when you try again. If
1230 1230 all lines of the hunk are removed, then the edit is aborted and
1231 1231 the hunk is left unchanged.
1232 1232 """
1233 1233 )
1234 1234 (patchfd, patchfn) = pycompat.mkstemp(
1235 1235 prefix=b"hg-editor-", suffix=b".diff"
1236 1236 )
1237 1237 ncpatchfp = None
1238 1238 try:
1239 1239 # Write the initial patch
1240 1240 f = util.nativeeolwriter(os.fdopen(patchfd, 'wb'))
1241 1241 chunk.header.write(f)
1242 1242 chunk.write(f)
1243 1243 f.write(
1244 1244 b''.join(
1245 1245 [b'# ' + i + b'\n' for i in phelp.splitlines()]
1246 1246 )
1247 1247 )
1248 1248 f.close()
1249 1249 # Start the editor and wait for it to complete
1250 1250 editor = ui.geteditor()
1251 1251 ret = ui.system(
1252 1252 b"%s \"%s\"" % (editor, patchfn),
1253 1253 environ={b'HGUSER': ui.username()},
1254 1254 blockedtag=b'filterpatch',
1255 1255 )
1256 1256 if ret != 0:
1257 1257 ui.warn(_(b"editor exited with exit code %d\n") % ret)
1258 1258 continue
1259 1259 # Remove comment lines
1260 1260 patchfp = open(patchfn, 'rb')
1261 1261 ncpatchfp = stringio()
1262 for line in util.iterfile(patchfp):
1262 for line in patchfp:
1263 1263 line = util.fromnativeeol(line)
1264 1264 if not line.startswith(b'#'):
1265 1265 ncpatchfp.write(line)
1266 1266 patchfp.close()
1267 1267 ncpatchfp.seek(0)
1268 1268 newpatches = parsepatch(ncpatchfp)
1269 1269 finally:
1270 1270 os.unlink(patchfn)
1271 1271 del ncpatchfp
1272 1272 # Signal that the chunk shouldn't be applied as-is, but
1273 1273 # provide the new patch to be used instead.
1274 1274 ret = False
1275 1275 elif r == 3: # Skip
1276 1276 ret = skipfile = False
1277 1277 elif r == 4: # file (Record remaining)
1278 1278 ret = skipfile = True
1279 1279 elif r == 5: # done, skip remaining
1280 1280 ret = skipall = False
1281 1281 elif r == 6: # all
1282 1282 ret = skipall = True
1283 1283 elif r == 7: # quit
1284 1284 raise error.CanceledError(_(b'user quit'))
1285 1285 return ret, skipfile, skipall, newpatches
1286 1286
1287 1287 seen = set()
1288 1288 applied = {} # 'filename' -> [] of chunks
1289 1289 skipfile, skipall = None, None
1290 1290 pos, total = 1, sum(len(h.hunks) for h in headers)
1291 1291 for h in headers:
1292 1292 pos += len(h.hunks)
1293 1293 skipfile = None
1294 1294 fixoffset = 0
1295 1295 hdr = b''.join(h.header)
1296 1296 if hdr in seen:
1297 1297 continue
1298 1298 seen.add(hdr)
1299 1299 if skipall is None:
1300 1300 h.pretty(ui)
1301 1301 files = h.files()
1302 1302 msg = _(b'examine changes to %s?') % _(b' and ').join(
1303 1303 b"'%s'" % f for f in files
1304 1304 )
1305 1305 if all(match.exact(f) for f in files):
1306 1306 r, skipall, np = True, None, None
1307 1307 else:
1308 1308 r, skipfile, skipall, np = prompt(skipfile, skipall, msg, None)
1309 1309 if not r:
1310 1310 continue
1311 1311 applied[h.filename()] = [h]
1312 1312 if h.allhunks():
1313 1313 applied[h.filename()] += h.hunks
1314 1314 continue
1315 1315 for i, chunk in enumerate(h.hunks):
1316 1316 if skipfile is None and skipall is None:
1317 1317 chunk.pretty(ui)
1318 1318 if total == 1:
1319 1319 msg = messages[b'single'][operation] % chunk.filename()
1320 1320 else:
1321 1321 idx = pos - len(h.hunks) + i
1322 1322 msg = messages[b'multiple'][operation] % (
1323 1323 idx,
1324 1324 total,
1325 1325 chunk.filename(),
1326 1326 )
1327 1327 r, skipfile, skipall, newpatches = prompt(
1328 1328 skipfile, skipall, msg, chunk
1329 1329 )
1330 1330 if r:
1331 1331 if fixoffset:
1332 1332 chunk = copy.copy(chunk)
1333 1333 chunk.toline += fixoffset
1334 1334 applied[chunk.filename()].append(chunk)
1335 1335 elif newpatches is not None:
1336 1336 for newpatch in newpatches:
1337 1337 for newhunk in newpatch.hunks:
1338 1338 if fixoffset:
1339 1339 newhunk.toline += fixoffset
1340 1340 applied[newhunk.filename()].append(newhunk)
1341 1341 else:
1342 1342 fixoffset += chunk.removed - chunk.added
1343 1343 return (
1344 1344 sum(
1345 1345 [h for h in applied.values() if h[0].special() or len(h) > 1],
1346 1346 [],
1347 1347 ),
1348 1348 {},
1349 1349 )
1350 1350
1351 1351
1352 1352 class hunk(object):
1353 1353 def __init__(self, desc, num, lr, context):
1354 1354 self.number = num
1355 1355 self.desc = desc
1356 1356 self.hunk = [desc]
1357 1357 self.a = []
1358 1358 self.b = []
1359 1359 self.starta = self.lena = None
1360 1360 self.startb = self.lenb = None
1361 1361 if lr is not None:
1362 1362 if context:
1363 1363 self.read_context_hunk(lr)
1364 1364 else:
1365 1365 self.read_unified_hunk(lr)
1366 1366
1367 1367 def getnormalized(self):
1368 1368 """Return a copy with line endings normalized to LF."""
1369 1369
1370 1370 def normalize(lines):
1371 1371 nlines = []
1372 1372 for line in lines:
1373 1373 if line.endswith(b'\r\n'):
1374 1374 line = line[:-2] + b'\n'
1375 1375 nlines.append(line)
1376 1376 return nlines
1377 1377
1378 1378 # Dummy object, it is rebuilt manually
1379 1379 nh = hunk(self.desc, self.number, None, None)
1380 1380 nh.number = self.number
1381 1381 nh.desc = self.desc
1382 1382 nh.hunk = self.hunk
1383 1383 nh.a = normalize(self.a)
1384 1384 nh.b = normalize(self.b)
1385 1385 nh.starta = self.starta
1386 1386 nh.startb = self.startb
1387 1387 nh.lena = self.lena
1388 1388 nh.lenb = self.lenb
1389 1389 return nh
1390 1390
1391 1391 def read_unified_hunk(self, lr):
1392 1392 m = unidesc.match(self.desc)
1393 1393 if not m:
1394 1394 raise PatchParseError(_(b"bad hunk #%d") % self.number)
1395 1395 self.starta, self.lena, self.startb, self.lenb = m.groups()
1396 1396 if self.lena is None:
1397 1397 self.lena = 1
1398 1398 else:
1399 1399 self.lena = int(self.lena)
1400 1400 if self.lenb is None:
1401 1401 self.lenb = 1
1402 1402 else:
1403 1403 self.lenb = int(self.lenb)
1404 1404 self.starta = int(self.starta)
1405 1405 self.startb = int(self.startb)
1406 1406 try:
1407 1407 diffhelper.addlines(
1408 1408 lr, self.hunk, self.lena, self.lenb, self.a, self.b
1409 1409 )
1410 1410 except error.ParseError as e:
1411 1411 raise PatchParseError(_(b"bad hunk #%d: %s") % (self.number, e))
1412 1412         # if we hit EOF before finishing out the hunk, the last line will
1413 1413         # be zero length. Let's try to fix it up.
1414 1414 while len(self.hunk[-1]) == 0:
1415 1415 del self.hunk[-1]
1416 1416 del self.a[-1]
1417 1417 del self.b[-1]
1418 1418 self.lena -= 1
1419 1419 self.lenb -= 1
1420 1420 self._fixnewline(lr)
1421 1421
1422 1422 def read_context_hunk(self, lr):
1423 1423 self.desc = lr.readline()
1424 1424 m = contextdesc.match(self.desc)
1425 1425 if not m:
1426 1426 raise PatchParseError(_(b"bad hunk #%d") % self.number)
1427 1427 self.starta, aend = m.groups()
1428 1428 self.starta = int(self.starta)
1429 1429 if aend is None:
1430 1430 aend = self.starta
1431 1431 self.lena = int(aend) - self.starta
1432 1432 if self.starta:
1433 1433 self.lena += 1
1434 1434 for x in pycompat.xrange(self.lena):
1435 1435 l = lr.readline()
1436 1436 if l.startswith(b'---'):
1437 1437                 # line additions, old block is empty
1438 1438 lr.push(l)
1439 1439 break
1440 1440 s = l[2:]
1441 1441 if l.startswith(b'- ') or l.startswith(b'! '):
1442 1442 u = b'-' + s
1443 1443 elif l.startswith(b' '):
1444 1444 u = b' ' + s
1445 1445 else:
1446 1446 raise PatchParseError(
1447 1447 _(b"bad hunk #%d old text line %d") % (self.number, x)
1448 1448 )
1449 1449 self.a.append(u)
1450 1450 self.hunk.append(u)
1451 1451
1452 1452 l = lr.readline()
1453 1453 if l.startswith(br'\ '):
1454 1454 s = self.a[-1][:-1]
1455 1455 self.a[-1] = s
1456 1456 self.hunk[-1] = s
1457 1457 l = lr.readline()
1458 1458 m = contextdesc.match(l)
1459 1459 if not m:
1460 1460 raise PatchParseError(_(b"bad hunk #%d") % self.number)
1461 1461 self.startb, bend = m.groups()
1462 1462 self.startb = int(self.startb)
1463 1463 if bend is None:
1464 1464 bend = self.startb
1465 1465 self.lenb = int(bend) - self.startb
1466 1466 if self.startb:
1467 1467 self.lenb += 1
1468 1468 hunki = 1
1469 1469 for x in pycompat.xrange(self.lenb):
1470 1470 l = lr.readline()
1471 1471 if l.startswith(br'\ '):
1472 1472 # XXX: the only way to hit this is with an invalid line range.
1473 1473 # The no-eol marker is not counted in the line range, but I
1474 1474                 # guess there are diff(1) implementations which behave differently.
1475 1475 s = self.b[-1][:-1]
1476 1476 self.b[-1] = s
1477 1477 self.hunk[hunki - 1] = s
1478 1478 continue
1479 1479 if not l:
1480 1480 # line deletions, new block is empty and we hit EOF
1481 1481 lr.push(l)
1482 1482 break
1483 1483 s = l[2:]
1484 1484 if l.startswith(b'+ ') or l.startswith(b'! '):
1485 1485 u = b'+' + s
1486 1486 elif l.startswith(b' '):
1487 1487 u = b' ' + s
1488 1488 elif len(self.b) == 0:
1489 1489 # line deletions, new block is empty
1490 1490 lr.push(l)
1491 1491 break
1492 1492 else:
1493 1493 raise PatchParseError(
1494 1494                     _(b"bad hunk #%d new text line %d") % (self.number, x)
1495 1495 )
1496 1496 self.b.append(s)
1497 1497 while True:
1498 1498 if hunki >= len(self.hunk):
1499 1499 h = b""
1500 1500 else:
1501 1501 h = self.hunk[hunki]
1502 1502 hunki += 1
1503 1503 if h == u:
1504 1504 break
1505 1505 elif h.startswith(b'-'):
1506 1506 continue
1507 1507 else:
1508 1508 self.hunk.insert(hunki - 1, u)
1509 1509 break
1510 1510
1511 1511 if not self.a:
1512 1512 # this happens when lines were only added to the hunk
1513 1513 for x in self.hunk:
1514 1514 if x.startswith(b'-') or x.startswith(b' '):
1515 1515 self.a.append(x)
1516 1516 if not self.b:
1517 1517 # this happens when lines were only deleted from the hunk
1518 1518 for x in self.hunk:
1519 1519 if x.startswith(b'+') or x.startswith(b' '):
1520 1520 self.b.append(x[1:])
1521 1521 # @@ -start,len +start,len @@
1522 1522 self.desc = b"@@ -%d,%d +%d,%d @@\n" % (
1523 1523 self.starta,
1524 1524 self.lena,
1525 1525 self.startb,
1526 1526 self.lenb,
1527 1527 )
1528 1528 self.hunk[0] = self.desc
1529 1529 self._fixnewline(lr)
1530 1530
1531 1531 def _fixnewline(self, lr):
1532 1532 l = lr.readline()
1533 1533 if l.startswith(br'\ '):
1534 1534 diffhelper.fixnewline(self.hunk, self.a, self.b)
1535 1535 else:
1536 1536 lr.push(l)
1537 1537
1538 1538 def complete(self):
1539 1539 return len(self.a) == self.lena and len(self.b) == self.lenb
1540 1540
1541 1541 def _fuzzit(self, old, new, fuzz, toponly):
1542 1542         # this removes context lines from the top and bottom of the old and
1543 1543         # new lists of lines. It checks the hunk to make sure only context
1544 1544         # lines are removed, and then returns the shortened lists of lines.
1545 1545 fuzz = min(fuzz, len(old))
1546 1546 if fuzz:
1547 1547 top = 0
1548 1548 bot = 0
1549 1549 hlen = len(self.hunk)
1550 1550 for x in pycompat.xrange(hlen - 1):
1551 1551 # the hunk starts with the @@ line, so use x+1
1552 1552 if self.hunk[x + 1].startswith(b' '):
1553 1553 top += 1
1554 1554 else:
1555 1555 break
1556 1556 if not toponly:
1557 1557 for x in pycompat.xrange(hlen - 1):
1558 1558 if self.hunk[hlen - bot - 1].startswith(b' '):
1559 1559 bot += 1
1560 1560 else:
1561 1561 break
1562 1562
1563 1563 bot = min(fuzz, bot)
1564 1564 top = min(fuzz, top)
1565 1565 return old[top : len(old) - bot], new[top : len(new) - bot], top
1566 1566 return old, new, 0
1567 1567
1568 1568 def fuzzit(self, fuzz, toponly):
1569 1569 old, new, top = self._fuzzit(self.a, self.b, fuzz, toponly)
1570 1570 oldstart = self.starta + top
1571 1571 newstart = self.startb + top
1572 1572 # zero length hunk ranges already have their start decremented
1573 1573 if self.lena and oldstart > 0:
1574 1574 oldstart -= 1
1575 1575 if self.lenb and newstart > 0:
1576 1576 newstart -= 1
1577 1577 return old, oldstart, new, newstart
1578 1578
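# For example (illustrative): a hunk with one leading and one trailing
# context line, fuzzed with fuzz=1 and toponly=False, has both context
# lines trimmed; _fuzzit() reports top=1 so fuzzit() can shift
# oldstart/newstart accordingly.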
1579 1579
1580 1580 class binhunk(object):
1581 1581 """A binary patch file."""
1582 1582
1583 1583 def __init__(self, lr, fname):
1584 1584 self.text = None
1585 1585 self.delta = False
1586 1586 self.hunk = [b'GIT binary patch\n']
1587 1587 self._fname = fname
1588 1588 self._read(lr)
1589 1589
1590 1590 def complete(self):
1591 1591 return self.text is not None
1592 1592
1593 1593 def new(self, lines):
1594 1594 if self.delta:
1595 1595 return [applybindelta(self.text, b''.join(lines))]
1596 1596 return [self.text]
1597 1597
1598 1598 def _read(self, lr):
1599 1599 def getline(lr, hunk):
1600 1600 l = lr.readline()
1601 1601 hunk.append(l)
1602 1602 return l.rstrip(b'\r\n')
1603 1603
1604 1604 while True:
1605 1605 line = getline(lr, self.hunk)
1606 1606 if not line:
1607 1607 raise PatchParseError(
1608 1608 _(b'could not extract "%s" binary data') % self._fname
1609 1609 )
1610 1610 if line.startswith(b'literal '):
1611 1611 size = int(line[8:].rstrip())
1612 1612 break
1613 1613 if line.startswith(b'delta '):
1614 1614 size = int(line[6:].rstrip())
1615 1615 self.delta = True
1616 1616 break
1617 1617 dec = []
1618 1618 line = getline(lr, self.hunk)
1619 1619 while len(line) > 1:
1620 1620 l = line[0:1]
1621 1621             if b'A' <= l <= b'Z':
1622 1622 l = ord(l) - ord(b'A') + 1
1623 1623 else:
1624 1624 l = ord(l) - ord(b'a') + 27
1625 1625 try:
1626 1626 dec.append(util.b85decode(line[1:])[:l])
1627 1627 except ValueError as e:
1628 1628 raise PatchParseError(
1629 1629 _(b'could not decode "%s" binary patch: %s')
1630 1630 % (self._fname, stringutil.forcebytestr(e))
1631 1631 )
1632 1632 line = getline(lr, self.hunk)
1633 1633 text = zlib.decompress(b''.join(dec))
1634 1634 if len(text) != size:
1635 1635 raise PatchParseError(
1636 1636 _(b'"%s" length is %d bytes, should be %d')
1637 1637 % (self._fname, len(text), size)
1638 1638 )
1639 1639 self.text = text
1640 1640
1641 1641
1642 1642 def parsefilename(str):
1643 1643 # --- filename \t|space stuff
1644 1644 s = str[4:].rstrip(b'\r\n')
1645 1645 i = s.find(b'\t')
1646 1646 if i < 0:
1647 1647 i = s.find(b' ')
1648 1648 if i < 0:
1649 1649 return s
1650 1650 return s[:i]
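
# For example (illustrative), parsefilename(b'--- a/foo.c\tTue Jan 1')
# returns b'a/foo.c', and parsefilename(b'+++ b/foo.c') returns
# b'b/foo.c' when no timestamp follows the name.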
1651 1651
1652 1652
1653 1653 def reversehunks(hunks):
1654 1654 '''reverse the signs in the hunks given as argument
1655 1655
1656 1656 This function operates on hunks coming out of patch.filterpatch, that is
1657 1657 a list of the form: [header1, hunk1, hunk2, header2...]. Example usage:
1658 1658
1659 1659 >>> rawpatch = b"""diff --git a/folder1/g b/folder1/g
1660 1660 ... --- a/folder1/g
1661 1661 ... +++ b/folder1/g
1662 1662 ... @@ -1,7 +1,7 @@
1663 1663 ... +firstline
1664 1664 ... c
1665 1665 ... 1
1666 1666 ... 2
1667 1667 ... + 3
1668 1668 ... -4
1669 1669 ... 5
1670 1670 ... d
1671 1671 ... +lastline"""
1672 1672 >>> hunks = parsepatch([rawpatch])
1673 1673 >>> hunkscomingfromfilterpatch = []
1674 1674 >>> for h in hunks:
1675 1675 ... hunkscomingfromfilterpatch.append(h)
1676 1676 ... hunkscomingfromfilterpatch.extend(h.hunks)
1677 1677
1678 1678 >>> reversedhunks = reversehunks(hunkscomingfromfilterpatch)
1679 1679 >>> from . import util
1680 1680 >>> fp = util.stringio()
1681 1681 >>> for c in reversedhunks:
1682 1682 ... c.write(fp)
1683 1683 >>> fp.seek(0) or None
1684 1684 >>> reversedpatch = fp.read()
1685 1685 >>> print(pycompat.sysstr(reversedpatch))
1686 1686 diff --git a/folder1/g b/folder1/g
1687 1687 --- a/folder1/g
1688 1688 +++ b/folder1/g
1689 1689 @@ -1,4 +1,3 @@
1690 1690 -firstline
1691 1691 c
1692 1692 1
1693 1693 2
1694 1694 @@ -2,6 +1,6 @@
1695 1695 c
1696 1696 1
1697 1697 2
1698 1698 - 3
1699 1699 +4
1700 1700 5
1701 1701 d
1702 1702 @@ -6,3 +5,2 @@
1703 1703 5
1704 1704 d
1705 1705 -lastline
1706 1706
1707 1707 '''
1708 1708
1709 1709 newhunks = []
1710 1710 for c in hunks:
1711 1711 if util.safehasattr(c, b'reversehunk'):
1712 1712 c = c.reversehunk()
1713 1713 newhunks.append(c)
1714 1714 return newhunks
1715 1715
1716 1716
1717 1717 def parsepatch(originalchunks, maxcontext=None):
1718 1718 """patch -> [] of headers -> [] of hunks
1719 1719
1720 1720 If maxcontext is not None, trim context lines if necessary.
1721 1721
1722 1722 >>> rawpatch = b'''diff --git a/folder1/g b/folder1/g
1723 1723 ... --- a/folder1/g
1724 1724 ... +++ b/folder1/g
1725 1725 ... @@ -1,8 +1,10 @@
1726 1726 ... 1
1727 1727 ... 2
1728 1728 ... -3
1729 1729 ... 4
1730 1730 ... 5
1731 1731 ... 6
1732 1732 ... +6.1
1733 1733 ... +6.2
1734 1734 ... 7
1735 1735 ... 8
1736 1736 ... +9'''
1737 1737 >>> out = util.stringio()
1738 1738 >>> headers = parsepatch([rawpatch], maxcontext=1)
1739 1739 >>> for header in headers:
1740 1740 ... header.write(out)
1741 1741 ... for hunk in header.hunks:
1742 1742 ... hunk.write(out)
1743 1743 >>> print(pycompat.sysstr(out.getvalue()))
1744 1744 diff --git a/folder1/g b/folder1/g
1745 1745 --- a/folder1/g
1746 1746 +++ b/folder1/g
1747 1747 @@ -2,3 +2,2 @@
1748 1748 2
1749 1749 -3
1750 1750 4
1751 1751 @@ -6,2 +5,4 @@
1752 1752 6
1753 1753 +6.1
1754 1754 +6.2
1755 1755 7
1756 1756 @@ -8,1 +9,2 @@
1757 1757 8
1758 1758 +9
1759 1759 """
1760 1760
1761 1761 class parser(object):
1762 1762 """patch parsing state machine"""
1763 1763
1764 1764 def __init__(self):
1765 1765 self.fromline = 0
1766 1766 self.toline = 0
1767 1767 self.proc = b''
1768 1768 self.header = None
1769 1769 self.context = []
1770 1770 self.before = []
1771 1771 self.hunk = []
1772 1772 self.headers = []
1773 1773
1774 1774 def addrange(self, limits):
1775 1775 self.addcontext([])
1776 1776 fromstart, fromend, tostart, toend, proc = limits
1777 1777 self.fromline = int(fromstart)
1778 1778 self.toline = int(tostart)
1779 1779 self.proc = proc
1780 1780
1781 1781 def addcontext(self, context):
1782 1782 if self.hunk:
1783 1783 h = recordhunk(
1784 1784 self.header,
1785 1785 self.fromline,
1786 1786 self.toline,
1787 1787 self.proc,
1788 1788 self.before,
1789 1789 self.hunk,
1790 1790 context,
1791 1791 maxcontext,
1792 1792 )
1793 1793 self.header.hunks.append(h)
1794 1794 self.fromline += len(self.before) + h.removed
1795 1795 self.toline += len(self.before) + h.added
1796 1796 self.before = []
1797 1797 self.hunk = []
1798 1798 self.context = context
1799 1799
1800 1800 def addhunk(self, hunk):
1801 1801 if self.context:
1802 1802 self.before = self.context
1803 1803 self.context = []
1804 1804 if self.hunk:
1805 1805 self.addcontext([])
1806 1806 self.hunk = hunk
1807 1807
1808 1808 def newfile(self, hdr):
1809 1809 self.addcontext([])
1810 1810 h = header(hdr)
1811 1811 self.headers.append(h)
1812 1812 self.header = h
1813 1813
1814 1814 def addother(self, line):
1815 1815 pass # 'other' lines are ignored
1816 1816
1817 1817 def finished(self):
1818 1818 self.addcontext([])
1819 1819 return self.headers
1820 1820
1821 1821 transitions = {
1822 1822 b'file': {
1823 1823 b'context': addcontext,
1824 1824 b'file': newfile,
1825 1825 b'hunk': addhunk,
1826 1826 b'range': addrange,
1827 1827 },
1828 1828 b'context': {
1829 1829 b'file': newfile,
1830 1830 b'hunk': addhunk,
1831 1831 b'range': addrange,
1832 1832 b'other': addother,
1833 1833 },
1834 1834 b'hunk': {
1835 1835 b'context': addcontext,
1836 1836 b'file': newfile,
1837 1837 b'range': addrange,
1838 1838 },
1839 1839 b'range': {b'context': addcontext, b'hunk': addhunk},
1840 1840 b'other': {b'other': addother},
1841 1841 }
1842 1842
1843 1843 p = parser()
1844 1844 fp = stringio()
1845 1845 fp.write(b''.join(originalchunks))
1846 1846 fp.seek(0)
1847 1847
1848 1848 state = b'context'
1849 1849 for newstate, data in scanpatch(fp):
1850 1850 try:
1851 1851 p.transitions[state][newstate](p, data)
1852 1852 except KeyError:
1853 1853 raise PatchParseError(
1854 1854 b'unhandled transition: %s -> %s' % (state, newstate)
1855 1855 )
1856 1856 state = newstate
1857 1857 del fp
1858 1858 return p.finished()
1859 1859
1860 1860
1861 1861 def pathtransform(path, strip, prefix):
1862 1862 """turn a path from a patch into a path suitable for the repository
1863 1863
1864 1864 prefix, if not empty, is expected to be normalized with a / at the end.
1865 1865
1866 1866 Returns (stripped components, path in repository).
1867 1867
1868 1868 >>> pathtransform(b'a/b/c', 0, b'')
1869 1869 ('', 'a/b/c')
1870 1870 >>> pathtransform(b' a/b/c ', 0, b'')
1871 1871 ('', ' a/b/c')
1872 1872 >>> pathtransform(b' a/b/c ', 2, b'')
1873 1873 ('a/b/', 'c')
1874 1874 >>> pathtransform(b'a/b/c', 0, b'd/e/')
1875 1875 ('', 'd/e/a/b/c')
1876 1876 >>> pathtransform(b' a//b/c ', 2, b'd/e/')
1877 1877 ('a//b/', 'd/e/c')
1878 1878 >>> pathtransform(b'a/b/c', 3, b'')
1879 1879 Traceback (most recent call last):
1880 1880 PatchApplicationError: unable to strip away 1 of 3 dirs from a/b/c
1881 1881 """
1882 1882 pathlen = len(path)
1883 1883 i = 0
1884 1884 if strip == 0:
1885 1885 return b'', prefix + path.rstrip()
1886 1886 count = strip
1887 1887 while count > 0:
1888 1888 i = path.find(b'/', i)
1889 1889 if i == -1:
1890 1890 raise PatchApplicationError(
1891 1891 _(b"unable to strip away %d of %d dirs from %s")
1892 1892 % (count, strip, path)
1893 1893 )
1894 1894 i += 1
1895 1895 # consume '//' in the path
1896 1896 while i < pathlen - 1 and path[i : i + 1] == b'/':
1897 1897 i += 1
1898 1898 count -= 1
1899 1899 return path[:i].lstrip(), prefix + path[i:].rstrip()
1900 1900
1901 1901
1902 1902 def makepatchmeta(backend, afile_orig, bfile_orig, hunk, strip, prefix):
1903 1903 nulla = afile_orig == b"/dev/null"
1904 1904 nullb = bfile_orig == b"/dev/null"
1905 1905 create = nulla and hunk.starta == 0 and hunk.lena == 0
1906 1906 remove = nullb and hunk.startb == 0 and hunk.lenb == 0
1907 1907 abase, afile = pathtransform(afile_orig, strip, prefix)
1908 1908 gooda = not nulla and backend.exists(afile)
1909 1909 bbase, bfile = pathtransform(bfile_orig, strip, prefix)
1910 1910 if afile == bfile:
1911 1911 goodb = gooda
1912 1912 else:
1913 1913 goodb = not nullb and backend.exists(bfile)
1914 1914 missing = not goodb and not gooda and not create
1915 1915
1916 1916 # some diff programs apparently produce patches where the afile is
1917 1917 # not /dev/null, but afile starts with bfile
1918 1918 abasedir = afile[: afile.rfind(b'/') + 1]
1919 1919 bbasedir = bfile[: bfile.rfind(b'/') + 1]
1920 1920 if (
1921 1921 missing
1922 1922 and abasedir == bbasedir
1923 1923 and afile.startswith(bfile)
1924 1924 and hunk.starta == 0
1925 1925 and hunk.lena == 0
1926 1926 ):
1927 1927 create = True
1928 1928 missing = False
1929 1929
1930 1930 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
1931 1931 # diff is between a file and its backup. In this case, the original
1932 1932 # file should be patched (see original mpatch code).
1933 1933 isbackup = abase == bbase and bfile.startswith(afile)
1934 1934 fname = None
1935 1935 if not missing:
1936 1936 if gooda and goodb:
1937 1937 if isbackup:
1938 1938 fname = afile
1939 1939 else:
1940 1940 fname = bfile
1941 1941 elif gooda:
1942 1942 fname = afile
1943 1943
1944 1944 if not fname:
1945 1945 if not nullb:
1946 1946 if isbackup:
1947 1947 fname = afile
1948 1948 else:
1949 1949 fname = bfile
1950 1950 elif not nulla:
1951 1951 fname = afile
1952 1952 else:
1953 1953 raise PatchParseError(_(b"undefined source and destination files"))
1954 1954
1955 1955 gp = patchmeta(fname)
1956 1956 if create:
1957 1957 gp.op = b'ADD'
1958 1958 elif remove:
1959 1959 gp.op = b'DELETE'
1960 1960 return gp
1961 1961
1962 1962
1963 1963 def scanpatch(fp):
1964 1964     """like patch.iterhunks, but yields different events
1965 1965
1966 1966 - ('file', [header_lines + fromfile + tofile])
1967 1967 - ('context', [context_lines])
1968 1968 - ('hunk', [hunk_lines])
1969 1969 - ('range', (-start,len, +start,len, proc))
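
    For example (illustrative), a minimal git diff for one file yields,
    in order: a 'file' event with the header lines, a 'range' event for
    the '@@' line, and a 'hunk' event with the -/+ lines.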
1970 1970 """
1971 1971 lines_re = re.compile(br'@@ -(\d+),(\d+) \+(\d+),(\d+) @@\s*(.*)')
1972 1972 lr = linereader(fp)
1973 1973
1974 1974 def scanwhile(first, p):
1975 1975 """scan lr while predicate holds"""
1976 1976 lines = [first]
1977 1977 for line in iter(lr.readline, b''):
1978 1978 if p(line):
1979 1979 lines.append(line)
1980 1980 else:
1981 1981 lr.push(line)
1982 1982 break
1983 1983 return lines
1984 1984
1985 1985 for line in iter(lr.readline, b''):
1986 1986 if line.startswith(b'diff --git a/') or line.startswith(b'diff -r '):
1987 1987
1988 1988 def notheader(line):
1989 1989 s = line.split(None, 1)
1990 1990 return not s or s[0] not in (b'---', b'diff')
1991 1991
1992 1992 header = scanwhile(line, notheader)
1993 1993 fromfile = lr.readline()
1994 1994 if fromfile.startswith(b'---'):
1995 1995 tofile = lr.readline()
1996 1996 header += [fromfile, tofile]
1997 1997 else:
1998 1998 lr.push(fromfile)
1999 1999 yield b'file', header
2000 2000 elif line.startswith(b' '):
2001 2001 cs = (b' ', b'\\')
2002 2002 yield b'context', scanwhile(line, lambda l: l.startswith(cs))
2003 2003 elif line.startswith((b'-', b'+')):
2004 2004 cs = (b'-', b'+', b'\\')
2005 2005 yield b'hunk', scanwhile(line, lambda l: l.startswith(cs))
2006 2006 else:
2007 2007 m = lines_re.match(line)
2008 2008 if m:
2009 2009 yield b'range', m.groups()
2010 2010 else:
2011 2011 yield b'other', line
2012 2012
2013 2013
2014 2014 def scangitpatch(lr, firstline):
2015 2015 """
2016 2016 Git patches can emit:
2017 2017 - rename a to b
2018 2018 - change b
2019 2019 - copy a to c
2020 2020 - change c
2021 2021
2022 2022     We cannot apply this sequence as-is: the renamed 'a' could not be
2023 2023     found because it would have been renamed already. And we cannot copy
2024 2024 from 'b' instead because 'b' would have been changed already. So
2025 2025 we scan the git patch for copy and rename commands so we can
2026 2026 perform the copies ahead of time.
2027 2027 """
2028 2028 pos = 0
2029 2029 try:
2030 2030 pos = lr.fp.tell()
2031 2031 fp = lr.fp
2032 2032 except IOError:
2033 2033 fp = stringio(lr.fp.read())
2034 2034 gitlr = linereader(fp)
2035 2035 gitlr.push(firstline)
2036 2036 gitpatches = readgitpatch(gitlr)
2037 2037 fp.seek(pos)
2038 2038 return gitpatches
2039 2039
2040 2040
2041 2041 def iterhunks(fp):
2042 2042 """Read a patch and yield the following events:
2043 2043 - ("file", afile, bfile, firsthunk): select a new target file.
2044 2044 - ("hunk", hunk): a new hunk is ready to be applied, follows a
2045 2045 "file" event.
2046 2046 - ("git", gitchanges): current diff is in git format, gitchanges
2047 2047 maps filenames to gitpatch records. Unique event.
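
    For example (illustrative), a plain unified diff for a single file
    with one hunk yields ("file", ...) followed by ("hunk", hunk), while
    a git diff additionally yields the ("git", ...) event up front.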
2048 2048 """
2049 2049 afile = b""
2050 2050 bfile = b""
2051 2051 state = None
2052 2052 hunknum = 0
2053 2053 emitfile = newfile = False
2054 2054 gitpatches = None
2055 2055
2056 2056 # our states
2057 2057 BFILE = 1
2058 2058 context = None
2059 2059 lr = linereader(fp)
2060 2060
2061 2061 for x in iter(lr.readline, b''):
2062 2062 if state == BFILE and (
2063 2063 (not context and x.startswith(b'@'))
2064 2064 or (context is not False and x.startswith(b'***************'))
2065 2065 or x.startswith(b'GIT binary patch')
2066 2066 ):
2067 2067 gp = None
2068 2068 if gitpatches and gitpatches[-1].ispatching(afile, bfile):
2069 2069 gp = gitpatches.pop()
2070 2070 if x.startswith(b'GIT binary patch'):
2071 2071 h = binhunk(lr, gp.path)
2072 2072 else:
2073 2073 if context is None and x.startswith(b'***************'):
2074 2074 context = True
2075 2075 h = hunk(x, hunknum + 1, lr, context)
2076 2076 hunknum += 1
2077 2077 if emitfile:
2078 2078 emitfile = False
2079 2079 yield b'file', (afile, bfile, h, gp and gp.copy() or None)
2080 2080 yield b'hunk', h
2081 2081 elif x.startswith(b'diff --git a/'):
2082 2082 m = gitre.match(x.rstrip(b'\r\n'))
2083 2083 if not m:
2084 2084 continue
2085 2085 if gitpatches is None:
2086 2086 # scan whole input for git metadata
2087 2087 gitpatches = scangitpatch(lr, x)
2088 2088 yield b'git', [
2089 2089 g.copy() for g in gitpatches if g.op in (b'COPY', b'RENAME')
2090 2090 ]
2091 2091 gitpatches.reverse()
2092 2092 afile = b'a/' + m.group(1)
2093 2093 bfile = b'b/' + m.group(2)
2094 2094 while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
2095 2095 gp = gitpatches.pop()
2096 2096 yield b'file', (
2097 2097 b'a/' + gp.path,
2098 2098 b'b/' + gp.path,
2099 2099 None,
2100 2100 gp.copy(),
2101 2101 )
2102 2102 if not gitpatches:
2103 2103 raise PatchParseError(
2104 2104 _(b'failed to synchronize metadata for "%s"') % afile[2:]
2105 2105 )
2106 2106 newfile = True
2107 2107 elif x.startswith(b'---'):
2108 2108 # check for a unified diff
2109 2109 l2 = lr.readline()
2110 2110 if not l2.startswith(b'+++'):
2111 2111 lr.push(l2)
2112 2112 continue
2113 2113 newfile = True
2114 2114 context = False
2115 2115 afile = parsefilename(x)
2116 2116 bfile = parsefilename(l2)
2117 2117 elif x.startswith(b'***'):
2118 2118 # check for a context diff
2119 2119 l2 = lr.readline()
2120 2120 if not l2.startswith(b'---'):
2121 2121 lr.push(l2)
2122 2122 continue
2123 2123 l3 = lr.readline()
2124 2124 lr.push(l3)
2125 2125 if not l3.startswith(b"***************"):
2126 2126 lr.push(l2)
2127 2127 continue
2128 2128 newfile = True
2129 2129 context = True
2130 2130 afile = parsefilename(x)
2131 2131 bfile = parsefilename(l2)
2132 2132
2133 2133 if newfile:
2134 2134 newfile = False
2135 2135 emitfile = True
2136 2136 state = BFILE
2137 2137 hunknum = 0
2138 2138
2139 2139 while gitpatches:
2140 2140 gp = gitpatches.pop()
2141 2141 yield b'file', (b'a/' + gp.path, b'b/' + gp.path, None, gp.copy())
2142 2142
2143 2143
2144 2144 def applybindelta(binchunk, data):
2145 2145 """Apply a binary delta hunk
2146 2146     The algorithm used is the one from git's patch-delta.c
2147 2147 """
2148 2148
2149 2149 def deltahead(binchunk):
2150 2150 i = 0
2151 2151 for c in pycompat.bytestr(binchunk):
2152 2152 i += 1
2153 2153 if not (ord(c) & 0x80):
2154 2154 return i
2155 2155 return i
2156 2156
2157 2157 out = b""
2158 2158 s = deltahead(binchunk)
2159 2159 binchunk = binchunk[s:]
2160 2160 s = deltahead(binchunk)
2161 2161 binchunk = binchunk[s:]
2162 2162 i = 0
2163 2163 while i < len(binchunk):
2164 2164 cmd = ord(binchunk[i : i + 1])
2165 2165 i += 1
2166 2166 if cmd & 0x80:
2167 2167 offset = 0
2168 2168 size = 0
2169 2169 if cmd & 0x01:
2170 2170 offset = ord(binchunk[i : i + 1])
2171 2171 i += 1
2172 2172 if cmd & 0x02:
2173 2173 offset |= ord(binchunk[i : i + 1]) << 8
2174 2174 i += 1
2175 2175 if cmd & 0x04:
2176 2176 offset |= ord(binchunk[i : i + 1]) << 16
2177 2177 i += 1
2178 2178 if cmd & 0x08:
2179 2179 offset |= ord(binchunk[i : i + 1]) << 24
2180 2180 i += 1
2181 2181 if cmd & 0x10:
2182 2182 size = ord(binchunk[i : i + 1])
2183 2183 i += 1
2184 2184 if cmd & 0x20:
2185 2185 size |= ord(binchunk[i : i + 1]) << 8
2186 2186 i += 1
2187 2187 if cmd & 0x40:
2188 2188 size |= ord(binchunk[i : i + 1]) << 16
2189 2189 i += 1
2190 2190 if size == 0:
2191 2191 size = 0x10000
2192 2192 offset_end = offset + size
2193 2193 out += data[offset:offset_end]
2194 2194 elif cmd != 0:
2195 2195 offset_end = i + cmd
2196 2196 out += binchunk[i:offset_end]
2197 2197 i += cmd
2198 2198 else:
2199 2199 raise PatchApplicationError(_(b'unexpected delta opcode 0'))
2200 2200 return out
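
# A minimal worked example (illustrative): in the delta
# b'\x0b\x05\x91\x06\x05' the first two varints give the source size
# (11) and the target size (5); the 0x91 command then copies size=5
# bytes from offset=6 of the source, so
#   applybindelta(b'\x0b\x05\x91\x06\x05', b'hello world') == b'world'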
2201 2201
2202 2202
2203 2203 def applydiff(ui, fp, backend, store, strip=1, prefix=b'', eolmode=b'strict'):
2204 2204 """Reads a patch from fp and tries to apply it.
2205 2205
2206 2206 Returns 0 for a clean patch, -1 if any rejects were found and 1 if
2207 2207 there was any fuzz.
2208 2208
2209 2209 If 'eolmode' is 'strict', the patch content and patched file are
2210 2210 read in binary mode. Otherwise, line endings are ignored when
2211 2211     patching and then normalized according to 'eolmode'.
2212 2212 """
2213 2213 return _applydiff(
2214 2214 ui,
2215 2215 fp,
2216 2216 patchfile,
2217 2217 backend,
2218 2218 store,
2219 2219 strip=strip,
2220 2220 prefix=prefix,
2221 2221 eolmode=eolmode,
2222 2222 )
2223 2223
2224 2224
2225 2225 def _canonprefix(repo, prefix):
2226 2226 if prefix:
2227 2227 prefix = pathutil.canonpath(repo.root, repo.getcwd(), prefix)
2228 2228 if prefix != b'':
2229 2229 prefix += b'/'
2230 2230 return prefix
2231 2231
2232 2232
2233 2233 def _applydiff(
2234 2234 ui, fp, patcher, backend, store, strip=1, prefix=b'', eolmode=b'strict'
2235 2235 ):
2236 2236 prefix = _canonprefix(backend.repo, prefix)
2237 2237
2238 2238 def pstrip(p):
2239 2239 return pathtransform(p, strip - 1, prefix)[1]
2240 2240
2241 2241 rejects = 0
2242 2242 err = 0
2243 2243 current_file = None
2244 2244
2245 2245 for state, values in iterhunks(fp):
2246 2246 if state == b'hunk':
2247 2247 if not current_file:
2248 2248 continue
2249 2249 ret = current_file.apply(values)
2250 2250 if ret > 0:
2251 2251 err = 1
2252 2252 elif state == b'file':
2253 2253 if current_file:
2254 2254 rejects += current_file.close()
2255 2255 current_file = None
2256 2256 afile, bfile, first_hunk, gp = values
2257 2257 if gp:
2258 2258 gp.path = pstrip(gp.path)
2259 2259 if gp.oldpath:
2260 2260 gp.oldpath = pstrip(gp.oldpath)
2261 2261 else:
2262 2262 gp = makepatchmeta(
2263 2263 backend, afile, bfile, first_hunk, strip, prefix
2264 2264 )
2265 2265 if gp.op == b'RENAME':
2266 2266 backend.unlink(gp.oldpath)
2267 2267 if not first_hunk:
2268 2268 if gp.op == b'DELETE':
2269 2269 backend.unlink(gp.path)
2270 2270 continue
2271 2271 data, mode = None, None
2272 2272 if gp.op in (b'RENAME', b'COPY'):
2273 2273 data, mode = store.getfile(gp.oldpath)[:2]
2274 2274 if data is None:
2275 2275 # This means that the old path does not exist
2276 2276 raise PatchApplicationError(
2277 2277 _(b"source file '%s' does not exist") % gp.oldpath
2278 2278 )
2279 2279 if gp.mode:
2280 2280 mode = gp.mode
2281 2281 if gp.op == b'ADD':
2282 2282 # Added files without content have no hunk and
2283 2283 # must be created
2284 2284 data = b''
2285 2285 if data or mode:
2286 2286 if gp.op in (b'ADD', b'RENAME', b'COPY') and backend.exists(
2287 2287 gp.path
2288 2288 ):
2289 2289 raise PatchApplicationError(
2290 2290 _(
2291 2291 b"cannot create %s: destination "
2292 2292 b"already exists"
2293 2293 )
2294 2294 % gp.path
2295 2295 )
2296 2296 backend.setfile(gp.path, data, mode, gp.oldpath)
2297 2297 continue
2298 2298 try:
2299 2299 current_file = patcher(ui, gp, backend, store, eolmode=eolmode)
2300 2300 except PatchError as inst:
2301 2301 ui.warn(stringutil.forcebytestr(inst) + b'\n')
2302 2302 current_file = None
2303 2303 rejects += 1
2304 2304 continue
2305 2305 elif state == b'git':
2306 2306 for gp in values:
2307 2307 path = pstrip(gp.oldpath)
2308 2308 data, mode = backend.getfile(path)
2309 2309 if data is None:
2310 2310 # The error ignored here will trigger a getfile()
2311 2311 # error in a place more appropriate for error
2312 2312 # handling, and will not interrupt the patching
2313 2313 # process.
2314 2314 pass
2315 2315 else:
2316 2316 store.setfile(path, data, mode)
2317 2317 else:
2318 2318 raise error.Abort(_(b'unsupported parser state: %s') % state)
2319 2319
2320 2320 if current_file:
2321 2321 rejects += current_file.close()
2322 2322
2323 2323 if rejects:
2324 2324 return -1
2325 2325 return err
2326 2326
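# A minimal, self-contained sketch of the (state, values) protocol that
# _applydiff() consumes above. iterhunks() is assumed to emit b'file',
# b'hunk' and b'git' events in this shape; the handlers below are
# placeholders, not mercurial API.
def _consume(events):
    current = None
    for state, values in events:
        if state == b'file':
            current = values  # start patching a new target file
        elif state == b'hunk':
            if current is not None:
                pass  # apply one hunk to the currently open file
        elif state == b'git':
            pass  # record pre-images for git copies/renames
        else:
            raise ValueError(state)

_consume([(b'file', (b'a/x', b'b/x', None, None)), (b'hunk', [b'@@ -1 +1 @@'])])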
2327 2327
2328 2328 def _externalpatch(ui, repo, patcher, patchname, strip, files, similarity):
2329 2329 """use <patcher> to apply <patchname> to the working directory.
2330 2330 returns whether patch was applied with fuzz factor."""
2331 2331
2332 2332 fuzz = False
2333 2333 args = []
2334 2334 cwd = repo.root
2335 2335 if cwd:
2336 2336 args.append(b'-d %s' % procutil.shellquote(cwd))
2337 2337 cmd = b'%s %s -p%d < %s' % (
2338 2338 patcher,
2339 2339 b' '.join(args),
2340 2340 strip,
2341 2341 procutil.shellquote(patchname),
2342 2342 )
2343 2343 ui.debug(b'Using external patch tool: %s\n' % cmd)
2344 2344 fp = procutil.popen(cmd, b'rb')
2345 2345 try:
2346 for line in util.iterfile(fp):
2346 for line in fp:
2347 2347 line = line.rstrip()
2348 2348 ui.note(line + b'\n')
2349 2349 if line.startswith(b'patching file '):
2350 2350 pf = util.parsepatchoutput(line)
2351 2351 printed_file = False
2352 2352 files.add(pf)
2353 2353 elif line.find(b'with fuzz') >= 0:
2354 2354 fuzz = True
2355 2355 if not printed_file:
2356 2356 ui.warn(pf + b'\n')
2357 2357 printed_file = True
2358 2358 ui.warn(line + b'\n')
2359 2359 elif line.find(b'saving rejects to file') >= 0:
2360 2360 ui.warn(line + b'\n')
2361 2361 elif line.find(b'FAILED') >= 0:
2362 2362 if not printed_file:
2363 2363 ui.warn(pf + b'\n')
2364 2364 printed_file = True
2365 2365 ui.warn(line + b'\n')
2366 2366 finally:
2367 2367 if files:
2368 2368 scmutil.marktouched(repo, files, similarity)
2369 2369 code = fp.close()
2370 2370 if code:
2371 2371 raise PatchApplicationError(
2372 2372 _(b"patch command failed: %s") % procutil.explainexit(code)
2373 2373 )
2374 2374 return fuzz
2375 2375
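# For reference, the shell command assembled above has this shape (the paths
# here are hypothetical):
#
#   patch -d '/repo/root' -p1 < '/tmp/fix.patch'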
2376 2376
2377 2377 def patchbackend(
2378 2378 ui, backend, patchobj, strip, prefix, files=None, eolmode=b'strict'
2379 2379 ):
2380 2380 if files is None:
2381 2381 files = set()
2382 2382 if eolmode is None:
2383 2383 eolmode = ui.config(b'patch', b'eol')
2384 2384 if eolmode.lower() not in eolmodes:
2385 2385 raise error.Abort(_(b'unsupported line endings type: %s') % eolmode)
2386 2386 eolmode = eolmode.lower()
2387 2387
2388 2388 store = filestore()
2389 2389 try:
2390 2390 fp = open(patchobj, b'rb')
2391 2391 except TypeError:
2392 2392 fp = patchobj
2393 2393 try:
2394 2394 ret = applydiff(
2395 2395 ui, fp, backend, store, strip=strip, prefix=prefix, eolmode=eolmode
2396 2396 )
2397 2397 finally:
2398 2398 if fp != patchobj:
2399 2399 fp.close()
2400 2400 files.update(backend.close())
2401 2401 store.close()
2402 2402 if ret < 0:
2403 2403 raise PatchApplicationError(_(b'patch failed to apply'))
2404 2404 return ret > 0
2405 2405
2406 2406
2407 2407 def internalpatch(
2408 2408 ui,
2409 2409 repo,
2410 2410 patchobj,
2411 2411 strip,
2412 2412 prefix=b'',
2413 2413 files=None,
2414 2414 eolmode=b'strict',
2415 2415 similarity=0,
2416 2416 ):
2417 2417 """use builtin patch to apply <patchobj> to the working directory.
2418 2418 returns whether patch was applied with fuzz factor."""
2419 2419 backend = workingbackend(ui, repo, similarity)
2420 2420 return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)
2421 2421
2422 2422
2423 2423 def patchrepo(
2424 2424 ui, repo, ctx, store, patchobj, strip, prefix, files=None, eolmode=b'strict'
2425 2425 ):
2426 2426 backend = repobackend(ui, repo, ctx, store)
2427 2427 return patchbackend(ui, backend, patchobj, strip, prefix, files, eolmode)
2428 2428
2429 2429
2430 2430 def patch(
2431 2431 ui,
2432 2432 repo,
2433 2433 patchname,
2434 2434 strip=1,
2435 2435 prefix=b'',
2436 2436 files=None,
2437 2437 eolmode=b'strict',
2438 2438 similarity=0,
2439 2439 ):
2440 2440 """Apply <patchname> to the working directory.
2441 2441
2442 2442 'eolmode' specifies how end of lines should be handled. It can be:
2443 2443 - 'strict': inputs are read in binary mode, EOLs are preserved
2444 2444 - 'crlf': EOLs are ignored when patching and reset to CRLF
2445 2445 - 'lf': EOLs are ignored when patching and reset to LF
2446 2446 - None: get it from user settings, default to 'strict'
2447 2447 'eolmode' is ignored when using an external patcher program.
2448 2448
2449 2449 Returns whether patch was applied with fuzz factor.
2450 2450 """
2451 2451 patcher = ui.config(b'ui', b'patch')
2452 2452 if files is None:
2453 2453 files = set()
2454 2454 if patcher:
2455 2455 return _externalpatch(
2456 2456 ui, repo, patcher, patchname, strip, files, similarity
2457 2457 )
2458 2458 return internalpatch(
2459 2459 ui, repo, patchname, strip, prefix, files, eolmode, similarity
2460 2460 )
2461 2461
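# A hedged usage sketch of patch(), as extension code might call it; 'ui'
# and 'repo' are assumed to be supplied by a mercurial embedding and the
# patch path is hypothetical. This is not a stable public API.
def _apply_fix(ui, repo):
    files = set()
    fuzz = patch(ui, repo, b'/tmp/fix.patch', strip=1, files=files)
    if fuzz:
        ui.warn(b'patch applied with fuzz\n')
    ui.note(b'files touched: %d\n' % len(files))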
2462 2462
2463 2463 def changedfiles(ui, repo, patchpath, strip=1, prefix=b''):
2464 2464 backend = fsbackend(ui, repo.root)
2465 2465 prefix = _canonprefix(repo, prefix)
2466 2466 with open(patchpath, b'rb') as fp:
2467 2467 changed = set()
2468 2468 for state, values in iterhunks(fp):
2469 2469 if state == b'file':
2470 2470 afile, bfile, first_hunk, gp = values
2471 2471 if gp:
2472 2472 gp.path = pathtransform(gp.path, strip - 1, prefix)[1]
2473 2473 if gp.oldpath:
2474 2474 gp.oldpath = pathtransform(
2475 2475 gp.oldpath, strip - 1, prefix
2476 2476 )[1]
2477 2477 else:
2478 2478 gp = makepatchmeta(
2479 2479 backend, afile, bfile, first_hunk, strip, prefix
2480 2480 )
2481 2481 changed.add(gp.path)
2482 2482 if gp.op == b'RENAME':
2483 2483 changed.add(gp.oldpath)
2484 2484 elif state not in (b'hunk', b'git'):
2485 2485 raise error.Abort(_(b'unsupported parser state: %s') % state)
2486 2486 return changed
2487 2487
2488 2488
2489 2489 class GitDiffRequired(Exception):
2490 2490 pass
2491 2491
2492 2492
2493 2493 diffopts = diffutil.diffallopts
2494 2494 diffallopts = diffutil.diffallopts
2495 2495 difffeatureopts = diffutil.difffeatureopts
2496 2496
2497 2497
2498 2498 def diff(
2499 2499 repo,
2500 2500 node1=None,
2501 2501 node2=None,
2502 2502 match=None,
2503 2503 changes=None,
2504 2504 opts=None,
2505 2505 losedatafn=None,
2506 2506 pathfn=None,
2507 2507 copy=None,
2508 2508 copysourcematch=None,
2509 2509 hunksfilterfn=None,
2510 2510 ):
2511 2511 """yields diff of changes to files between two nodes, or node and
2512 2512 working directory.
2513 2513
2514 2514 if node1 is None, use first dirstate parent instead.
2515 2515 if node2 is None, compare node1 with working directory.
2516 2516
2517 2517 losedatafn(**kwarg) is a callable run when opts.upgrade=True and
2518 2518 every time some change cannot be represented with the current
2519 2519 patch format. Return False to upgrade to git patch format, True to
2520 2520 accept the loss or raise an exception to abort the diff. It is
2521 2521 called with the name of current file being diffed as 'fn'. If set
2522 2522 to None, patches will always be upgraded to git format when
2523 2523 necessary.
2524 2524
2525 2525 prefix is a filename prefix that is prepended to all filenames on
2526 2526 display (used for subrepos).
2527 2527
2528 2528 relroot, if not empty, must be normalized with a trailing /. Any match
2529 2529 patterns that fall outside it will be ignored.
2530 2530
2531 2531 copy, if not empty, should contain mappings {dst@y: src@x} of copy
2532 2532 information.
2533 2533
2534 2534 if copysourcematch is not None, then copy sources will be filtered by this
2535 2535 matcher
2536 2536
2537 2537 hunksfilterfn, if not None, should be a function taking a filectx and
2538 2538 hunks generator that may yield filtered hunks.
2539 2539 """
2540 2540 if not node1 and not node2:
2541 2541 node1 = repo.dirstate.p1()
2542 2542
2543 2543 ctx1 = repo[node1]
2544 2544 ctx2 = repo[node2]
2545 2545
2546 2546 for fctx1, fctx2, hdr, hunks in diffhunks(
2547 2547 repo,
2548 2548 ctx1=ctx1,
2549 2549 ctx2=ctx2,
2550 2550 match=match,
2551 2551 changes=changes,
2552 2552 opts=opts,
2553 2553 losedatafn=losedatafn,
2554 2554 pathfn=pathfn,
2555 2555 copy=copy,
2556 2556 copysourcematch=copysourcematch,
2557 2557 ):
2558 2558 if hunksfilterfn is not None:
2559 2559 # If the file has been removed, fctx2 is None; but this should
2560 2560 # not occur here since we catch removed files early in
2561 2561 # logcmdutil.getlinerangerevs() for 'hg log -L'.
2562 2562 assert (
2563 2563 fctx2 is not None
2564 2564 ), b'fctx2 unexpectedly None in diff hunks filtering'
2565 2565 hunks = hunksfilterfn(fctx2, hunks)
2566 2566 text = b''.join(b''.join(hlines) for hrange, hlines in hunks)
2567 2567 if hdr and (text or len(hdr) > 1):
2568 2568 yield b'\n'.join(hdr) + b'\n'
2569 2569 if text:
2570 2570 yield text
2571 2571
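# A hedged usage sketch of diff(): with the default node1/node2 it streams
# the working-directory diff as bytes chunks. 'ui' and 'repo' are assumed
# to come from a mercurial embedding.
def _show_wdir_diff(ui, repo):
    for chunk in diff(repo):
        ui.write(chunk)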
2572 2572
2573 2573 def diffhunks(
2574 2574 repo,
2575 2575 ctx1,
2576 2576 ctx2,
2577 2577 match=None,
2578 2578 changes=None,
2579 2579 opts=None,
2580 2580 losedatafn=None,
2581 2581 pathfn=None,
2582 2582 copy=None,
2583 2583 copysourcematch=None,
2584 2584 ):
2585 2585 """Yield diff of changes to files in the form of (`header`, `hunks`) tuples
2586 2586 where `header` is a list of diff headers and `hunks` is an iterable of
2587 2587 (`hunkrange`, `hunklines`) tuples.
2588 2588
2589 2589 See diff() for the meaning of parameters.
2590 2590 """
2591 2591
2592 2592 if opts is None:
2593 2593 opts = mdiff.defaultopts
2594 2594
2595 2595 def lrugetfilectx():
2596 2596 cache = {}
2597 2597 order = collections.deque()
2598 2598
2599 2599 def getfilectx(f, ctx):
2600 2600 fctx = ctx.filectx(f, filelog=cache.get(f))
2601 2601 if f not in cache:
2602 2602 if len(cache) > 20:
2603 2603 del cache[order.popleft()]
2604 2604 cache[f] = fctx.filelog()
2605 2605 else:
2606 2606 order.remove(f)
2607 2607 order.append(f)
2608 2608 return fctx
2609 2609
2610 2610 return getfilectx
2611 2611
2612 2612 getfilectx = lrugetfilectx()
2613 2613
2614 2614 if not changes:
2615 2615 changes = ctx1.status(ctx2, match=match)
2616 2616 if isinstance(changes, list):
2617 2617 modified, added, removed = changes[:3]
2618 2618 else:
2619 2619 modified, added, removed = (
2620 2620 changes.modified,
2621 2621 changes.added,
2622 2622 changes.removed,
2623 2623 )
2624 2624
2625 2625 if not modified and not added and not removed:
2626 2626 return []
2627 2627
2628 2628 if repo.ui.debugflag:
2629 2629 hexfunc = hex
2630 2630 else:
2631 2631 hexfunc = short
2632 2632 revs = [hexfunc(node) for node in [ctx1.node(), ctx2.node()] if node]
2633 2633
2634 2634 if copy is None:
2635 2635 copy = {}
2636 2636 if opts.git or opts.upgrade:
2637 2637 copy = copies.pathcopies(ctx1, ctx2, match=match)
2638 2638
2639 2639 if copysourcematch:
2640 2640 # filter out copies where source side isn't inside the matcher
2641 2641 # (copies.pathcopies() already filtered out the destination)
2642 2642 copy = {dst: src for dst, src in copy.items() if copysourcematch(src)}
2643 2643
2644 2644 modifiedset = set(modified)
2645 2645 addedset = set(added)
2646 2646 removedset = set(removed)
2647 2647 for f in modified:
2648 2648 if f not in ctx1:
2649 2649 # Fix up added, since merged-in additions appear as
2650 2650 # modifications during merges
2651 2651 modifiedset.remove(f)
2652 2652 addedset.add(f)
2653 2653 for f in removed:
2654 2654 if f not in ctx1:
2655 2655 # Merged-in additions that are then removed are reported as removed.
2656 2656 # They are not in ctx1, so we don't want to show them in the diff.
2657 2657 removedset.remove(f)
2658 2658 modified = sorted(modifiedset)
2659 2659 added = sorted(addedset)
2660 2660 removed = sorted(removedset)
2661 2661 for dst, src in list(copy.items()):
2662 2662 if src not in ctx1:
2663 2663 # Files merged in during a merge and then copied/renamed are
2664 2664 # reported as copies. We want to show them in the diff as additions.
2665 2665 del copy[dst]
2666 2666
2667 2667 prefetchmatch = scmutil.matchfiles(
2668 2668 repo, list(modifiedset | addedset | removedset)
2669 2669 )
2670 2670 revmatches = [
2671 2671 (ctx1.rev(), prefetchmatch),
2672 2672 (ctx2.rev(), prefetchmatch),
2673 2673 ]
2674 2674 scmutil.prefetchfiles(repo, revmatches)
2675 2675
2676 2676 def difffn(opts, losedata):
2677 2677 return trydiff(
2678 2678 repo,
2679 2679 revs,
2680 2680 ctx1,
2681 2681 ctx2,
2682 2682 modified,
2683 2683 added,
2684 2684 removed,
2685 2685 copy,
2686 2686 getfilectx,
2687 2687 opts,
2688 2688 losedata,
2689 2689 pathfn,
2690 2690 )
2691 2691
2692 2692 if opts.upgrade and not opts.git:
2693 2693 try:
2694 2694
2695 2695 def losedata(fn):
2696 2696 if not losedatafn or not losedatafn(fn=fn):
2697 2697 raise GitDiffRequired
2698 2698
2699 2699 # Buffer the whole output until we are sure it can be generated
2700 2700 return list(difffn(opts.copy(git=False), losedata))
2701 2701 except GitDiffRequired:
2702 2702 return difffn(opts.copy(git=True), None)
2703 2703 else:
2704 2704 return difffn(opts, None)
2705 2705
2706 2706
2707 2707 def diffsinglehunk(hunklines):
2708 2708 """yield tokens for a list of lines in a single hunk"""
2709 2709 for line in hunklines:
2710 2710 # chomp
2711 2711 chompline = line.rstrip(b'\r\n')
2712 2712 # highlight tabs and trailing whitespace
2713 2713 stripline = chompline.rstrip()
2714 2714 if line.startswith(b'-'):
2715 2715 label = b'diff.deleted'
2716 2716 elif line.startswith(b'+'):
2717 2717 label = b'diff.inserted'
2718 2718 else:
2719 2719 raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
2720 2720 for token in tabsplitter.findall(stripline):
2721 2721 if token.startswith(b'\t'):
2722 2722 yield (token, b'diff.tab')
2723 2723 else:
2724 2724 yield (token, label)
2725 2725
2726 2726 if chompline != stripline:
2727 2727 yield (chompline[len(stripline) :], b'diff.trailingwhitespace')
2728 2728 if chompline != line:
2729 2729 yield (line[len(chompline) :], b'')
2730 2730
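# A worked example of the token stream above, assuming tabsplitter (defined
# earlier in this module) splits runs of tabs from runs of non-tabs:
#
#   list(diffsinglehunk([b'-foo\tbar  \n']))
#   -> [(b'-foo', b'diff.deleted'), (b'\t', b'diff.tab'),
#       (b'bar', b'diff.deleted'), (b'  ', b'diff.trailingwhitespace'),
#       (b'\n', b'')]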
2731 2731
2732 2732 def diffsinglehunkinline(hunklines):
2733 2733 """yield tokens for a list of lines in a single hunk, with inline colors"""
2734 2734 # prepare deleted, and inserted content
2735 2735 a = bytearray()
2736 2736 b = bytearray()
2737 2737 for line in hunklines:
2738 2738 if line[0:1] == b'-':
2739 2739 a += line[1:]
2740 2740 elif line[0:1] == b'+':
2741 2741 b += line[1:]
2742 2742 else:
2743 2743 raise error.ProgrammingError(b'unexpected hunk line: %s' % line)
2744 2744 # fast path: if either side is empty, use diffsinglehunk
2745 2745 if not a or not b:
2746 2746 for t in diffsinglehunk(hunklines):
2747 2747 yield t
2748 2748 return
2749 2749 # re-split the content into words
2750 2750 al = wordsplitter.findall(bytes(a))
2751 2751 bl = wordsplitter.findall(bytes(b))
2752 2752 # re-arrange the words to lines since the diff algorithm is line-based
2753 2753 aln = [s if s == b'\n' else s + b'\n' for s in al]
2754 2754 bln = [s if s == b'\n' else s + b'\n' for s in bl]
2755 2755 an = b''.join(aln)
2756 2756 bn = b''.join(bln)
2757 2757 # run the diff algorithm, prepare atokens and btokens
2758 2758 atokens = []
2759 2759 btokens = []
2760 2760 blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
2761 2761 for (a1, a2, b1, b2), btype in blocks:
2762 2762 changed = btype == b'!'
2763 2763 for token in mdiff.splitnewlines(b''.join(al[a1:a2])):
2764 2764 atokens.append((changed, token))
2765 2765 for token in mdiff.splitnewlines(b''.join(bl[b1:b2])):
2766 2766 btokens.append((changed, token))
2767 2767
2768 2768 # yield deleted tokens, then inserted ones
2769 2769 for prefix, label, tokens in [
2770 2770 (b'-', b'diff.deleted', atokens),
2771 2771 (b'+', b'diff.inserted', btokens),
2772 2772 ]:
2773 2773 nextisnewline = True
2774 2774 for changed, token in tokens:
2775 2775 if nextisnewline:
2776 2776 yield (prefix, label)
2777 2777 nextisnewline = False
2778 2778 # special handling of the line end
2779 2779 isendofline = token.endswith(b'\n')
2780 2780 if isendofline:
2781 2781 chomp = token[:-1] # chomp
2782 2782 if chomp.endswith(b'\r'):
2783 2783 chomp = chomp[:-1]
2784 2784 endofline = token[len(chomp) :]
2785 2785 token = chomp.rstrip() # detect spaces at the end
2786 2786 endspaces = chomp[len(token) :]
2787 2787 # scan tabs
2788 2788 for maybetab in tabsplitter.findall(token):
2789 2789 if b'\t' == maybetab[0:1]:
2790 2790 currentlabel = b'diff.tab'
2791 2791 else:
2792 2792 if changed:
2793 2793 currentlabel = label + b'.changed'
2794 2794 else:
2795 2795 currentlabel = label + b'.unchanged'
2796 2796 yield (maybetab, currentlabel)
2797 2797 if isendofline:
2798 2798 if endspaces:
2799 2799 yield (endspaces, b'diff.trailingwhitespace')
2800 2800 yield (endofline, b'')
2801 2801 nextisnewline = True
2802 2802
2803 2803
2804 2804 def difflabel(func, *args, **kw):
2805 2805 '''yields 2-tuples of (output, label) based on the output of func()'''
2806 2806 if kw.get('opts') and kw['opts'].worddiff:
2807 2807 dodiffhunk = diffsinglehunkinline
2808 2808 else:
2809 2809 dodiffhunk = diffsinglehunk
2810 2810 headprefixes = [
2811 2811 (b'diff', b'diff.diffline'),
2812 2812 (b'copy', b'diff.extended'),
2813 2813 (b'rename', b'diff.extended'),
2814 2814 (b'old', b'diff.extended'),
2815 2815 (b'new', b'diff.extended'),
2816 2816 (b'deleted', b'diff.extended'),
2817 2817 (b'index', b'diff.extended'),
2818 2818 (b'similarity', b'diff.extended'),
2819 2819 (b'---', b'diff.file_a'),
2820 2820 (b'+++', b'diff.file_b'),
2821 2821 ]
2822 2822 textprefixes = [
2823 2823 (b'@', b'diff.hunk'),
2824 2824 # - and + are handled by diffsinglehunk
2825 2825 ]
2826 2826 head = False
2827 2827
2828 2828 # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
2829 2829 hunkbuffer = []
2830 2830
2831 2831 def consumehunkbuffer():
2832 2832 if hunkbuffer:
2833 2833 for token in dodiffhunk(hunkbuffer):
2834 2834 yield token
2835 2835 hunkbuffer[:] = []
2836 2836
2837 2837 for chunk in func(*args, **kw):
2838 2838 lines = chunk.split(b'\n')
2839 2839 linecount = len(lines)
2840 2840 for i, line in enumerate(lines):
2841 2841 if head:
2842 2842 if line.startswith(b'@'):
2843 2843 head = False
2844 2844 else:
2845 2845 if line and not line.startswith(
2846 2846 (b' ', b'+', b'-', b'@', b'\\')
2847 2847 ):
2848 2848 head = True
2849 2849 diffline = False
2850 2850 if not head and line and line.startswith((b'+', b'-')):
2851 2851 diffline = True
2852 2852
2853 2853 prefixes = textprefixes
2854 2854 if head:
2855 2855 prefixes = headprefixes
2856 2856 if diffline:
2857 2857 # buffered
2858 2858 bufferedline = line
2859 2859 if i + 1 < linecount:
2860 2860 bufferedline += b"\n"
2861 2861 hunkbuffer.append(bufferedline)
2862 2862 else:
2863 2863 # unbuffered
2864 2864 for token in consumehunkbuffer():
2865 2865 yield token
2866 2866 stripline = line.rstrip()
2867 2867 for prefix, label in prefixes:
2868 2868 if stripline.startswith(prefix):
2869 2869 yield (stripline, label)
2870 2870 if line != stripline:
2871 2871 yield (
2872 2872 line[len(stripline) :],
2873 2873 b'diff.trailingwhitespace',
2874 2874 )
2875 2875 break
2876 2876 else:
2877 2877 yield (line, b'')
2878 2878 if i + 1 < linecount:
2879 2879 yield (b'\n', b'')
2880 2880 for token in consumehunkbuffer():
2881 2881 yield token
2882 2882
2883 2883
2884 2884 def diffui(*args, **kw):
2885 2885 '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
2886 2886 return difflabel(diff, *args, **kw)
2887 2887
2888 2888
2889 2889 def _filepairs(modified, added, removed, copy, opts):
2890 2890 """generates tuples (f1, f2, copyop), where f1 is the name of the file
2891 2891 before and f2 is the name after. For added files, f1 will be None,
2892 2892 and for removed files, f2 will be None. copyop may be set to None, 'copy'
2893 2893 or 'rename' (the latter two only if opts.git is set)."""
2894 2894 gone = set()
2895 2895
2896 2896 copyto = {v: k for k, v in copy.items()}
2897 2897
2898 2898 addedset, removedset = set(added), set(removed)
2899 2899
2900 2900 for f in sorted(modified + added + removed):
2901 2901 copyop = None
2902 2902 f1, f2 = f, f
2903 2903 if f in addedset:
2904 2904 f1 = None
2905 2905 if f in copy:
2906 2906 if opts.git:
2907 2907 f1 = copy[f]
2908 2908 if f1 in removedset and f1 not in gone:
2909 2909 copyop = b'rename'
2910 2910 gone.add(f1)
2911 2911 else:
2912 2912 copyop = b'copy'
2913 2913 elif f in removedset:
2914 2914 f2 = None
2915 2915 if opts.git:
2916 2916 # have we already reported a copy above?
2917 2917 if (
2918 2918 f in copyto
2919 2919 and copyto[f] in addedset
2920 2920 and copy[copyto[f]] == f
2921 2921 ):
2922 2922 continue
2923 2923 yield f1, f2, copyop
2924 2924
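# A hedged example of _filepairs(): a file b'a' removed and b'b' added with
# copy metadata {b'b': b'a'} collapses into a single rename pair, assuming
# 'opts' has opts.git set:
#
#   list(_filepairs([], [b'b'], [b'a'], {b'b': b'a'}, opts))
#   -> [(b'a', b'b', b'rename')]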
2925 2925
2926 2926 def _gitindex(text):
2927 2927 if not text:
2928 2928 text = b""
2929 2929 l = len(text)
2930 2930 s = hashutil.sha1(b'blob %d\0' % l)
2931 2931 s.update(text)
2932 2932 return hex(s.digest())
2933 2933
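# _gitindex() reproduces git's blob hashing: sha1 over b"blob <len>\0"
# followed by the content. A standalone check using hashlib as a stand-in
# for the hashutil wrapper used above:
import hashlib

def _gitblobsha(data):
    h = hashlib.sha1(b'blob %d\0' % len(data))
    h.update(data)
    return h.hexdigest()

# same value as `printf 'hello\n' | git hash-object --stdin`
assert _gitblobsha(b'hello\n') == 'ce013625030ba8dba906f756967f9e9ca394464a'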
2934 2934
2935 2935 _gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}
2936 2936
2937 2937
2938 2938 def trydiff(
2939 2939 repo,
2940 2940 revs,
2941 2941 ctx1,
2942 2942 ctx2,
2943 2943 modified,
2944 2944 added,
2945 2945 removed,
2946 2946 copy,
2947 2947 getfilectx,
2948 2948 opts,
2949 2949 losedatafn,
2950 2950 pathfn,
2951 2951 ):
2952 2952 """given input data, generate a diff and yield it in blocks
2953 2953
2954 2954 If generating a diff would lose data like flags or binary data and
2955 2955 losedatafn is not None, it will be called.
2956 2956
2957 2957 pathfn is applied to every path in the diff output.
2958 2958 """
2959 2959
2960 2960 if opts.noprefix:
2961 2961 aprefix = bprefix = b''
2962 2962 else:
2963 2963 aprefix = b'a/'
2964 2964 bprefix = b'b/'
2965 2965
2966 2966 def diffline(f, revs):
2967 2967 revinfo = b' '.join([b"-r %s" % rev for rev in revs])
2968 2968 return b'diff %s %s' % (revinfo, f)
2969 2969
2970 2970 def isempty(fctx):
2971 2971 return fctx is None or fctx.size() == 0
2972 2972
2973 2973 date1 = dateutil.datestr(ctx1.date())
2974 2974 date2 = dateutil.datestr(ctx2.date())
2975 2975
2976 2976 if not pathfn:
2977 2977 pathfn = lambda f: f
2978 2978
2979 2979 for f1, f2, copyop in _filepairs(modified, added, removed, copy, opts):
2980 2980 content1 = None
2981 2981 content2 = None
2982 2982 fctx1 = None
2983 2983 fctx2 = None
2984 2984 flag1 = None
2985 2985 flag2 = None
2986 2986 if f1:
2987 2987 fctx1 = getfilectx(f1, ctx1)
2988 2988 if opts.git or losedatafn:
2989 2989 flag1 = ctx1.flags(f1)
2990 2990 if f2:
2991 2991 fctx2 = getfilectx(f2, ctx2)
2992 2992 if opts.git or losedatafn:
2993 2993 flag2 = ctx2.flags(f2)
2994 2994 # if binary is True, output "summary" or "base85", but not "text diff"
2995 2995 if opts.text:
2996 2996 binary = False
2997 2997 else:
2998 2998 binary = any(f.isbinary() for f in [fctx1, fctx2] if f is not None)
2999 2999
3000 3000 if losedatafn and not opts.git:
3001 3001 if (
3002 3002 binary
3003 3003 or
3004 3004 # copy/rename
3005 3005 f2 in copy
3006 3006 or
3007 3007 # empty file creation
3008 3008 (not f1 and isempty(fctx2))
3009 3009 or
3010 3010 # empty file deletion
3011 3011 (isempty(fctx1) and not f2)
3012 3012 or
3013 3013 # create with flags
3014 3014 (not f1 and flag2)
3015 3015 or
3016 3016 # change flags
3017 3017 (f1 and f2 and flag1 != flag2)
3018 3018 ):
3019 3019 losedatafn(f2 or f1)
3020 3020
3021 3021 path1 = pathfn(f1 or f2)
3022 3022 path2 = pathfn(f2 or f1)
3023 3023 header = []
3024 3024 if opts.git:
3025 3025 header.append(
3026 3026 b'diff --git %s%s %s%s' % (aprefix, path1, bprefix, path2)
3027 3027 )
3028 3028 if not f1: # added
3029 3029 header.append(b'new file mode %s' % _gitmode[flag2])
3030 3030 elif not f2: # removed
3031 3031 header.append(b'deleted file mode %s' % _gitmode[flag1])
3032 3032 else: # modified/copied/renamed
3033 3033 mode1, mode2 = _gitmode[flag1], _gitmode[flag2]
3034 3034 if mode1 != mode2:
3035 3035 header.append(b'old mode %s' % mode1)
3036 3036 header.append(b'new mode %s' % mode2)
3037 3037 if copyop is not None:
3038 3038 if opts.showsimilarity:
3039 3039 sim = similar.score(ctx1[path1], ctx2[path2]) * 100
3040 3040 header.append(b'similarity index %d%%' % sim)
3041 3041 header.append(b'%s from %s' % (copyop, path1))
3042 3042 header.append(b'%s to %s' % (copyop, path2))
3043 3043 elif revs:
3044 3044 header.append(diffline(path1, revs))
3045 3045
3046 3046 # fctx.is | diffopts | what to | is fctx.data()
3047 3047 # binary() | text nobinary git index | output? | outputted?
3048 3048 # ------------------------------------|----------------------------
3049 3049 # yes | no no no * | summary | no
3050 3050 # yes | no no yes * | base85 | yes
3051 3051 # yes | no yes no * | summary | no
3052 3052 # yes | no yes yes 0 | summary | no
3053 3053 # yes | no yes yes >0 | summary | semi [1]
3054 3054 # yes | yes * * * | text diff | yes
3055 3055 # no | * * * * | text diff | yes
3056 3056 # [1]: hash(fctx.data()) is output, so fctx.data() cannot be faked
3057 3057 if binary and (
3058 3058 not opts.git or (opts.git and opts.nobinary and not opts.index)
3059 3059 ):
3060 3060 # fast path: no binary content will be displayed, content1 and
3061 3061 # content2 are only used for equivalent test. cmp() could have a
3062 3062 # fast path.
3063 3063 if fctx1 is not None:
3064 3064 content1 = b'\0'
3065 3065 if fctx2 is not None:
3066 3066 if fctx1 is not None and not fctx1.cmp(fctx2):
3067 3067 content2 = b'\0' # not different
3068 3068 else:
3069 3069 content2 = b'\0\0'
3070 3070 else:
3071 3071 # normal path: load contents
3072 3072 if fctx1 is not None:
3073 3073 content1 = fctx1.data()
3074 3074 if fctx2 is not None:
3075 3075 content2 = fctx2.data()
3076 3076
3077 3077 data1 = (ctx1, fctx1, path1, flag1, content1, date1)
3078 3078 data2 = (ctx2, fctx2, path2, flag2, content2, date2)
3079 3079 yield diffcontent(data1, data2, header, binary, opts)
3080 3080
3081 3081
3082 3082 def diffcontent(data1, data2, header, binary, opts):
3083 3083 """diffs two versions of a file.
3084 3084
3085 3085 data1 and data2 are tuples containing:
3086 3086
3087 3087 * ctx: changeset for the file
3088 3088 * fctx: file context for that file
3089 3089 * path1: name of the file
3090 3090 * flag: flags of the file
3091 3091 * content: full content of the file (can be null in case of binary)
3092 3092 * date: date of the changeset
3093 3093
3094 3094 header: the patch header
3095 3095 binary: whether any version of the file is binary or not
3096 3096 opts: user-passed options
3097 3097
3098 3098 It exists as a separate function so that extensions like extdiff can wrap
3099 3099 it and use the file content directly.
3100 3100 """
3101 3101
3102 3102 ctx1, fctx1, path1, flag1, content1, date1 = data1
3103 3103 ctx2, fctx2, path2, flag2, content2, date2 = data2
3104 3104 index1 = _gitindex(content1) if path1 in ctx1 else sha1nodeconstants.nullhex
3105 3105 index2 = _gitindex(content2) if path2 in ctx2 else sha1nodeconstants.nullhex
3106 3106 if binary and opts.git and not opts.nobinary:
3107 3107 text = mdiff.b85diff(content1, content2)
3108 3108 if text:
3109 3109 header.append(b'index %s..%s' % (index1, index2))
3110 3110 hunks = ((None, [text]),)
3111 3111 else:
3112 3112 if opts.git and opts.index > 0:
3113 3113 flag = flag1
3114 3114 if flag is None:
3115 3115 flag = flag2
3116 3116 header.append(
3117 3117 b'index %s..%s %s'
3118 3118 % (
3119 3119 index1[0 : opts.index],
3120 3120 index2[0 : opts.index],
3121 3121 _gitmode[flag],
3122 3122 )
3123 3123 )
3124 3124
3125 3125 uheaders, hunks = mdiff.unidiff(
3126 3126 content1,
3127 3127 date1,
3128 3128 content2,
3129 3129 date2,
3130 3130 path1,
3131 3131 path2,
3132 3132 binary=binary,
3133 3133 opts=opts,
3134 3134 )
3135 3135 header.extend(uheaders)
3136 3136 return fctx1, fctx2, header, hunks
3137 3137
3138 3138
3139 3139 def diffstatsum(stats):
3140 3140 maxfile, maxtotal, addtotal, removetotal, binary = 0, 0, 0, 0, False
3141 3141 for f, a, r, b in stats:
3142 3142 maxfile = max(maxfile, encoding.colwidth(f))
3143 3143 maxtotal = max(maxtotal, a + r)
3144 3144 addtotal += a
3145 3145 removetotal += r
3146 3146 binary = binary or b
3147 3147
3148 3148 return maxfile, maxtotal, addtotal, removetotal, binary
3149 3149
3150 3150
3151 3151 def diffstatdata(lines):
3152 3152 diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')
3153 3153
3154 3154 results = []
3155 3155 filename, adds, removes, isbinary = None, 0, 0, False
3156 3156
3157 3157 def addresult():
3158 3158 if filename:
3159 3159 results.append((filename, adds, removes, isbinary))
3160 3160
3161 3161 # inheader is used to track if a line is in the
3162 3162 # header portion of the diff. This helps properly account
3163 3163 # for lines that start with '--' or '++'
3164 3164 inheader = False
3165 3165
3166 3166 for line in lines:
3167 3167 if line.startswith(b'diff'):
3168 3168 addresult()
3169 3169 # starting a new file diff
3170 3170 # set numbers to 0 and reset inheader
3171 3171 inheader = True
3172 3172 adds, removes, isbinary = 0, 0, False
3173 3173 if line.startswith(b'diff --git a/'):
3174 3174 filename = gitre.search(line).group(2)
3175 3175 elif line.startswith(b'diff -r'):
3176 3176 # format: "diff -r ... -r ... filename"
3177 3177 filename = diffre.search(line).group(1)
3178 3178 elif line.startswith(b'@@'):
3179 3179 inheader = False
3180 3180 elif line.startswith(b'+') and not inheader:
3181 3181 adds += 1
3182 3182 elif line.startswith(b'-') and not inheader:
3183 3183 removes += 1
3184 3184 elif line.startswith(b'GIT binary patch') or line.startswith(
3185 3185 b'Binary file'
3186 3186 ):
3187 3187 isbinary = True
3188 3188 elif line.startswith(b'rename from'):
3189 3189 filename = line[12:]
3190 3190 elif line.startswith(b'rename to'):
3191 3191 filename += b' => %s' % line[10:]
3192 3192 addresult()
3193 3193 return results
3194 3194
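# A worked example of diffstatdata() on a minimal git-style diff; the
# '---'/'+++' header lines are not counted because inheader only flips to
# False at the first '@@' hunk marker:
#
#   diffstatdata([
#       b'diff --git a/foo b/foo',
#       b'--- a/foo',
#       b'+++ b/foo',
#       b'@@ -1,1 +1,2 @@',
#       b' context',
#       b'+added',
#   ])
#   -> [(b'foo', 1, 0, False)]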
3195 3195
3196 3196 def diffstat(lines, width=80):
3197 3197 output = []
3198 3198 stats = diffstatdata(lines)
3199 3199 maxname, maxtotal, totaladds, totalremoves, hasbinary = diffstatsum(stats)
3200 3200
3201 3201 countwidth = len(str(maxtotal))
3202 3202 if hasbinary and countwidth < 3:
3203 3203 countwidth = 3
3204 3204 graphwidth = width - countwidth - maxname - 6
3205 3205 if graphwidth < 10:
3206 3206 graphwidth = 10
3207 3207
3208 3208 def scale(i):
3209 3209 if maxtotal <= graphwidth:
3210 3210 return i
3211 3211 # If diffstat runs out of room it doesn't print anything,
3212 3212 # which isn't very useful, so always print at least one + or -
3213 3213 # if there were at least some changes.
3214 3214 return max(i * graphwidth // maxtotal, int(bool(i)))
3215 3215
3216 3216 for filename, adds, removes, isbinary in stats:
3217 3217 if isbinary:
3218 3218 count = b'Bin'
3219 3219 else:
3220 3220 count = b'%d' % (adds + removes)
3221 3221 pluses = b'+' * scale(adds)
3222 3222 minuses = b'-' * scale(removes)
3223 3223 output.append(
3224 3224 b' %s%s | %*s %s%s\n'
3225 3225 % (
3226 3226 filename,
3227 3227 b' ' * (maxname - encoding.colwidth(filename)),
3228 3228 countwidth,
3229 3229 count,
3230 3230 pluses,
3231 3231 minuses,
3232 3232 )
3233 3233 )
3234 3234
3235 3235 if stats:
3236 3236 output.append(
3237 3237 _(b' %d files changed, %d insertions(+), %d deletions(-)\n')
3238 3238 % (len(stats), totaladds, totalremoves)
3239 3239 )
3240 3240
3241 3241 return b''.join(output)
3242 3242
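# A standalone illustration of the inner scale() above: once the graph must
# be compressed, even a tiny change still draws one symbol.
def _scale(i, graphwidth=50, maxtotal=200):
    if maxtotal <= graphwidth:
        return i
    return max(i * graphwidth // maxtotal, int(bool(i)))

assert _scale(0) == 0     # untouched files draw nothing
assert _scale(3) == 1     # 3 * 50 // 200 == 0, rounded up to one symbol
assert _scale(200) == 50  # the largest file fills the whole graph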
3243 3243
3244 3244 def diffstatui(*args, **kw):
3245 3245 """like diffstat(), but yields 2-tuples of (output, label) for
3246 3246 ui.write()
3247 3247 """
3248 3248
3249 3249 for line in diffstat(*args, **kw).splitlines():
3250 3250 if line and line[-1] in b'+-':
3251 3251 name, graph = line.rsplit(b' ', 1)
3252 3252 yield (name + b' ', b'')
3253 3253 m = re.search(br'\++', graph)
3254 3254 if m:
3255 3255 yield (m.group(0), b'diffstat.inserted')
3256 3256 m = re.search(br'-+', graph)
3257 3257 if m:
3258 3258 yield (m.group(0), b'diffstat.deleted')
3259 3259 else:
3260 3260 yield (line, b'')
3261 3261 yield (b'\n', b'')
@@ -1,849 +1,849 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import errno
10 10 import functools
11 11 import os
12 12 import re
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .node import hex
18 18 from . import (
19 19 changelog,
20 20 error,
21 21 manifest,
22 22 policy,
23 23 pycompat,
24 24 util,
25 25 vfs as vfsmod,
26 26 )
27 27 from .utils import hashutil
28 28
29 29 parsers = policy.importmod('parsers')
30 30 # how many bytes should be read from fncache in one read
31 31 # It is done to prevent loading large fncache files into memory
32 32 fncache_chunksize = 10 ** 6
33 33
34 34
35 35 def _matchtrackedpath(path, matcher):
36 36 """parses a fncache entry and returns whether the entry is tracking a path
37 37 matched by matcher or not.
38 38
39 39 If matcher is None, returns True"""
40 40
41 41 if matcher is None:
42 42 return True
43 43 path = decodedir(path)
44 44 if path.startswith(b'data/'):
45 45 return matcher(path[len(b'data/') : -len(b'.i')])
46 46 elif path.startswith(b'meta/'):
47 47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48 48
49 49 raise error.ProgrammingError(b"cannot decode path %s" % path)
50 50
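# A hedged example of the two fncache entry shapes handled above, where 'm'
# is any matcher object:
#
#   _matchtrackedpath(b'data/foo/bar.txt.i', m)    -> m(b'foo/bar.txt')
#   _matchtrackedpath(b'meta/foo/00manifest.i', m) -> m.visitdir(b'foo')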
51 51
52 52 # This avoids a collision between a file named foo and a dir named
53 53 # foo.i or foo.d
54 54 def _encodedir(path):
55 55 """
56 56 >>> _encodedir(b'data/foo.i')
57 57 'data/foo.i'
58 58 >>> _encodedir(b'data/foo.i/bla.i')
59 59 'data/foo.i.hg/bla.i'
60 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 61 'data/foo.i.hg.hg/bla.i'
62 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 64 """
65 65 return (
66 66 path.replace(b".hg/", b".hg.hg/")
67 67 .replace(b".i/", b".i.hg/")
68 68 .replace(b".d/", b".d.hg/")
69 69 )
70 70
71 71
72 72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 73
74 74
75 75 def decodedir(path):
76 76 """
77 77 >>> decodedir(b'data/foo.i')
78 78 'data/foo.i'
79 79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 80 'data/foo.i/bla.i'
81 81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 82 'data/foo.i.hg/bla.i'
83 83 """
84 84 if b".hg/" not in path:
85 85 return path
86 86 return (
87 87 path.replace(b".d.hg/", b".d/")
88 88 .replace(b".i.hg/", b".i/")
89 89 .replace(b".hg.hg/", b".hg/")
90 90 )
91 91
92 92
93 93 def _reserved():
94 94 """characters that are problematic for filesystems
95 95
96 96 * ascii escapes (0..31)
97 97 * ascii hi (126..255)
98 98 * windows specials
99 99
100 100 these characters will be escaped by the encode functions
101 101 """
102 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 103 for x in range(32):
104 104 yield x
105 105 for x in range(126, 256):
106 106 yield x
107 107 for x in winreserved:
108 108 yield x
109 109
110 110
111 111 def _buildencodefun():
112 112 """
113 113 >>> enc, dec = _buildencodefun()
114 114
115 115 >>> enc(b'nothing/special.txt')
116 116 'nothing/special.txt'
117 117 >>> dec(b'nothing/special.txt')
118 118 'nothing/special.txt'
119 119
120 120 >>> enc(b'HELLO')
121 121 '_h_e_l_l_o'
122 122 >>> dec(b'_h_e_l_l_o')
123 123 'HELLO'
124 124
125 125 >>> enc(b'hello:world?')
126 126 'hello~3aworld~3f'
127 127 >>> dec(b'hello~3aworld~3f')
128 128 'hello:world?'
129 129
130 130 >>> enc(b'the\\x07quick\\xADshot')
131 131 'the~07quick~adshot'
132 132 >>> dec(b'the~07quick~adshot')
133 133 'the\\x07quick\\xadshot'
134 134 """
135 135 e = b'_'
136 136 xchr = pycompat.bytechr
137 137 asciistr = list(map(xchr, range(127)))
138 138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 139
140 140 cmap = {x: x for x in asciistr}
141 141 for x in _reserved():
142 142 cmap[xchr(x)] = b"~%02x" % x
143 143 for x in capitals + [ord(e)]:
144 144 cmap[xchr(x)] = e + xchr(x).lower()
145 145
146 146 dmap = {}
147 147 for k, v in cmap.items():
148 148 dmap[v] = k
149 149
150 150 def decode(s):
151 151 i = 0
152 152 while i < len(s):
153 153 for l in pycompat.xrange(1, 4):
154 154 try:
155 155 yield dmap[s[i : i + l]]
156 156 i += l
157 157 break
158 158 except KeyError:
159 159 pass
160 160 else:
161 161 raise KeyError
162 162
163 163 return (
164 164 lambda s: b''.join(
165 165 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
166 166 ),
167 167 lambda s: b''.join(list(decode(s))),
168 168 )
169 169
170 170
171 171 _encodefname, _decodefname = _buildencodefun()
172 172
173 173
174 174 def encodefilename(s):
175 175 """
176 176 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
177 177 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
178 178 """
179 179 return _encodefname(encodedir(s))
180 180
181 181
182 182 def decodefilename(s):
183 183 """
184 184 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
185 185 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
186 186 """
187 187 return decodedir(_decodefname(s))
188 188
189 189
190 190 def _buildlowerencodefun():
191 191 """
192 192 >>> f = _buildlowerencodefun()
193 193 >>> f(b'nothing/special.txt')
194 194 'nothing/special.txt'
195 195 >>> f(b'HELLO')
196 196 'hello'
197 197 >>> f(b'hello:world?')
198 198 'hello~3aworld~3f'
199 199 >>> f(b'the\\x07quick\\xADshot')
200 200 'the~07quick~adshot'
201 201 """
202 202 xchr = pycompat.bytechr
203 203 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
204 204 for x in _reserved():
205 205 cmap[xchr(x)] = b"~%02x" % x
206 206 for x in range(ord(b"A"), ord(b"Z") + 1):
207 207 cmap[xchr(x)] = xchr(x).lower()
208 208
209 209 def lowerencode(s):
210 210 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
211 211
212 212 return lowerencode
213 213
214 214
215 215 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
216 216
217 217 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
218 218 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
219 219 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
220 220
221 221
222 222 def _auxencode(path, dotencode):
223 223 """
224 224 Encodes filenames containing names reserved by Windows or which end in
225 225 period or space. Does not touch other single reserved characters c.
226 226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
227 227 Additionally encodes space or period at the beginning, if dotencode is
228 228 True. Parameter path is assumed to be all lowercase.
229 229 A segment only needs encoding if a reserved name appears as a
230 230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
231 231 doesn't need encoding.
232 232
233 233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
234 234 >>> _auxencode(s.split(b'/'), True)
235 235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
236 236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
237 237 >>> _auxencode(s.split(b'/'), False)
238 238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
239 239 >>> _auxencode([b'foo. '], True)
240 240 ['foo.~20']
241 241 >>> _auxencode([b' .foo'], True)
242 242 ['~20.foo']
243 243 """
244 244 for i, n in enumerate(path):
245 245 if not n:
246 246 continue
247 247 if dotencode and n[0] in b'. ':
248 248 n = b"~%02x" % ord(n[0:1]) + n[1:]
249 249 path[i] = n
250 250 else:
251 251 l = n.find(b'.')
252 252 if l == -1:
253 253 l = len(n)
254 254 if (l == 3 and n[:3] in _winres3) or (
255 255 l == 4
256 256 and n[3:4] <= b'9'
257 257 and n[3:4] >= b'1'
258 258 and n[:3] in _winres4
259 259 ):
260 260 # encode third letter ('aux' -> 'au~78')
261 261 ec = b"~%02x" % ord(n[2:3])
262 262 n = n[0:2] + ec + n[3:]
263 263 path[i] = n
264 264 if n[-1] in b'. ':
265 265 # encode last period or space ('foo...' -> 'foo..~2e')
266 266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
267 267 return path
268 268
269 269
270 270 _maxstorepathlen = 120
271 271 _dirprefixlen = 8
272 272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
273 273
274 274
275 275 def _hashencode(path, dotencode):
276 276 digest = hex(hashutil.sha1(path).digest())
277 277 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
278 278 parts = _auxencode(le, dotencode)
279 279 basename = parts[-1]
280 280 _root, ext = os.path.splitext(basename)
281 281 sdirs = []
282 282 sdirslen = 0
283 283 for p in parts[:-1]:
284 284 d = p[:_dirprefixlen]
285 285 if d[-1] in b'. ':
286 286 # Windows can't access dirs ending in period or space
287 287 d = d[:-1] + b'_'
288 288 if sdirslen == 0:
289 289 t = len(d)
290 290 else:
291 291 t = sdirslen + 1 + len(d)
292 292 if t > _maxshortdirslen:
293 293 break
294 294 sdirs.append(d)
295 295 sdirslen = t
296 296 dirs = b'/'.join(sdirs)
297 297 if len(dirs) > 0:
298 298 dirs += b'/'
299 299 res = b'dh/' + dirs + digest + ext
300 300 spaceleft = _maxstorepathlen - len(res)
301 301 if spaceleft > 0:
302 302 filler = basename[:spaceleft]
303 303 res = b'dh/' + dirs + filler + digest + ext
304 304 return res
305 305
306 306
307 307 def _hybridencode(path, dotencode):
308 308 """encodes path with a length limit
309 309
310 310 Encodes all paths that begin with 'data/', according to the following.
311 311
312 312 Default encoding (reversible):
313 313
314 314 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
315 315 characters are encoded as '~xx', where xx is the two digit hex code
316 316 of the character (see encodefilename).
317 317 Relevant path components consisting of Windows reserved filenames are
318 318 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
319 319
320 320 Hashed encoding (not reversible):
321 321
322 322 If the default-encoded path is longer than _maxstorepathlen, a
323 323 non-reversible hybrid hashing of the path is done instead.
324 324 This encoding uses up to _dirprefixlen characters of all directory
325 325 levels of the lowerencoded path, but not more levels than can fit into
326 326 _maxshortdirslen.
327 327 Then follows the filler followed by the sha digest of the full path.
328 328 The filler is the beginning of the basename of the lowerencoded path
329 329 (the basename is everything after the last path separator). The filler
330 330 is as long as possible, filling in characters from the basename until
331 331 the encoded path has _maxstorepathlen characters (or all chars of the
332 332 basename have been taken).
333 333 The extension (e.g. '.i' or '.d') is preserved.
334 334
335 335 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
336 336 encoding is used.
337 337 """
338 338 path = encodedir(path)
339 339 ef = _encodefname(path).split(b'/')
340 340 res = b'/'.join(_auxencode(ef, dotencode))
341 341 if len(res) > _maxstorepathlen:
342 342 res = _hashencode(path, dotencode)
343 343 return res
344 344
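# A hedged example of the two regimes: a short path round-trips through the
# reversible default encoding, while an over-long one falls into the hashed
# 'dh/' form (the exact result depends on the sha1 of the full path, so it
# is not spelled out here):
#
#   _hybridencode(b'data/FOO.i', True)  -> b'data/_f_o_o.i'
#   out = _hybridencode(b'data/' + b'x' * 200 + b'.i', True)
#   assert out.startswith(b'dh/') and len(out) <= _maxstorepathlen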
345 345
346 346 def _pathencode(path):
347 347 de = encodedir(path)
348 348 if len(path) > _maxstorepathlen:
349 349 return _hashencode(de, True)
350 350 ef = _encodefname(de).split(b'/')
351 351 res = b'/'.join(_auxencode(ef, True))
352 352 if len(res) > _maxstorepathlen:
353 353 return _hashencode(de, True)
354 354 return res
355 355
356 356
357 357 _pathencode = getattr(parsers, 'pathencode', _pathencode)
358 358
359 359
360 360 def _plainhybridencode(f):
361 361 return _hybridencode(f, False)
362 362
363 363
364 364 def _calcmode(vfs):
365 365 try:
366 366 # files in .hg/ will be created using this mode
367 367 mode = vfs.stat().st_mode
368 368 # avoid some useless chmods
369 369 if (0o777 & ~util.umask) == (0o777 & mode):
370 370 mode = None
371 371 except OSError:
372 372 mode = None
373 373 return mode
374 374
375 375
376 376 _data = [
377 377 b'bookmarks',
378 378 b'narrowspec',
379 379 b'data',
380 380 b'meta',
381 381 b'00manifest.d',
382 382 b'00manifest.i',
383 383 b'00changelog.d',
384 384 b'00changelog.i',
385 385 b'phaseroots',
386 386 b'obsstore',
387 387 b'requires',
388 388 ]
389 389
390 390 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
391 391 REVLOG_FILES_OTHER_EXT = (
392 392 b'.idx',
393 393 b'.d',
394 394 b'.dat',
395 395 b'.n',
396 396 b'.nd',
397 397 b'.sda',
398 398 b'd.tmpcensored',
399 399 )
400 400 # files that are "volatile" and might change between listing and streaming
401 401 #
402 402 # note: the ".nd" files are nodemap data and won't "change" but they might be
403 403 # deleted.
404 404 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
405 405
406 406 # some exception to the above matching
407 407 #
408 408 # XXX This is currently not in use because of issue6542
409 409 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
410 410
411 411
412 412 def is_revlog(f, kind, st):
413 413 if kind != stat.S_IFREG:
414 414 return None
415 415 return revlog_type(f)
416 416
417 417
418 418 def revlog_type(f):
419 419 # XXX we need to filter the `undo.` files created by transactions here;
420 420 # however, being naive about it also filters revlogs for `undo.*` files,
421 421 # leading to issue6542. So we no longer use EXCLUDED.
422 422 if f.endswith(REVLOG_FILES_MAIN_EXT):
423 423 return FILEFLAGS_REVLOG_MAIN
424 424 elif f.endswith(REVLOG_FILES_OTHER_EXT):
425 425 t = FILETYPE_FILELOG_OTHER
426 426 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
427 427 t |= FILEFLAGS_VOLATILE
428 428 return t
429 429 return None
430 430
431 431
432 432 # the file is part of changelog data
433 433 FILEFLAGS_CHANGELOG = 1 << 13
434 434 # the file is part of manifest data
435 435 FILEFLAGS_MANIFESTLOG = 1 << 12
436 436 # the file is part of filelog data
437 437 FILEFLAGS_FILELOG = 1 << 11
438 438 # files that are not directly part of a revlog
439 439 FILEFLAGS_OTHER = 1 << 10
440 440
441 441 # the main entry point for a revlog
442 442 FILEFLAGS_REVLOG_MAIN = 1 << 1
443 443 # a secondary file for a revlog
444 444 FILEFLAGS_REVLOG_OTHER = 1 << 0
445 445
446 446 # files that are "volatile" and might change between listing and streaming
447 447 FILEFLAGS_VOLATILE = 1 << 20
448 448
449 449 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
450 450 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
451 451 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
452 452 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
453 453 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
454 454 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
455 455 FILETYPE_OTHER = FILEFLAGS_OTHER
456 456
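# The FILETYPE_* values above are plain bit-flag unions; a quick standalone
# sanity check of how they compose and decompose:
assert FILETYPE_FILELOG_MAIN == FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
_t = FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE  # e.g. a '.nd' nodemap file
assert _t & FILEFLAGS_FILELOG          # belongs to a filelog
assert not _t & FILEFLAGS_REVLOG_MAIN  # but is not the main '.i' file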
457 457
458 458 class basicstore(object):
459 459 '''base class for local repository stores'''
460 460
461 461 def __init__(self, path, vfstype):
462 462 vfs = vfstype(path)
463 463 self.path = vfs.base
464 464 self.createmode = _calcmode(vfs)
465 465 vfs.createmode = self.createmode
466 466 self.rawvfs = vfs
467 467 self.vfs = vfsmod.filtervfs(vfs, encodedir)
468 468 self.opener = self.vfs
469 469
470 470 def join(self, f):
471 471 return self.path + b'/' + encodedir(f)
472 472
473 473 def _walk(self, relpath, recurse):
474 474 '''yields (revlog_type, unencoded, size)'''
475 475 path = self.path
476 476 if relpath:
477 477 path += b'/' + relpath
478 478 striplen = len(self.path) + 1
479 479 l = []
480 480 if self.rawvfs.isdir(path):
481 481 visit = [path]
482 482 readdir = self.rawvfs.readdir
483 483 while visit:
484 484 p = visit.pop()
485 485 for f, kind, st in readdir(p, stat=True):
486 486 fp = p + b'/' + f
487 487 rl_type = is_revlog(f, kind, st)
488 488 if rl_type is not None:
489 489 n = util.pconvert(fp[striplen:])
490 490 l.append((rl_type, decodedir(n), st.st_size))
491 491 elif kind == stat.S_IFDIR and recurse:
492 492 visit.append(fp)
493 493 l.sort()
494 494 return l
495 495
496 496 def changelog(self, trypending, concurrencychecker=None):
497 497 return changelog.changelog(
498 498 self.vfs,
499 499 trypending=trypending,
500 500 concurrencychecker=concurrencychecker,
501 501 )
502 502
503 503 def manifestlog(self, repo, storenarrowmatch):
504 504 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
505 505 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
506 506
507 507 def datafiles(self, matcher=None, undecodable=None):
508 508 """Like walk, but excluding the changelog and root manifest.
509 509
510 510 When [undecodable] is None, revlog names that can't be
511 511 decoded cause an exception. When it is provided, it should
512 512 be a list and the filenames that can't be decoded are added
513 513 to it instead. This is very rarely needed."""
514 514 files = self._walk(b'data', True) + self._walk(b'meta', True)
515 515 for (t, u, s) in files:
516 516 yield (FILEFLAGS_FILELOG | t, u, s)
517 517
518 518 def topfiles(self):
519 519 # yield manifest before changelog
520 520 files = reversed(self._walk(b'', False))
521 521 for (t, u, s) in files:
522 522 if u.startswith(b'00changelog'):
523 523 yield (FILEFLAGS_CHANGELOG | t, u, s)
524 524 elif u.startswith(b'00manifest'):
525 525 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
526 526 else:
527 527 yield (FILETYPE_OTHER | t, u, s)
528 528
529 529 def walk(self, matcher=None):
530 530 """return file related to data storage (ie: revlogs)
531 531
532 532 yields (file_type, unencoded, size)
533 533
534 534 if a matcher is passed, only the storage files of tracked paths
535 535 matching the matcher are yielded
536 536 """
537 537 # yield data files first
538 538 for x in self.datafiles(matcher):
539 539 yield x
540 540 for x in self.topfiles():
541 541 yield x
542 542
543 543 def copylist(self):
544 544 return _data
545 545
546 546 def write(self, tr):
547 547 pass
548 548
549 549 def invalidatecaches(self):
550 550 pass
551 551
552 552 def markremoved(self, fn):
553 553 pass
554 554
555 555 def __contains__(self, path):
556 556 '''Checks if the store contains path'''
557 557 path = b"/".join((b"data", path))
558 558 # file?
559 559 if self.vfs.exists(path + b".i"):
560 560 return True
561 561 # dir?
562 562 if not path.endswith(b"/"):
563 563 path = path + b"/"
564 564 return self.vfs.exists(path)
565 565
566 566
567 567 class encodedstore(basicstore):
568 568 def __init__(self, path, vfstype):
569 569 vfs = vfstype(path + b'/store')
570 570 self.path = vfs.base
571 571 self.createmode = _calcmode(vfs)
572 572 vfs.createmode = self.createmode
573 573 self.rawvfs = vfs
574 574 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
575 575 self.opener = self.vfs
576 576
577 577 # note: topfiles would also need a decode phase. It is just that in
578 578 # practice we do not have any file outside of `data/` that needs encoding.
579 579 # However that might change, so we should probably add a test and
580 580 # encoding/decoding for it too. See issue6548.
581 581
582 582 def datafiles(self, matcher=None, undecodable=None):
583 583 for t, f1, size in super(encodedstore, self).datafiles():
584 584 try:
585 585 f2 = decodefilename(f1)
586 586 except KeyError:
587 587 if undecodable is None:
588 588 msg = _(b'undecodable revlog name %s') % f1
589 589 raise error.StorageError(msg)
590 590 else:
591 591 undecodable.append(f1)
592 592 continue
593 593 if not _matchtrackedpath(f2, matcher):
594 594 continue
595 595 yield t, f2, size
596 596
597 597 def join(self, f):
598 598 return self.path + b'/' + encodefilename(f)
599 599
600 600 def copylist(self):
601 601 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
602 602
603 603
604 604 class fncache(object):
605 605 # the filename used to be partially encoded
606 606 # hence the encodedir/decodedir dance
607 607 def __init__(self, vfs):
608 608 self.vfs = vfs
609 609 self.entries = None
610 610 self._dirty = False
611 611 # set of new additions to fncache
612 612 self.addls = set()
613 613
614 614 def ensureloaded(self, warn=None):
615 615 """read the fncache file if not already read.
616 616
617 617 If the file on disk is corrupted, raise. If warn is provided,
618 618 warn and keep going instead."""
619 619 if self.entries is None:
620 620 self._load(warn)
621 621
622 622 def _load(self, warn=None):
623 623 '''fill the entries from the fncache file'''
624 624 self._dirty = False
625 625 try:
626 626 fp = self.vfs(b'fncache', mode=b'rb')
627 627 except IOError:
628 628 # skip nonexistent file
629 629 self.entries = set()
630 630 return
631 631
632 632 self.entries = set()
633 633 chunk = b''
634 634 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
635 635 chunk += c
636 636 try:
637 637 p = chunk.rindex(b'\n')
638 638 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
639 639 chunk = chunk[p + 1 :]
640 640 except ValueError:
641 641 # substring '\n' not found, maybe the entry is bigger than the
642 642 # chunksize, so let's keep iterating
643 643 pass
644 644
645 645 if chunk:
646 646 msg = _(b"fncache does not end with a newline")
647 647 if warn:
648 648 warn(msg + b'\n')
649 649 else:
650 650 raise error.Abort(
651 651 msg,
652 652 hint=_(
653 653 b"use 'hg debugrebuildfncache' to "
654 654 b"rebuild the fncache"
655 655 ),
656 656 )
657 657 self._checkentries(fp, warn)
658 658 fp.close()
659 659
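# The read loop above relies on the two-argument form of iter() to stream a
# file in fixed-size chunks until EOF; a standalone illustration:
#
#   import functools, io
#   fp = io.BytesIO(b'one\ntwo\nthree\n')
#   for chunk in iter(functools.partial(fp.read, 4), b''):
#       ...  # chunk is b'one\n', b'two\n', b'thre', b'e\n' in turn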
660 660 def _checkentries(self, fp, warn):
661 661 """make sure there is no empty string in entries"""
662 662 if b'' in self.entries:
663 663 fp.seek(0)
664 for n, line in enumerate(util.iterfile(fp)):
664 for n, line in enumerate(fp):
665 665 if not line.rstrip(b'\n'):
666 666 t = _(b'invalid entry in fncache, line %d') % (n + 1)
667 667 if warn:
668 668 warn(t + b'\n')
669 669 else:
670 670 raise error.Abort(t)
671 671
672 672 def write(self, tr):
673 673 if self._dirty:
674 674 assert self.entries is not None
675 675 self.entries = self.entries | self.addls
676 676 self.addls = set()
677 677 tr.addbackup(b'fncache')
678 678 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
679 679 if self.entries:
680 680 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
681 681 fp.close()
682 682 self._dirty = False
683 683 if self.addls:
684 684 # if we have just new entries, let's append them to the fncache
685 685 tr.addbackup(b'fncache')
686 686 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
687 687 if self.addls:
688 688 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
689 689 fp.close()
690 690 self.entries = None
691 691 self.addls = set()
692 692
693 693 def add(self, fn):
694 694 if self.entries is None:
695 695 self._load()
696 696 if fn not in self.entries:
697 697 self.addls.add(fn)
698 698
699 699 def remove(self, fn):
700 700 if self.entries is None:
701 701 self._load()
702 702 if fn in self.addls:
703 703 self.addls.remove(fn)
704 704 return
705 705 try:
706 706 self.entries.remove(fn)
707 707 self._dirty = True
708 708 except KeyError:
709 709 pass
710 710
711 711 def __contains__(self, fn):
712 712 if fn in self.addls:
713 713 return True
714 714 if self.entries is None:
715 715 self._load()
716 716 return fn in self.entries
717 717
718 718 def __iter__(self):
719 719 if self.entries is None:
720 720 self._load()
721 721 return iter(self.entries | self.addls)
722 722
723 723
724 724 class _fncachevfs(vfsmod.proxyvfs):
725 725 def __init__(self, vfs, fnc, encode):
726 726 vfsmod.proxyvfs.__init__(self, vfs)
727 727 self.fncache = fnc
728 728 self.encode = encode
729 729
730 730 def __call__(self, path, mode=b'r', *args, **kw):
731 731 encoded = self.encode(path)
732 732 if mode not in (b'r', b'rb') and (
733 733 path.startswith(b'data/') or path.startswith(b'meta/')
734 734 ):
735 735 # do not trigger a fncache load when adding a file that already is
736 736 # known to exist.
737 737 notload = self.fncache.entries is None and self.vfs.exists(encoded)
738 738 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
739 739 # when appending to an existing file, if the file has size zero,
740 740 # it should be considered as missing. Such zero-size files are
741 741 # the result of truncation when a transaction is aborted.
742 742 notload = False
743 743 if not notload:
744 744 self.fncache.add(path)
745 745 return self.vfs(encoded, mode, *args, **kw)
746 746
747 747 def join(self, path):
748 748 if path:
749 749 return self.vfs.join(self.encode(path))
750 750 else:
751 751 return self.vfs.join(path)
752 752
753 753 def register_file(self, path):
754 754 """generic hook point to lets fncache steer its stew"""
755 755 if path.startswith(b'data/') or path.startswith(b'meta/'):
756 756 self.fncache.add(path)
757 757
758 758
759 759 class fncachestore(basicstore):
760 760 def __init__(self, path, vfstype, dotencode):
761 761 if dotencode:
762 762 encode = _pathencode
763 763 else:
764 764 encode = _plainhybridencode
765 765 self.encode = encode
766 766 vfs = vfstype(path + b'/store')
767 767 self.path = vfs.base
768 768 self.pathsep = self.path + b'/'
769 769 self.createmode = _calcmode(vfs)
770 770 vfs.createmode = self.createmode
771 771 self.rawvfs = vfs
772 772 fnc = fncache(vfs)
773 773 self.fncache = fnc
774 774 self.vfs = _fncachevfs(vfs, fnc, encode)
775 775 self.opener = self.vfs
776 776
777 777 def join(self, f):
778 778 return self.pathsep + self.encode(f)
779 779
780 780 def getsize(self, path):
781 781 return self.rawvfs.stat(path).st_size
782 782
783 783 def datafiles(self, matcher=None, undecodable=None):
784 784 for f in sorted(self.fncache):
785 785 if not _matchtrackedpath(f, matcher):
786 786 continue
787 787 ef = self.encode(f)
788 788 try:
789 789 t = revlog_type(f)
790 790 assert t is not None, f
791 791 t |= FILEFLAGS_FILELOG
792 792 yield t, f, self.getsize(ef)
793 793 except OSError as err:
794 794 if err.errno != errno.ENOENT:
795 795 raise
796 796
797 797 def copylist(self):
798 798 d = (
799 799 b'bookmarks',
800 800 b'narrowspec',
801 801 b'data',
802 802 b'meta',
803 803 b'dh',
804 804 b'fncache',
805 805 b'phaseroots',
806 806 b'obsstore',
807 807 b'00manifest.d',
808 808 b'00manifest.i',
809 809 b'00changelog.d',
810 810 b'00changelog.i',
811 811 b'requires',
812 812 )
813 813 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
814 814
815 815 def write(self, tr):
816 816 self.fncache.write(tr)
817 817
818 818 def invalidatecaches(self):
819 819 self.fncache.entries = None
820 820 self.fncache.addls = set()
821 821
822 822 def markremoved(self, fn):
823 823 self.fncache.remove(fn)
824 824
825 825 def _exists(self, f):
826 826 ef = self.encode(f)
827 827 try:
828 828 self.getsize(ef)
829 829 return True
830 830 except OSError as err:
831 831 if err.errno != errno.ENOENT:
832 832 raise
833 833 # nonexistent entry
834 834 return False
835 835
836 836 def __contains__(self, path):
837 837 '''Checks if the store contains path'''
838 838 path = b"/".join((b"data", path))
839 839 # check for files (exact match)
840 840 e = path + b'.i'
841 841 if e in self.fncache and self._exists(e):
842 842 return True
843 843 # now check for directories (prefix match)
844 844 if not path.endswith(b'/'):
845 845 path += b'/'
846 846 for e in self.fncache:
847 847 if e.startswith(path) and self._exists(e):
848 848 return True
849 849 return False