##// END OF EJS Templates
py3: pass unicode strings to hasattr() throughout...
Martin von Zweigbergk -
r52039:de9cbc5a stable
parent child Browse files
Show More
@@ -1,1071 +1,1071 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import functools
9 9 import os
10 10 import pickle
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.pycompat import open
15 15 from mercurial import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from mercurial.utils import (
23 23 dateutil,
24 24 procutil,
25 25 stringutil,
26 26 )
27 27
28 28
29 29 class logentry:
30 30 """Class logentry has the following attributes:
31 31 .author - author name as CVS knows it
32 32 .branch - name of branch this revision is on
33 33 .branches - revision tuple of branches starting at this revision
34 34 .comment - commit message
35 35 .commitid - CVS commitid or None
36 36 .date - the commit date as a (time, tz) tuple
37 37 .dead - true if file revision is dead
38 38 .file - Name of file
39 39 .lines - a tuple (+lines, -lines) or None
40 40 .parent - Previous revision of this entry
41 41 .rcs - name of file as returned from CVS
42 42 .revision - revision number as tuple
43 43 .tags - list of tags on the file
44 44 .synthetic - is this a synthetic "file ... added on ..." revision?
45 45 .mergepoint - the branch that has been merged from (if present in
46 46 rlog output) or None
47 47 .branchpoints - the branches that start at the current entry or empty
48 48 """
49 49
50 50 def __init__(self, **entries):
51 51 self.synthetic = False
52 52 self.__dict__.update(entries)
53 53
54 54 def __repr__(self):
55 55 items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__))
56 56 return "%s(%s)" % (type(self).__name__, ", ".join(items))
57 57
58 58
59 59 class logerror(Exception):
60 60 pass
61 61
62 62
63 63 def getrepopath(cvspath):
64 64 """Return the repository path from a CVS path.
65 65
66 66 >>> getrepopath(b'/foo/bar')
67 67 '/foo/bar'
68 68 >>> getrepopath(b'c:/foo/bar')
69 69 '/foo/bar'
70 70 >>> getrepopath(b':pserver:10/foo/bar')
71 71 '/foo/bar'
72 72 >>> getrepopath(b':pserver:10c:/foo/bar')
73 73 '/foo/bar'
74 74 >>> getrepopath(b':pserver:/foo/bar')
75 75 '/foo/bar'
76 76 >>> getrepopath(b':pserver:c:/foo/bar')
77 77 '/foo/bar'
78 78 >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
79 79 '/foo/bar'
80 80 >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
81 81 '/foo/bar'
82 82 >>> getrepopath(b'user@server/path/to/repository')
83 83 '/path/to/repository'
84 84 """
85 85 # According to CVS manual, CVS paths are expressed like:
86 86 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
87 87 #
88 88 # CVSpath is splitted into parts and then position of the first occurrence
89 89 # of the '/' char after the '@' is located. The solution is the rest of the
90 90 # string after that '/' sign including it
91 91
92 92 parts = cvspath.split(b':')
93 93 atposition = parts[-1].find(b'@')
94 94 start = 0
95 95
96 96 if atposition != -1:
97 97 start = atposition
98 98
99 99 repopath = parts[-1][parts[-1].find(b'/', start) :]
100 100 return repopath
101 101
102 102
103 103 def createlog(ui, directory=None, root=b"", rlog=True, cache=None):
104 104 '''Collect the CVS rlog'''
105 105
106 106 # Because we store many duplicate commit log messages, reusing strings
107 107 # saves a lot of memory and pickle storage space.
108 108 _scache = {}
109 109
110 110 def scache(s):
111 111 """return a shared version of a string"""
112 112 return _scache.setdefault(s, s)
113 113
114 114 ui.status(_(b'collecting CVS rlog\n'))
115 115
116 116 log = [] # list of logentry objects containing the CVS state
117 117
118 118 # patterns to match in CVS (r)log output, by state of use
119 119 re_00 = re.compile(b'RCS file: (.+)$')
120 120 re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
121 121 re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
122 122 re_03 = re.compile(
123 123 b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$"
124 124 )
125 125 re_10 = re.compile(b'Working file: (.+)$')
126 126 re_20 = re.compile(b'symbolic names:')
127 127 re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
128 128 re_31 = re.compile(b'----------------------------$')
129 129 re_32 = re.compile(
130 130 b'======================================='
131 131 b'======================================$'
132 132 )
133 133 re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
134 134 re_60 = re.compile(
135 135 br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
136 136 br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
137 137 br'(\s+commitid:\s+([^;]+);)?'
138 138 br'(.*mergepoint:\s+([^;]+);)?'
139 139 )
140 140 re_70 = re.compile(b'branches: (.+);$')
141 141
142 142 file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')
143 143
144 144 prefix = b'' # leading path to strip of what we get from CVS
145 145
146 146 if directory is None:
147 147 # Current working directory
148 148
149 149 # Get the real directory in the repository
150 150 try:
151 151 with open(os.path.join(b'CVS', b'Repository'), b'rb') as f:
152 152 prefix = f.read().strip()
153 153 directory = prefix
154 154 if prefix == b".":
155 155 prefix = b""
156 156 except IOError:
157 157 raise logerror(_(b'not a CVS sandbox'))
158 158
159 159 if prefix and not prefix.endswith(pycompat.ossep):
160 160 prefix += pycompat.ossep
161 161
162 162 # Use the Root file in the sandbox, if it exists
163 163 try:
164 164 root = open(os.path.join(b'CVS', b'Root'), b'rb').read().strip()
165 165 except IOError:
166 166 pass
167 167
168 168 if not root:
169 169 root = encoding.environ.get(b'CVSROOT', b'')
170 170
171 171 # read log cache if one exists
172 172 oldlog = []
173 173 date = None
174 174
175 175 if cache:
176 176 cachedir = os.path.expanduser(b'~/.hg.cvsps')
177 177 if not os.path.exists(cachedir):
178 178 os.mkdir(cachedir)
179 179
180 180 # The cvsps cache pickle needs a uniquified name, based on the
181 181 # repository location. The address may have all sort of nasties
182 182 # in it, slashes, colons and such. So here we take just the
183 183 # alphanumeric characters, concatenated in a way that does not
184 184 # mix up the various components, so that
185 185 # :pserver:user@server:/path
186 186 # and
187 187 # /pserver/user/server/path
188 188 # are mapped to different cache file names.
189 189 cachefile = root.split(b":") + [directory, b"cache"]
190 190 cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
191 191 cachefile = os.path.join(
192 192 cachedir, b'.'.join([s for s in cachefile if s])
193 193 )
194 194
195 195 if cache == b'update':
196 196 try:
197 197 ui.note(_(b'reading cvs log cache %s\n') % cachefile)
198 198 oldlog = pickle.load(open(cachefile, b'rb'))
199 199 for e in oldlog:
200 200 if not (
201 hasattr(e, b'branchpoints')
202 and hasattr(e, b'commitid')
203 and hasattr(e, b'mergepoint')
201 hasattr(e, 'branchpoints')
202 and hasattr(e, 'commitid')
203 and hasattr(e, 'mergepoint')
204 204 ):
205 205 ui.status(_(b'ignoring old cache\n'))
206 206 oldlog = []
207 207 break
208 208
209 209 ui.note(_(b'cache has %d log entries\n') % len(oldlog))
210 210 except Exception as e:
211 211 ui.note(_(b'error reading cache: %r\n') % e)
212 212
213 213 if oldlog:
214 214 date = oldlog[-1].date # last commit date as a (time,tz) tuple
215 215 date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2')
216 216
217 217 # build the CVS commandline
218 218 cmd = [b'cvs', b'-q']
219 219 if root:
220 220 cmd.append(b'-d%s' % root)
221 221 p = util.normpath(getrepopath(root))
222 222 if not p.endswith(b'/'):
223 223 p += b'/'
224 224 if prefix:
225 225 # looks like normpath replaces "" by "."
226 226 prefix = p + util.normpath(prefix)
227 227 else:
228 228 prefix = p
229 229 cmd.append([b'log', b'rlog'][rlog])
230 230 if date:
231 231 # no space between option and date string
232 232 cmd.append(b'-d>%s' % date)
233 233 cmd.append(directory)
234 234
235 235 # state machine begins here
236 236 tags = {} # dictionary of revisions on current file with their tags
237 237 branchmap = {} # mapping between branch names and revision numbers
238 238 rcsmap = {}
239 239 state = 0
240 240 store = False # set when a new record can be appended
241 241
242 242 cmd = [procutil.shellquote(arg) for arg in cmd]
243 243 ui.note(_(b"running %s\n") % (b' '.join(cmd)))
244 244 ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
245 245
246 246 pfp = procutil.popen(b' '.join(cmd), b'rb')
247 247 peek = util.fromnativeeol(pfp.readline())
248 248 while True:
249 249 line = peek
250 250 if line == b'':
251 251 break
252 252 peek = util.fromnativeeol(pfp.readline())
253 253 if line.endswith(b'\n'):
254 254 line = line[:-1]
255 255 # ui.debug('state=%d line=%r\n' % (state, line))
256 256
257 257 if state == 0:
258 258 # initial state, consume input until we see 'RCS file'
259 259 match = re_00.match(line)
260 260 if match:
261 261 rcs = match.group(1)
262 262 tags = {}
263 263 if rlog:
264 264 filename = util.normpath(rcs[:-2])
265 265 if filename.startswith(prefix):
266 266 filename = filename[len(prefix) :]
267 267 if filename.startswith(b'/'):
268 268 filename = filename[1:]
269 269 if filename.startswith(b'Attic/'):
270 270 filename = filename[6:]
271 271 else:
272 272 filename = filename.replace(b'/Attic/', b'/')
273 273 state = 2
274 274 continue
275 275 state = 1
276 276 continue
277 277 match = re_01.match(line)
278 278 if match:
279 279 raise logerror(match.group(1))
280 280 match = re_02.match(line)
281 281 if match:
282 282 raise logerror(match.group(2))
283 283 if re_03.match(line):
284 284 raise logerror(line)
285 285
286 286 elif state == 1:
287 287 # expect 'Working file' (only when using log instead of rlog)
288 288 match = re_10.match(line)
289 289 assert match, _(b'RCS file must be followed by working file')
290 290 filename = util.normpath(match.group(1))
291 291 state = 2
292 292
293 293 elif state == 2:
294 294 # expect 'symbolic names'
295 295 if re_20.match(line):
296 296 branchmap = {}
297 297 state = 3
298 298
299 299 elif state == 3:
300 300 # read the symbolic names and store as tags
301 301 match = re_30.match(line)
302 302 if match:
303 303 rev = [int(x) for x in match.group(2).split(b'.')]
304 304
305 305 # Convert magic branch number to an odd-numbered one
306 306 revn = len(rev)
307 307 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
308 308 rev = rev[:-2] + rev[-1:]
309 309 rev = tuple(rev)
310 310
311 311 if rev not in tags:
312 312 tags[rev] = []
313 313 tags[rev].append(match.group(1))
314 314 branchmap[match.group(1)] = match.group(2)
315 315
316 316 elif re_31.match(line):
317 317 state = 5
318 318 elif re_32.match(line):
319 319 state = 0
320 320
321 321 elif state == 4:
322 322 # expecting '------' separator before first revision
323 323 if re_31.match(line):
324 324 state = 5
325 325 else:
326 326 assert not re_32.match(line), _(
327 327 b'must have at least some revisions'
328 328 )
329 329
330 330 elif state == 5:
331 331 # expecting revision number and possibly (ignored) lock indication
332 332 # we create the logentry here from values stored in states 0 to 4,
333 333 # as this state is re-entered for subsequent revisions of a file.
334 334 match = re_50.match(line)
335 335 assert match, _(b'expected revision number')
336 336 e = logentry(
337 337 rcs=scache(rcs),
338 338 file=scache(filename),
339 339 revision=tuple([int(x) for x in match.group(1).split(b'.')]),
340 340 branches=[],
341 341 parent=None,
342 342 commitid=None,
343 343 mergepoint=None,
344 344 branchpoints=set(),
345 345 )
346 346
347 347 state = 6
348 348
349 349 elif state == 6:
350 350 # expecting date, author, state, lines changed
351 351 match = re_60.match(line)
352 352 assert match, _(b'revision must be followed by date line')
353 353 d = match.group(1)
354 354 if d[2] == b'/':
355 355 # Y2K
356 356 d = b'19' + d
357 357
358 358 if len(d.split()) != 3:
359 359 # cvs log dates always in GMT
360 360 d = d + b' UTC'
361 361 e.date = dateutil.parsedate(
362 362 d,
363 363 [
364 364 b'%y/%m/%d %H:%M:%S',
365 365 b'%Y/%m/%d %H:%M:%S',
366 366 b'%Y-%m-%d %H:%M:%S',
367 367 ],
368 368 )
369 369 e.author = scache(match.group(2))
370 370 e.dead = match.group(3).lower() == b'dead'
371 371
372 372 if match.group(5):
373 373 if match.group(6):
374 374 e.lines = (int(match.group(5)), int(match.group(6)))
375 375 else:
376 376 e.lines = (int(match.group(5)), 0)
377 377 elif match.group(6):
378 378 e.lines = (0, int(match.group(6)))
379 379 else:
380 380 e.lines = None
381 381
382 382 if match.group(7): # cvs 1.12 commitid
383 383 e.commitid = match.group(8)
384 384
385 385 if match.group(9): # cvsnt mergepoint
386 386 myrev = match.group(10).split(b'.')
387 387 if len(myrev) == 2: # head
388 388 e.mergepoint = b'HEAD'
389 389 else:
390 390 myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]])
391 391 branches = [b for b in branchmap if branchmap[b] == myrev]
392 392 assert len(branches) == 1, (
393 393 b'unknown branch: %s' % e.mergepoint
394 394 )
395 395 e.mergepoint = branches[0]
396 396
397 397 e.comment = []
398 398 state = 7
399 399
400 400 elif state == 7:
401 401 # read the revision numbers of branches that start at this revision
402 402 # or store the commit log message otherwise
403 403 m = re_70.match(line)
404 404 if m:
405 405 e.branches = [
406 406 tuple([int(y) for y in x.strip().split(b'.')])
407 407 for x in m.group(1).split(b';')
408 408 ]
409 409 state = 8
410 410 elif re_31.match(line) and re_50.match(peek):
411 411 state = 5
412 412 store = True
413 413 elif re_32.match(line):
414 414 state = 0
415 415 store = True
416 416 else:
417 417 e.comment.append(line)
418 418
419 419 elif state == 8:
420 420 # store commit log message
421 421 if re_31.match(line):
422 422 cpeek = peek
423 423 if cpeek.endswith(b'\n'):
424 424 cpeek = cpeek[:-1]
425 425 if re_50.match(cpeek):
426 426 state = 5
427 427 store = True
428 428 else:
429 429 e.comment.append(line)
430 430 elif re_32.match(line):
431 431 state = 0
432 432 store = True
433 433 else:
434 434 e.comment.append(line)
435 435
436 436 # When a file is added on a branch B1, CVS creates a synthetic
437 437 # dead trunk revision 1.1 so that the branch has a root.
438 438 # Likewise, if you merge such a file to a later branch B2 (one
439 439 # that already existed when the file was added on B1), CVS
440 440 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
441 441 # these revisions now, but mark them synthetic so
442 442 # createchangeset() can take care of them.
443 443 if (
444 444 store
445 445 and e.dead
446 446 and e.revision[-1] == 1
447 447 and len(e.comment) == 1 # 1.1 or 1.1.x.1
448 448 and file_added_re.match(e.comment[0])
449 449 ):
450 450 ui.debug(
451 451 b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0])
452 452 )
453 453 e.synthetic = True
454 454
455 455 if store:
456 456 # clean up the results and save in the log.
457 457 store = False
458 458 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
459 459 e.comment = scache(b'\n'.join(e.comment))
460 460
461 461 revn = len(e.revision)
462 462 if revn > 3 and (revn % 2) == 0:
463 463 e.branch = tags.get(e.revision[:-1], [None])[0]
464 464 else:
465 465 e.branch = None
466 466
467 467 # find the branches starting from this revision
468 468 branchpoints = set()
469 469 for branch, revision in branchmap.items():
470 470 revparts = tuple([int(i) for i in revision.split(b'.')])
471 471 if len(revparts) < 2: # bad tags
472 472 continue
473 473 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
474 474 # normal branch
475 475 if revparts[:-2] == e.revision:
476 476 branchpoints.add(branch)
477 477 elif revparts == (1, 1, 1): # vendor branch
478 478 if revparts in e.branches:
479 479 branchpoints.add(branch)
480 480 e.branchpoints = branchpoints
481 481
482 482 log.append(e)
483 483
484 484 rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs
485 485
486 486 if len(log) % 100 == 0:
487 487 ui.status(
488 488 stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80)
489 489 + b'\n'
490 490 )
491 491
492 492 log.sort(key=lambda x: (x.rcs, x.revision))
493 493
494 494 # find parent revisions of individual files
495 495 versions = {}
496 496 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
497 497 rcs = e.rcs.replace(b'/Attic/', b'/')
498 498 if rcs in rcsmap:
499 499 e.rcs = rcsmap[rcs]
500 500 branch = e.revision[:-1]
501 501 versions[(e.rcs, branch)] = e.revision
502 502
503 503 for e in log:
504 504 branch = e.revision[:-1]
505 505 p = versions.get((e.rcs, branch), None)
506 506 if p is None:
507 507 p = e.revision[:-2]
508 508 e.parent = p
509 509 versions[(e.rcs, branch)] = e.revision
510 510
511 511 # update the log cache
512 512 if cache:
513 513 if log:
514 514 # join up the old and new logs
515 515 log.sort(key=lambda x: x.date)
516 516
517 517 if oldlog and oldlog[-1].date >= log[0].date:
518 518 raise logerror(
519 519 _(
520 520 b'log cache overlaps with new log entries,'
521 521 b' re-run without cache.'
522 522 )
523 523 )
524 524
525 525 log = oldlog + log
526 526
527 527 # write the new cachefile
528 528 ui.note(_(b'writing cvs log cache %s\n') % cachefile)
529 529 pickle.dump(log, open(cachefile, b'wb'))
530 530 else:
531 531 log = oldlog
532 532
533 533 ui.status(_(b'%d log entries\n') % len(log))
534 534
535 535 encodings = ui.configlist(b'convert', b'cvsps.logencoding')
536 536 if encodings:
537 537
538 538 def revstr(r):
539 539 # this is needed, because logentry.revision is a tuple of "int"
540 540 # (e.g. (1, 2) for "1.2")
541 541 return b'.'.join(pycompat.maplist(pycompat.bytestr, r))
542 542
543 543 for entry in log:
544 544 comment = entry.comment
545 545 for e in encodings:
546 546 try:
547 547 entry.comment = comment.decode(pycompat.sysstr(e)).encode(
548 548 'utf-8'
549 549 )
550 550 if ui.debugflag:
551 551 ui.debug(
552 552 b"transcoding by %s: %s of %s\n"
553 553 % (e, revstr(entry.revision), entry.file)
554 554 )
555 555 break
556 556 except UnicodeDecodeError:
557 557 pass # try next encoding
558 558 except LookupError as inst: # unknown encoding, maybe
559 559 raise error.Abort(
560 560 pycompat.bytestr(inst),
561 561 hint=_(
562 562 b'check convert.cvsps.logencoding configuration'
563 563 ),
564 564 )
565 565 else:
566 566 raise error.Abort(
567 567 _(
568 568 b"no encoding can transcode"
569 569 b" CVS log message for %s of %s"
570 570 )
571 571 % (revstr(entry.revision), entry.file),
572 572 hint=_(b'check convert.cvsps.logencoding configuration'),
573 573 )
574 574
575 575 hook.hook(ui, None, b"cvslog", True, log=log)
576 576
577 577 return log
578 578
579 579
580 580 class changeset:
581 581 """Class changeset has the following attributes:
582 582 .id - integer identifying this changeset (list index)
583 583 .author - author name as CVS knows it
584 584 .branch - name of branch this changeset is on, or None
585 585 .comment - commit message
586 586 .commitid - CVS commitid or None
587 587 .date - the commit date as a (time,tz) tuple
588 588 .entries - list of logentry objects in this changeset
589 589 .parents - list of one or two parent changesets
590 590 .tags - list of tags on this changeset
591 591 .synthetic - from synthetic revision "file ... added on branch ..."
592 592 .mergepoint- the branch that has been merged from or None
593 593 .branchpoints- the branches that start at the current entry or empty
594 594 """
595 595
596 596 def __init__(self, **entries):
597 597 self.id = None
598 598 self.synthetic = False
599 599 self.__dict__.update(entries)
600 600
601 601 def __repr__(self):
602 602 items = (
603 603 b"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)
604 604 )
605 605 return b"%s(%s)" % (type(self).__name__, b", ".join(items))
606 606
607 607
608 608 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
609 609 '''Convert log into changesets.'''
610 610
611 611 ui.status(_(b'creating changesets\n'))
612 612
613 613 # try to order commitids by date
614 614 mindate = {}
615 615 for e in log:
616 616 if e.commitid:
617 617 if e.commitid not in mindate:
618 618 mindate[e.commitid] = e.date
619 619 else:
620 620 mindate[e.commitid] = min(e.date, mindate[e.commitid])
621 621
622 622 # Merge changesets
623 623 log.sort(
624 624 key=lambda x: (
625 625 mindate.get(x.commitid, (-1, 0)),
626 626 x.commitid or b'',
627 627 x.comment,
628 628 x.author,
629 629 x.branch or b'',
630 630 x.date,
631 631 x.branchpoints,
632 632 )
633 633 )
634 634
635 635 changesets = []
636 636 files = set()
637 637 c = None
638 638 for i, e in enumerate(log):
639 639
640 640 # Check if log entry belongs to the current changeset or not.
641 641
642 642 # Since CVS is file-centric, two different file revisions with
643 643 # different branchpoints should be treated as belonging to two
644 644 # different changesets (and the ordering is important and not
645 645 # honoured by cvsps at this point).
646 646 #
647 647 # Consider the following case:
648 648 # foo 1.1 branchpoints: [MYBRANCH]
649 649 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
650 650 #
651 651 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
652 652 # later version of foo may be in MYBRANCH2, so foo should be the
653 653 # first changeset and bar the next and MYBRANCH and MYBRANCH2
654 654 # should both start off of the bar changeset. No provisions are
655 655 # made to ensure that this is, in fact, what happens.
656 656 if not (
657 657 c
658 658 and e.branchpoints == c.branchpoints
659 659 and ( # cvs commitids
660 660 (e.commitid is not None and e.commitid == c.commitid)
661 661 or ( # no commitids, use fuzzy commit detection
662 662 (e.commitid is None or c.commitid is None)
663 663 and e.comment == c.comment
664 664 and e.author == c.author
665 665 and e.branch == c.branch
666 666 and (
667 667 (c.date[0] + c.date[1])
668 668 <= (e.date[0] + e.date[1])
669 669 <= (c.date[0] + c.date[1]) + fuzz
670 670 )
671 671 and e.file not in files
672 672 )
673 673 )
674 674 ):
675 675 c = changeset(
676 676 comment=e.comment,
677 677 author=e.author,
678 678 branch=e.branch,
679 679 date=e.date,
680 680 entries=[],
681 681 mergepoint=e.mergepoint,
682 682 branchpoints=e.branchpoints,
683 683 commitid=e.commitid,
684 684 )
685 685 changesets.append(c)
686 686
687 687 files = set()
688 688 if len(changesets) % 100 == 0:
689 689 t = b'%d %s' % (
690 690 len(changesets),
691 691 pycompat.byterepr(e.comment)[2:-1],
692 692 )
693 693 ui.status(stringutil.ellipsis(t, 80) + b'\n')
694 694
695 695 c.entries.append(e)
696 696 files.add(e.file)
697 697 c.date = e.date # changeset date is date of latest commit in it
698 698
699 699 # Mark synthetic changesets
700 700
701 701 for c in changesets:
702 702 # Synthetic revisions always get their own changeset, because
703 703 # the log message includes the filename. E.g. if you add file3
704 704 # and file4 on a branch, you get four log entries and three
705 705 # changesets:
706 706 # "File file3 was added on branch ..." (synthetic, 1 entry)
707 707 # "File file4 was added on branch ..." (synthetic, 1 entry)
708 708 # "Add file3 and file4 to fix ..." (real, 2 entries)
709 709 # Hence the check for 1 entry here.
710 710 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
711 711
712 712 # Sort files in each changeset
713 713
714 714 def entitycompare(l, r):
715 715 """Mimic cvsps sorting order"""
716 716 l = l.file.split(b'/')
717 717 r = r.file.split(b'/')
718 718 nl = len(l)
719 719 nr = len(r)
720 720 n = min(nl, nr)
721 721 for i in range(n):
722 722 if i + 1 == nl and nl < nr:
723 723 return -1
724 724 elif i + 1 == nr and nl > nr:
725 725 return +1
726 726 elif l[i] < r[i]:
727 727 return -1
728 728 elif l[i] > r[i]:
729 729 return +1
730 730 return 0
731 731
732 732 for c in changesets:
733 733 c.entries.sort(key=functools.cmp_to_key(entitycompare))
734 734
735 735 # Sort changesets by date
736 736
737 737 odd = set()
738 738
739 739 def cscmp(l, r):
740 740 d = sum(l.date) - sum(r.date)
741 741 if d:
742 742 return d
743 743
744 744 # detect vendor branches and initial commits on a branch
745 745 le = {}
746 746 for e in l.entries:
747 747 le[e.rcs] = e.revision
748 748 re = {}
749 749 for e in r.entries:
750 750 re[e.rcs] = e.revision
751 751
752 752 d = 0
753 753 for e in l.entries:
754 754 if re.get(e.rcs, None) == e.parent:
755 755 assert not d
756 756 d = 1
757 757 break
758 758
759 759 for e in r.entries:
760 760 if le.get(e.rcs, None) == e.parent:
761 761 if d:
762 762 odd.add((l, r))
763 763 d = -1
764 764 break
765 765 # By this point, the changesets are sufficiently compared that
766 766 # we don't really care about ordering. However, this leaves
767 767 # some race conditions in the tests, so we compare on the
768 768 # number of files modified, the files contained in each
769 769 # changeset, and the branchpoints in the change to ensure test
770 770 # output remains stable.
771 771
772 772 # recommended replacement for cmp from
773 773 # https://docs.python.org/3.0/whatsnew/3.0.html
774 774 c = lambda x, y: (x > y) - (x < y)
775 775 # Sort bigger changes first.
776 776 if not d:
777 777 d = c(len(l.entries), len(r.entries))
778 778 # Try sorting by filename in the change.
779 779 if not d:
780 780 d = c([e.file for e in l.entries], [e.file for e in r.entries])
781 781 # Try and put changes without a branch point before ones with
782 782 # a branch point.
783 783 if not d:
784 784 d = c(len(l.branchpoints), len(r.branchpoints))
785 785 return d
786 786
787 787 changesets.sort(key=functools.cmp_to_key(cscmp))
788 788
789 789 # Collect tags
790 790
791 791 globaltags = {}
792 792 for c in changesets:
793 793 for e in c.entries:
794 794 for tag in e.tags:
795 795 # remember which is the latest changeset to have this tag
796 796 globaltags[tag] = c
797 797
798 798 for c in changesets:
799 799 tags = set()
800 800 for e in c.entries:
801 801 tags.update(e.tags)
802 802 # remember tags only if this is the latest changeset to have it
803 803 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
804 804
805 805 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
806 806 # by inserting dummy changesets with two parents, and handle
807 807 # {{mergefrombranch BRANCHNAME}} by setting two parents.
808 808
809 809 if mergeto is None:
810 810 mergeto = br'{{mergetobranch ([-\w]+)}}'
811 811 if mergeto:
812 812 mergeto = re.compile(mergeto)
813 813
814 814 if mergefrom is None:
815 815 mergefrom = br'{{mergefrombranch ([-\w]+)}}'
816 816 if mergefrom:
817 817 mergefrom = re.compile(mergefrom)
818 818
819 819 versions = {} # changeset index where we saw any particular file version
820 820 branches = {} # changeset index where we saw a branch
821 821 n = len(changesets)
822 822 i = 0
823 823 while i < n:
824 824 c = changesets[i]
825 825
826 826 for f in c.entries:
827 827 versions[(f.rcs, f.revision)] = i
828 828
829 829 p = None
830 830 if c.branch in branches:
831 831 p = branches[c.branch]
832 832 else:
833 833 # first changeset on a new branch
834 834 # the parent is a changeset with the branch in its
835 835 # branchpoints such that it is the latest possible
836 836 # commit without any intervening, unrelated commits.
837 837
838 838 for candidate in range(i):
839 839 if c.branch not in changesets[candidate].branchpoints:
840 840 if p is not None:
841 841 break
842 842 continue
843 843 p = candidate
844 844
845 845 c.parents = []
846 846 if p is not None:
847 847 p = changesets[p]
848 848
849 849 # Ensure no changeset has a synthetic changeset as a parent.
850 850 while p.synthetic:
851 851 assert len(p.parents) <= 1, _(
852 852 b'synthetic changeset cannot have multiple parents'
853 853 )
854 854 if p.parents:
855 855 p = p.parents[0]
856 856 else:
857 857 p = None
858 858 break
859 859
860 860 if p is not None:
861 861 c.parents.append(p)
862 862
863 863 if c.mergepoint:
864 864 if c.mergepoint == b'HEAD':
865 865 c.mergepoint = None
866 866 c.parents.append(changesets[branches[c.mergepoint]])
867 867
868 868 if mergefrom:
869 869 m = mergefrom.search(c.comment)
870 870 if m:
871 871 m = m.group(1)
872 872 if m == b'HEAD':
873 873 m = None
874 874 try:
875 875 candidate = changesets[branches[m]]
876 876 except KeyError:
877 877 ui.warn(
878 878 _(
879 879 b"warning: CVS commit message references "
880 880 b"non-existent branch %r:\n%s\n"
881 881 )
882 882 % (pycompat.bytestr(m), c.comment)
883 883 )
884 884 if m in branches and c.branch != m and not candidate.synthetic:
885 885 c.parents.append(candidate)
886 886
887 887 if mergeto:
888 888 m = mergeto.search(c.comment)
889 889 if m:
890 890 if m.groups():
891 891 m = m.group(1)
892 892 if m == b'HEAD':
893 893 m = None
894 894 else:
895 895 m = None # if no group found then merge to HEAD
896 896 if m in branches and c.branch != m:
897 897 # insert empty changeset for merge
898 898 cc = changeset(
899 899 author=c.author,
900 900 branch=m,
901 901 date=c.date,
902 902 comment=b'convert-repo: CVS merge from branch %s'
903 903 % c.branch,
904 904 entries=[],
905 905 tags=[],
906 906 parents=[changesets[branches[m]], c],
907 907 )
908 908 changesets.insert(i + 1, cc)
909 909 branches[m] = i + 1
910 910
911 911 # adjust our loop counters now we have inserted a new entry
912 912 n += 1
913 913 i += 2
914 914 continue
915 915
916 916 branches[c.branch] = i
917 917 i += 1
918 918
919 919 # Drop synthetic changesets (safe now that we have ensured no other
920 920 # changesets can have them as parents).
921 921 i = 0
922 922 while i < len(changesets):
923 923 if changesets[i].synthetic:
924 924 del changesets[i]
925 925 else:
926 926 i += 1
927 927
928 928 # Number changesets
929 929
930 930 for i, c in enumerate(changesets):
931 931 c.id = i + 1
932 932
933 933 if odd:
934 934 for l, r in odd:
935 935 if l.id is not None and r.id is not None:
936 936 ui.warn(
937 937 _(b'changeset %d is both before and after %d\n')
938 938 % (l.id, r.id)
939 939 )
940 940
941 941 ui.status(_(b'%d changeset entries\n') % len(changesets))
942 942
943 943 hook.hook(ui, None, b"cvschangesets", True, changesets=changesets)
944 944
945 945 return changesets
946 946
947 947
948 948 def debugcvsps(ui, *args, **opts):
949 949 """Read CVS rlog for current directory or named path in
950 950 repository, and convert the log to changesets based on matching
951 951 commit log entries and dates.
952 952 """
953 953 opts = pycompat.byteskwargs(opts)
954 954 if opts[b"new_cache"]:
955 955 cache = b"write"
956 956 elif opts[b"update_cache"]:
957 957 cache = b"update"
958 958 else:
959 959 cache = None
960 960
961 961 revisions = opts[b"revisions"]
962 962
963 963 try:
964 964 if args:
965 965 log = []
966 966 for d in args:
967 967 log += createlog(ui, d, root=opts[b"root"], cache=cache)
968 968 else:
969 969 log = createlog(ui, root=opts[b"root"], cache=cache)
970 970 except logerror as e:
971 971 ui.write(b"%r\n" % e)
972 972 return
973 973
974 974 changesets = createchangeset(ui, log, opts[b"fuzz"])
975 975 del log
976 976
977 977 # Print changesets (optionally filtered)
978 978
979 979 off = len(revisions)
980 980 branches = {} # latest version number in each branch
981 981 ancestors = {} # parent branch
982 982 for cs in changesets:
983 983
984 984 if opts[b"ancestors"]:
985 985 if cs.branch not in branches and cs.parents and cs.parents[0].id:
986 986 ancestors[cs.branch] = (
987 987 changesets[cs.parents[0].id - 1].branch,
988 988 cs.parents[0].id,
989 989 )
990 990 branches[cs.branch] = cs.id
991 991
992 992 # limit by branches
993 993 if (
994 994 opts[b"branches"]
995 995 and (cs.branch or b'HEAD') not in opts[b"branches"]
996 996 ):
997 997 continue
998 998
999 999 if not off:
1000 1000 # Note: trailing spaces on several lines here are needed to have
1001 1001 # bug-for-bug compatibility with cvsps.
1002 1002 ui.write(b'---------------------\n')
1003 1003 ui.write((b'PatchSet %d \n' % cs.id))
1004 1004 ui.write(
1005 1005 (
1006 1006 b'Date: %s\n'
1007 1007 % dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2')
1008 1008 )
1009 1009 )
1010 1010 ui.write((b'Author: %s\n' % cs.author))
1011 1011 ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD')))
1012 1012 ui.write(
1013 1013 (
1014 1014 b'Tag%s: %s \n'
1015 1015 % (
1016 1016 [b'', b's'][len(cs.tags) > 1],
1017 1017 b','.join(cs.tags) or b'(none)',
1018 1018 )
1019 1019 )
1020 1020 )
1021 1021 if cs.branchpoints:
1022 1022 ui.writenoi18n(
1023 1023 b'Branchpoints: %s \n' % b', '.join(sorted(cs.branchpoints))
1024 1024 )
1025 1025 if opts[b"parents"] and cs.parents:
1026 1026 if len(cs.parents) > 1:
1027 1027 ui.write(
1028 1028 (
1029 1029 b'Parents: %s\n'
1030 1030 % (b','.join([(b"%d" % p.id) for p in cs.parents]))
1031 1031 )
1032 1032 )
1033 1033 else:
1034 1034 ui.write((b'Parent: %d\n' % cs.parents[0].id))
1035 1035
1036 1036 if opts[b"ancestors"]:
1037 1037 b = cs.branch
1038 1038 r = []
1039 1039 while b:
1040 1040 b, c = ancestors[b]
1041 1041 r.append(b'%s:%d:%d' % (b or b"HEAD", c, branches[b]))
1042 1042 if r:
1043 1043 ui.write((b'Ancestors: %s\n' % (b','.join(r))))
1044 1044
1045 1045 ui.writenoi18n(b'Log:\n')
1046 1046 ui.write(b'%s\n\n' % cs.comment)
1047 1047 ui.writenoi18n(b'Members: \n')
1048 1048 for f in cs.entries:
1049 1049 fn = f.file
1050 1050 if fn.startswith(opts[b"prefix"]):
1051 1051 fn = fn[len(opts[b"prefix"]) :]
1052 1052 ui.write(
1053 1053 b'\t%s:%s->%s%s \n'
1054 1054 % (
1055 1055 fn,
1056 1056 b'.'.join([b"%d" % x for x in f.parent]) or b'INITIAL',
1057 1057 b'.'.join([(b"%d" % x) for x in f.revision]),
1058 1058 [b'', b'(DEAD)'][f.dead],
1059 1059 )
1060 1060 )
1061 1061 ui.write(b'\n')
1062 1062
1063 1063 # have we seen the start tag?
1064 1064 if revisions and off:
1065 1065 if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags:
1066 1066 off = False
1067 1067
1068 1068 # see if we reached the end tag
1069 1069 if len(revisions) > 1 and not off:
1070 1070 if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags:
1071 1071 break
@@ -1,1016 +1,1016 b''
1 1 # __init__.py - fsmonitor initialization and overrides
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9 9
10 10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 11 status results.
12 12
13 13 On a particular Linux system, for a real-world repository with over 400,000
14 14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 15 system, with fsmonitor it takes about 0.3 seconds.
16 16
17 17 fsmonitor requires no configuration -- it will tell Watchman about your
18 18 repository as necessary. You'll need to install Watchman from
19 19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20 20
21 21 fsmonitor is incompatible with the largefiles and eol extensions, and
22 22 will disable itself if any of those are active.
23 23
24 24 The following configuration options exist:
25 25
26 26 ::
27 27
28 28 [fsmonitor]
29 29 mode = {off, on, paranoid}
30 30
31 31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
32 32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
33 33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
34 34 and ensure that the results are consistent.
35 35
36 36 ::
37 37
38 38 [fsmonitor]
39 39 timeout = (float)
40 40
41 41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
42 42 to return results. Defaults to `2.0`.
43 43
44 44 ::
45 45
46 46 [fsmonitor]
47 47 blacklistusers = (list of userids)
48 48
49 49 A list of usernames for which fsmonitor will disable itself altogether.
50 50
51 51 ::
52 52
53 53 [fsmonitor]
54 54 walk_on_invalidate = (boolean)
55 55
56 56 Whether or not to walk the whole repo ourselves when our cached state has been
57 57 invalidated, for example when Watchman has been restarted or .hgignore rules
58 58 have been changed. Walking the repo in that case can result in competing for
59 59 I/O with Watchman. For large repos it is recommended to set this value to
60 60 false. You may wish to set this to true if you have a very fast filesystem
61 61 that can outpace the IPC overhead of getting the result data for the full repo
62 62 from Watchman. Defaults to false.
63 63
64 64 ::
65 65
66 66 [fsmonitor]
67 67 warn_when_unused = (boolean)
68 68
69 69 Whether to print a warning during certain operations when fsmonitor would be
70 70 beneficial to performance but isn't enabled.
71 71
72 72 ::
73 73
74 74 [fsmonitor]
75 75 warn_update_file_count = (integer)
76 76 # or when mercurial is built with rust support
77 77 warn_update_file_count_rust = (integer)
78 78
79 79 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
80 80 be printed during working directory updates if this many files will be
81 81 created.
82 82 '''
83 83
84 84 # Platforms Supported
85 85 # ===================
86 86 #
87 87 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
88 88 # even under severe loads.
89 89 #
90 90 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
91 91 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
92 92 # user testing under normal loads.
93 93 #
94 94 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
95 95 # very little testing has been done.
96 96 #
97 97 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
98 98 #
99 99 # Known Issues
100 100 # ============
101 101 #
102 102 # * fsmonitor will disable itself if any of the following extensions are
103 103 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
104 104 # * fsmonitor will produce incorrect results if nested repos that are not
105 105 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
106 106 #
107 107 # The issues related to nested repos and subrepos are probably not fundamental
108 108 # ones. Patches to fix them are welcome.
109 109
110 110
111 111 import codecs
112 112 import os
113 113 import stat
114 114 import sys
115 115 import tempfile
116 116 import weakref
117 117
118 118 from mercurial.i18n import _
119 119 from mercurial.node import hex
120 120
121 121 from mercurial.pycompat import open
122 122 from mercurial import (
123 123 context,
124 124 encoding,
125 125 error,
126 126 extensions,
127 127 localrepo,
128 128 merge,
129 129 pathutil,
130 130 pycompat,
131 131 registrar,
132 132 scmutil,
133 133 util,
134 134 )
135 135
136 136 # no-check-code because we're accessing private information only public in pure
137 137 from mercurial.pure import parsers
138 138 from mercurial import match as matchmod
139 139 from mercurial.utils import (
140 140 hashutil,
141 141 stringutil,
142 142 )
143 143
144 144 from . import (
145 145 pywatchman,
146 146 state,
147 147 watchmanclient,
148 148 )
149 149
150 150 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
151 151 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
152 152 # be specifying the version(s) of Mercurial they are tested with, or
153 153 # leave the attribute unspecified.
154 154 testedwith = b'ships-with-hg-core'
155 155
156 156 configtable = {}
157 157 configitem = registrar.configitem(configtable)
158 158
159 159 configitem(
160 160 b'fsmonitor',
161 161 b'mode',
162 162 default=b'on',
163 163 )
164 164 configitem(
165 165 b'fsmonitor',
166 166 b'walk_on_invalidate',
167 167 default=False,
168 168 )
169 169 configitem(
170 170 b'fsmonitor',
171 171 b'timeout',
172 172 default=b'2',
173 173 )
174 174 configitem(
175 175 b'fsmonitor',
176 176 b'blacklistusers',
177 177 default=list,
178 178 )
179 179 configitem(
180 180 b'fsmonitor',
181 181 b'watchman_exe',
182 182 default=b'watchman',
183 183 )
184 184 configitem(
185 185 b'fsmonitor',
186 186 b'verbose',
187 187 default=True,
188 188 experimental=True,
189 189 )
190 190 configitem(
191 191 b'experimental',
192 192 b'fsmonitor.transaction_notify',
193 193 default=False,
194 194 )
195 195
196 196 # This extension is incompatible with the following blacklisted extensions
197 197 # and will disable itself when encountering one of these:
198 198 _blacklist = [b'largefiles', b'eol']
199 199
200 200
201 201 def debuginstall(ui, fm):
202 202 fm.write(
203 203 b"fsmonitor-watchman",
204 204 _(b"fsmonitor checking for watchman binary... (%s)\n"),
205 205 ui.configpath(b"fsmonitor", b"watchman_exe"),
206 206 )
207 207 root = tempfile.mkdtemp()
208 208 c = watchmanclient.client(ui, root)
209 209 err = None
210 210 try:
211 211 v = c.command(b"version")
212 212 fm.write(
213 213 b"fsmonitor-watchman-version",
214 214 _(b" watchman binary version %s\n"),
215 215 pycompat.bytestr(v["version"]),
216 216 )
217 217 except watchmanclient.Unavailable as e:
218 218 err = stringutil.forcebytestr(e)
219 219 fm.condwrite(
220 220 err,
221 221 b"fsmonitor-watchman-error",
222 222 _(b" watchman binary missing or broken: %s\n"),
223 223 err,
224 224 )
225 225 return 1 if err else 0
226 226
227 227
228 228 def _handleunavailable(ui, state, ex):
229 229 """Exception handler for Watchman interaction exceptions"""
230 230 if isinstance(ex, watchmanclient.Unavailable):
231 231 # experimental config: fsmonitor.verbose
232 232 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
233 233 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
234 234 ui.warn(stringutil.forcebytestr(ex) + b'\n')
235 235 if ex.invalidate:
236 236 state.invalidate()
237 237 # experimental config: fsmonitor.verbose
238 238 if ui.configbool(b'fsmonitor', b'verbose'):
239 239 ui.log(
240 240 b'fsmonitor',
241 241 b'Watchman unavailable: %s\n',
242 242 stringutil.forcebytestr(ex.msg),
243 243 )
244 244 else:
245 245 ui.log(
246 246 b'fsmonitor',
247 247 b'Watchman exception: %s\n',
248 248 stringutil.forcebytestr(ex),
249 249 )
250 250
251 251
252 252 def _hashignore(ignore):
253 253 """Calculate hash for ignore patterns and filenames
254 254
255 255 If this information changes between Mercurial invocations, we can't
256 256 rely on Watchman information anymore and have to re-scan the working
257 257 copy.
258 258
259 259 """
260 260 sha1 = hashutil.sha1()
261 261 sha1.update(pycompat.byterepr(ignore))
262 262 return pycompat.sysbytes(sha1.hexdigest())
263 263
264 264
265 265 _watchmanencoding = pywatchman.encoding.get_local_encoding()
266 266 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
267 267 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
268 268
269 269
270 270 def _watchmantofsencoding(path):
271 271 """Fix path to match watchman and local filesystem encoding
272 272
273 273 watchman's paths encoding can differ from filesystem encoding. For example,
274 274 on Windows, it's always utf-8.
275 275 """
276 276 try:
277 277 decoded = path.decode(_watchmanencoding)
278 278 except UnicodeDecodeError as e:
279 279 raise error.Abort(
280 280 stringutil.forcebytestr(e), hint=b'watchman encoding error'
281 281 )
282 282
283 283 try:
284 284 encoded = decoded.encode(_fsencoding, 'strict')
285 285 except UnicodeEncodeError as e:
286 286 raise error.Abort(stringutil.forcebytestr(e))
287 287
288 288 return encoded
289 289
290 290
291 291 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
292 292 """Replacement for dirstate.walk, hooking into Watchman.
293 293
294 294 Whenever full is False, ignored is False, and the Watchman client is
295 295 available, use Watchman combined with saved state to possibly return only a
296 296 subset of files."""
297 297
298 298 def bail(reason):
299 299 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
300 300 return orig(match, subrepos, unknown, ignored, full=True)
301 301
302 302 if full:
303 303 return bail(b'full rewalk requested')
304 304 if ignored:
305 305 return bail(b'listing ignored files')
306 306 if not self._watchmanclient.available():
307 307 return bail(b'client unavailable')
308 308 state = self._fsmonitorstate
309 309 clock, ignorehash, notefiles = state.get()
310 310 if not clock:
311 311 if state.walk_on_invalidate:
312 312 return bail(b'no clock')
313 313 # Initial NULL clock value, see
314 314 # https://facebook.github.io/watchman/docs/clockspec.html
315 315 clock = b'c:0:0'
316 316 notefiles = []
317 317
318 318 ignore = self._ignore
319 319 dirignore = self._dirignore
320 320 if unknown:
321 321 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
322 322 # ignore list changed -- can't rely on Watchman state any more
323 323 if state.walk_on_invalidate:
324 324 return bail(b'ignore rules changed')
325 325 notefiles = []
326 326 clock = b'c:0:0'
327 327 else:
328 328 # always ignore
329 329 ignore = util.always
330 330 dirignore = util.always
331 331
332 332 matchfn = match.matchfn
333 333 matchalways = match.always()
334 334 dmap = self._map
335 if hasattr(dmap, b'_map'):
335 if hasattr(dmap, '_map'):
336 336 # for better performance, directly access the inner dirstate map if the
337 337 # standard dirstate implementation is in use.
338 338 dmap = dmap._map
339 339
340 340 has_mtime = parsers.DIRSTATE_V2_HAS_MTIME
341 341 mtime_is_ambiguous = parsers.DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS
342 342 mask = has_mtime | mtime_is_ambiguous
343 343
344 344 # All entries that may not be clean
345 345 nonnormalset = {
346 346 f
347 347 for f, e in self._map.items()
348 348 if not e.maybe_clean
349 349 # same as "not has_time or has_ambiguous_time", but factored to only
350 350 # need a single access to flags for performance.
351 351 # `mask` removes all irrelevant bits, then we flip the `mtime` bit so
352 352 # its `true` value is NOT having a mtime, then check if either bit
353 353 # is set.
354 354 or bool((e.v2_data()[0] & mask) ^ has_mtime)
355 355 }
356 356
357 357 copymap = self._map.copymap
358 358 getkind = stat.S_IFMT
359 359 dirkind = stat.S_IFDIR
360 360 regkind = stat.S_IFREG
361 361 lnkkind = stat.S_IFLNK
362 362 join = self._join
363 363 normcase = util.normcase
364 364 fresh_instance = False
365 365
366 366 exact = skipstep3 = False
367 367 if match.isexact(): # match.exact
368 368 exact = True
369 369 dirignore = util.always # skip step 2
370 370 elif match.prefix(): # match.match, no patterns
371 371 skipstep3 = True
372 372
373 373 if not exact and self._checkcase:
374 374 # note that even though we could receive directory entries, we're only
375 375 # interested in checking if a file with the same name exists. So only
376 376 # normalize files if possible.
377 377 normalize = self._normalizefile
378 378 skipstep3 = False
379 379 else:
380 380 normalize = None
381 381
382 382 # step 1: find all explicit files
383 383 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
384 384
385 385 skipstep3 = skipstep3 and not (work or dirsnotfound)
386 386 work = [d for d in work if not dirignore(d[0])]
387 387
388 388 if not work and (exact or skipstep3):
389 389 for s in subrepos:
390 390 del results[s]
391 391 del results[b'.hg']
392 392 return results
393 393
394 394 # step 2: query Watchman
395 395 try:
396 396 # Use the user-configured timeout for the query.
397 397 # Add a little slack over the top of the user query to allow for
398 398 # overheads while transferring the data
399 399 self._watchmanclient.settimeout(state.timeout + 0.1)
400 400 result = self._watchmanclient.command(
401 401 b'query',
402 402 {
403 403 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
404 404 b'since': clock,
405 405 b'expression': [
406 406 b'not',
407 407 [
408 408 b'anyof',
409 409 [b'dirname', b'.hg'],
410 410 [b'name', b'.hg', b'wholename'],
411 411 ],
412 412 ],
413 413 b'sync_timeout': int(state.timeout * 1000),
414 414 b'empty_on_fresh_instance': state.walk_on_invalidate,
415 415 },
416 416 )
417 417 except Exception as ex:
418 418 _handleunavailable(self._ui, state, ex)
419 419 self._watchmanclient.clearconnection()
420 420 return bail(b'exception during run')
421 421 else:
422 422 # We need to propagate the last observed clock up so that we
423 423 # can use it for our next query
424 424 state.setlastclock(pycompat.sysbytes(result[b'clock']))
425 425 if result[b'is_fresh_instance']:
426 426 if state.walk_on_invalidate:
427 427 state.invalidate()
428 428 return bail(b'fresh instance')
429 429 fresh_instance = True
430 430 # Ignore any prior noteable files from the state info
431 431 notefiles = []
432 432
433 433 # for file paths which require normalization and we encounter a case
434 434 # collision, we store our own foldmap
435 435 if normalize:
436 436 foldmap = {normcase(k): k for k in results}
437 437
438 438 switch_slashes = pycompat.ossep == b'\\'
439 439 # The order of the results is, strictly speaking, undefined.
440 440 # For case changes on a case insensitive filesystem we may receive
441 441 # two entries, one with exists=True and another with exists=False.
442 442 # The exists=True entries in the same response should be interpreted
443 443 # as being happens-after the exists=False entries due to the way that
444 444 # Watchman tracks files. We use this property to reconcile deletes
445 445 # for name case changes.
446 446 for entry in result[b'files']:
447 447 fname = entry[b'name']
448 448
449 449 # Watchman always give us a str. Normalize to bytes on Python 3
450 450 # using Watchman's encoding, if needed.
451 451 if not isinstance(fname, bytes):
452 452 fname = fname.encode(_watchmanencoding)
453 453
454 454 if _fixencoding:
455 455 fname = _watchmantofsencoding(fname)
456 456
457 457 if switch_slashes:
458 458 fname = fname.replace(b'\\', b'/')
459 459 if normalize:
460 460 normed = normcase(fname)
461 461 fname = normalize(fname, True, True)
462 462 foldmap[normed] = fname
463 463 fmode = entry[b'mode']
464 464 fexists = entry[b'exists']
465 465 kind = getkind(fmode)
466 466
467 467 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
468 468 return bail(b'nested-repo-detected')
469 469
470 470 if not fexists:
471 471 # if marked as deleted and we don't already have a change
472 472 # record, mark it as deleted. If we already have an entry
473 473 # for fname then it was either part of walkexplicit or was
474 474 # an earlier result that was a case change
475 475 if (
476 476 fname not in results
477 477 and fname in dmap
478 478 and (matchalways or matchfn(fname))
479 479 ):
480 480 results[fname] = None
481 481 elif kind == dirkind:
482 482 if fname in dmap and (matchalways or matchfn(fname)):
483 483 results[fname] = None
484 484 elif kind == regkind or kind == lnkkind:
485 485 if fname in dmap:
486 486 if matchalways or matchfn(fname):
487 487 results[fname] = entry
488 488 elif (matchalways or matchfn(fname)) and not ignore(fname):
489 489 results[fname] = entry
490 490 elif fname in dmap and (matchalways or matchfn(fname)):
491 491 results[fname] = None
492 492
493 493 # step 3: query notable files we don't already know about
494 494 # XXX try not to iterate over the entire dmap
495 495 if normalize:
496 496 # any notable files that have changed case will already be handled
497 497 # above, so just check membership in the foldmap
498 498 notefiles = {
499 499 normalize(f, True, True)
500 500 for f in notefiles
501 501 if normcase(f) not in foldmap
502 502 }
503 503 visit = {
504 504 f
505 505 for f in notefiles
506 506 if (f not in results and matchfn(f) and (f in dmap or not ignore(f)))
507 507 }
508 508
509 509 if not fresh_instance:
510 510 if matchalways:
511 511 visit.update(f for f in nonnormalset if f not in results)
512 512 visit.update(f for f in copymap if f not in results)
513 513 else:
514 514 visit.update(
515 515 f for f in nonnormalset if f not in results and matchfn(f)
516 516 )
517 517 visit.update(f for f in copymap if f not in results and matchfn(f))
518 518 else:
519 519 if matchalways:
520 520 visit.update(f for f, st in dmap.items() if f not in results)
521 521 visit.update(f for f in copymap if f not in results)
522 522 else:
523 523 visit.update(
524 524 f for f, st in dmap.items() if f not in results and matchfn(f)
525 525 )
526 526 visit.update(f for f in copymap if f not in results and matchfn(f))
527 527
528 528 audit = pathutil.pathauditor(self._root, cached=True).check
529 529 auditpass = [f for f in visit if audit(f)]
530 530 auditpass.sort()
531 531 auditfail = visit.difference(auditpass)
532 532 for f in auditfail:
533 533 results[f] = None
534 534
535 535 nf = iter(auditpass)
536 536 for st in util.statfiles([join(f) for f in auditpass]):
537 537 f = next(nf)
538 538 if st or f in dmap:
539 539 results[f] = st
540 540
541 541 for s in subrepos:
542 542 del results[s]
543 543 del results[b'.hg']
544 544 return results
545 545
546 546
547 547 def overridestatus(
548 548 orig,
549 549 self,
550 550 node1=b'.',
551 551 node2=None,
552 552 match=None,
553 553 ignored=False,
554 554 clean=False,
555 555 unknown=False,
556 556 listsubrepos=False,
557 557 ):
558 558 listignored = ignored
559 559 listclean = clean
560 560 listunknown = unknown
561 561
562 562 def _cmpsets(l1, l2):
563 563 try:
564 564 if b'FSMONITOR_LOG_FILE' in encoding.environ:
565 565 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
566 566 f = open(fn, b'wb')
567 567 else:
568 568 fn = b'fsmonitorfail.log'
569 569 f = self.vfs.open(fn, b'wb')
570 570 except (IOError, OSError):
571 571 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
572 572 return
573 573
574 574 try:
575 575 for i, (s1, s2) in enumerate(zip(l1, l2)):
576 576 if set(s1) != set(s2):
577 577 f.write(b'sets at position %d are unequal\n' % i)
578 578 f.write(b'watchman returned: %r\n' % s1)
579 579 f.write(b'stat returned: %r\n' % s2)
580 580 finally:
581 581 f.close()
582 582
583 583 if isinstance(node1, context.changectx):
584 584 ctx1 = node1
585 585 else:
586 586 ctx1 = self[node1]
587 587 if isinstance(node2, context.changectx):
588 588 ctx2 = node2
589 589 else:
590 590 ctx2 = self[node2]
591 591
592 592 working = ctx2.rev() is None
593 593 parentworking = working and ctx1 == self[b'.']
594 594 match = match or matchmod.always()
595 595
596 596 # Maybe we can use this opportunity to update Watchman's state.
597 597 # Mercurial uses workingcommitctx and/or memctx to represent the part of
598 598 # the workingctx that is to be committed. So don't update the state in
599 599 # that case.
600 600 # HG_PENDING is set in the environment when the dirstate is being updated
601 601 # in the middle of a transaction; we must not update our state in that
602 602 # case, or we risk forgetting about changes in the working copy.
603 603 updatestate = (
604 604 parentworking
605 605 and match.always()
606 606 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
607 607 and b'HG_PENDING' not in encoding.environ
608 608 )
609 609
610 610 try:
611 611 if self._fsmonitorstate.walk_on_invalidate:
612 612 # Use a short timeout to query the current clock. If that
613 613 # takes too long then we assume that the service will be slow
614 614 # to answer our query.
615 615 # walk_on_invalidate indicates that we prefer to walk the
616 616 # tree ourselves because we can ignore portions that Watchman
617 617 # cannot and we tend to be faster in the warmer buffer cache
618 618 # cases.
619 619 self._watchmanclient.settimeout(0.1)
620 620 else:
621 621 # Give Watchman more time to potentially complete its walk
622 622 # and return the initial clock. In this mode we assume that
623 623 # the filesystem will be slower than parsing a potentially
624 624 # very large Watchman result set.
625 625 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
626 626 startclock = self._watchmanclient.getcurrentclock()
627 627 except Exception as ex:
628 628 self._watchmanclient.clearconnection()
629 629 _handleunavailable(self.ui, self._fsmonitorstate, ex)
630 630 # boo, Watchman failed. bail
631 631 return orig(
632 632 node1,
633 633 node2,
634 634 match,
635 635 listignored,
636 636 listclean,
637 637 listunknown,
638 638 listsubrepos,
639 639 )
640 640
641 641 if updatestate:
642 642 # We need info about unknown files. This may make things slower the
643 643 # first time, but whatever.
644 644 stateunknown = True
645 645 else:
646 646 stateunknown = listunknown
647 647
648 648 if updatestate:
649 649 ps = poststatus(startclock)
650 650 self.addpostdsstatus(ps)
651 651
652 652 r = orig(
653 653 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
654 654 )
655 655 modified, added, removed, deleted, unknown, ignored, clean = r
656 656
657 657 if not listunknown:
658 658 unknown = []
659 659
660 660 # don't do paranoid checks if we're not going to query Watchman anyway
661 661 full = listclean or match.traversedir is not None
662 662 if self._fsmonitorstate.mode == b'paranoid' and not full:
663 663 # run status again and fall back to the old walk this time
664 664 self.dirstate._fsmonitordisable = True
665 665
666 666 # shut the UI up
667 667 quiet = self.ui.quiet
668 668 self.ui.quiet = True
669 669 fout, ferr = self.ui.fout, self.ui.ferr
670 670 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
671 671
672 672 try:
673 673 rv2 = orig(
674 674 node1,
675 675 node2,
676 676 match,
677 677 listignored,
678 678 listclean,
679 679 listunknown,
680 680 listsubrepos,
681 681 )
682 682 finally:
683 683 self.dirstate._fsmonitordisable = False
684 684 self.ui.quiet = quiet
685 685 self.ui.fout, self.ui.ferr = fout, ferr
686 686
687 687 # clean isn't tested since it's set to True above
688 688 with self.wlock():
689 689 _cmpsets(
690 690 [modified, added, removed, deleted, unknown, ignored, clean],
691 691 rv2,
692 692 )
693 693 modified, added, removed, deleted, unknown, ignored, clean = rv2
694 694
695 695 return scmutil.status(
696 696 modified, added, removed, deleted, unknown, ignored, clean
697 697 )
698 698
699 699
700 700 class poststatus:
701 701 def __init__(self, startclock):
702 702 self._startclock = pycompat.sysbytes(startclock)
703 703
704 704 def __call__(self, wctx, status):
705 705 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
706 706 hashignore = _hashignore(wctx.repo().dirstate._ignore)
707 707 notefiles = (
708 708 status.modified
709 709 + status.added
710 710 + status.removed
711 711 + status.deleted
712 712 + status.unknown
713 713 )
714 714 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
715 715
716 716
717 717 def makedirstate(repo, dirstate):
718 718 class fsmonitordirstate(dirstate.__class__):
719 719 def _fsmonitorinit(self, repo):
720 720 # _fsmonitordisable is used in paranoid mode
721 721 self._fsmonitordisable = False
722 722 self._fsmonitorstate = repo._fsmonitorstate
723 723 self._watchmanclient = repo._watchmanclient
724 724 self._repo = weakref.proxy(repo)
725 725
726 726 def walk(self, *args, **kwargs):
727 727 orig = super(fsmonitordirstate, self).walk
728 728 if self._fsmonitordisable:
729 729 return orig(*args, **kwargs)
730 730 return overridewalk(orig, self, *args, **kwargs)
731 731
732 732 def rebuild(self, *args, **kwargs):
733 733 self._fsmonitorstate.invalidate()
734 734 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
735 735
736 736 def invalidate(self, *args, **kwargs):
737 737 self._fsmonitorstate.invalidate()
738 738 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
739 739
740 740 dirstate.__class__ = fsmonitordirstate
741 741 dirstate._fsmonitorinit(repo)
742 742
743 743
744 744 def wrapdirstate(orig, self):
745 745 ds = orig(self)
746 746 # only override the dirstate when Watchman is available for the repo
747 if hasattr(self, b'_fsmonitorstate'):
747 if hasattr(self, '_fsmonitorstate'):
748 748 makedirstate(self, ds)
749 749 return ds
750 750
751 751
752 752 def extsetup(ui):
753 753 extensions.wrapfilecache(
754 754 localrepo.localrepository, b'dirstate', wrapdirstate
755 755 )
756 756 if pycompat.isdarwin:
757 757 # An assist for avoiding the dangling-symlink fsevents bug
758 758 extensions.wrapfunction(os, 'symlink', wrapsymlink)
759 759
760 760 extensions.wrapfunction(merge, '_update', wrapupdate)
761 761
762 762
763 763 def wrapsymlink(orig, source, link_name):
764 764 """if we create a dangling symlink, also touch the parent dir
765 765 to encourage fsevents notifications to work more correctly"""
766 766 try:
767 767 return orig(source, link_name)
768 768 finally:
769 769 try:
770 770 os.utime(os.path.dirname(link_name), None)
771 771 except OSError:
772 772 pass
773 773
774 774
775 775 class state_update:
776 776 """This context manager is responsible for dispatching the state-enter
777 777 and state-leave signals to the watchman service. The enter and leave
778 778 methods can be invoked manually (for scenarios where context manager
779 779 semantics are not possible). If parameters oldnode and newnode are None,
780 780 they will be populated based on current working copy in enter and
781 781 leave, respectively. Similarly, if the distance is none, it will be
782 782 calculated based on the oldnode and newnode in the leave method."""
783 783
784 784 def __init__(
785 785 self,
786 786 repo,
787 787 name,
788 788 oldnode=None,
789 789 newnode=None,
790 790 distance=None,
791 791 partial=False,
792 792 ):
793 793 self.repo = repo.unfiltered()
794 794 self.name = name
795 795 self.oldnode = oldnode
796 796 self.newnode = newnode
797 797 self.distance = distance
798 798 self.partial = partial
799 799 self._lock = None
800 800 self.need_leave = False
801 801
802 802 def __enter__(self):
803 803 self.enter()
804 804
805 805 def enter(self):
806 806 # Make sure we have a wlock prior to sending notifications to watchman.
807 807 # We don't want to race with other actors. In the update case,
808 808 # merge.update is going to take the wlock almost immediately. We are
809 809 # effectively extending the lock around several short sanity checks.
810 810 if self.oldnode is None:
811 811 self.oldnode = self.repo[b'.'].node()
812 812
813 813 if self.repo.currentwlock() is None:
814 if hasattr(self.repo, b'wlocknostateupdate'):
814 if hasattr(self.repo, 'wlocknostateupdate'):
815 815 self._lock = self.repo.wlocknostateupdate()
816 816 else:
817 817 self._lock = self.repo.wlock()
818 818 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
819 819 return self
820 820
821 821 def __exit__(self, type_, value, tb):
822 822 abort = True if type_ else False
823 823 self.exit(abort=abort)
824 824
825 825 def exit(self, abort=False):
826 826 try:
827 827 if self.need_leave:
828 828 status = b'failed' if abort else b'ok'
829 829 if self.newnode is None:
830 830 self.newnode = self.repo[b'.'].node()
831 831 if self.distance is None:
832 832 self.distance = calcdistance(
833 833 self.repo, self.oldnode, self.newnode
834 834 )
835 835 self._state(b'state-leave', hex(self.newnode), status=status)
836 836 finally:
837 837 self.need_leave = False
838 838 if self._lock:
839 839 self._lock.release()
840 840
841 841 def _state(self, cmd, commithash, status=b'ok'):
842 if not hasattr(self.repo, b'_watchmanclient'):
842 if not hasattr(self.repo, '_watchmanclient'):
843 843 return False
844 844 try:
845 845 self.repo._watchmanclient.command(
846 846 cmd,
847 847 {
848 848 b'name': self.name,
849 849 b'metadata': {
850 850 # the target revision
851 851 b'rev': commithash,
852 852 # approximate number of commits between current and target
853 853 b'distance': self.distance if self.distance else 0,
854 854 # success/failure (only really meaningful for state-leave)
855 855 b'status': status,
856 856 # whether the working copy parent is changing
857 857 b'partial': self.partial,
858 858 },
859 859 },
860 860 )
861 861 return True
862 862 except Exception as e:
863 863 # Swallow any errors; fire and forget
864 864 self.repo.ui.log(
865 865 b'watchman', b'Exception %s while running %s\n', e, cmd
866 866 )
867 867 return False
868 868
869 869
870 870 # Estimate the distance between two nodes
871 871 def calcdistance(repo, oldnode, newnode):
872 872 anc = repo.changelog.ancestor(oldnode, newnode)
873 873 ancrev = repo[anc].rev()
874 874 distance = abs(repo[oldnode].rev() - ancrev) + abs(
875 875 repo[newnode].rev() - ancrev
876 876 )
877 877 return distance
878 878
879 879
880 880 # Bracket working copy updates with calls to the watchman state-enter
881 881 # and state-leave commands. This allows clients to perform more intelligent
882 882 # settling during bulk file change scenarios
883 883 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
884 884 def wrapupdate(
885 885 orig,
886 886 repo,
887 887 node,
888 888 branchmerge,
889 889 force,
890 890 ancestor=None,
891 891 mergeancestor=False,
892 892 labels=None,
893 893 matcher=None,
894 894 **kwargs
895 895 ):
896 896
897 897 distance = 0
898 898 partial = True
899 899 oldnode = repo[b'.'].node()
900 900 newnode = repo[node].node()
901 901 if matcher is None or matcher.always():
902 902 partial = False
903 903 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
904 904
905 905 with state_update(
906 906 repo,
907 907 name=b"hg.update",
908 908 oldnode=oldnode,
909 909 newnode=newnode,
910 910 distance=distance,
911 911 partial=partial,
912 912 ):
913 913 return orig(
914 914 repo,
915 915 node,
916 916 branchmerge,
917 917 force,
918 918 ancestor,
919 919 mergeancestor,
920 920 labels,
921 921 matcher,
922 922 **kwargs
923 923 )
924 924
925 925
926 926 def repo_has_depth_one_nested_repo(repo):
927 927 for f in repo.wvfs.listdir():
928 928 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
929 929 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
930 930 repo.ui.debug(msg % f)
931 931 return True
932 932 return False
933 933
934 934
935 935 def reposetup(ui, repo):
936 936 # We don't work with largefiles or inotify
937 937 exts = extensions.enabled()
938 938 for ext in _blacklist:
939 939 if ext in exts:
940 940 ui.warn(
941 941 _(
942 942 b'The fsmonitor extension is incompatible with the %s '
943 943 b'extension and has been disabled.\n'
944 944 )
945 945 % ext
946 946 )
947 947 return
948 948
949 949 if repo.local():
950 950 # We don't work with subrepos either.
951 951 #
952 952 # if repo[None].substate can cause a dirstate parse, which is too
953 953 # slow. Instead, look for a file called hgsubstate,
954 954 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
955 955 return
956 956
957 957 if repo_has_depth_one_nested_repo(repo):
958 958 return
959 959
960 960 fsmonitorstate = state.state(repo)
961 961 if fsmonitorstate.mode == b'off':
962 962 return
963 963
964 964 try:
965 965 client = watchmanclient.client(repo.ui, repo.root)
966 966 except Exception as ex:
967 967 _handleunavailable(ui, fsmonitorstate, ex)
968 968 return
969 969
970 970 repo._fsmonitorstate = fsmonitorstate
971 971 repo._watchmanclient = client
972 972
973 973 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
974 974 if cached:
975 975 # at this point since fsmonitorstate wasn't present,
976 976 # repo.dirstate is not a fsmonitordirstate
977 977 makedirstate(repo, dirstate)
978 978
979 979 class fsmonitorrepo(repo.__class__):
980 980 def status(self, *args, **kwargs):
981 981 orig = super(fsmonitorrepo, self).status
982 982 return overridestatus(orig, self, *args, **kwargs)
983 983
984 984 def wlocknostateupdate(self, *args, **kwargs):
985 985 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
986 986
987 987 def wlock(self, *args, **kwargs):
988 988 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
989 989 if not ui.configbool(
990 990 b"experimental", b"fsmonitor.transaction_notify"
991 991 ):
992 992 return l
993 993 if l.held != 1:
994 994 return l
995 995 origrelease = l.releasefn
996 996
997 997 def staterelease():
998 998 if origrelease:
999 999 origrelease()
1000 1000 if l.stateupdate:
1001 1001 l.stateupdate.exit()
1002 1002 l.stateupdate = None
1003 1003
1004 1004 try:
1005 1005 l.stateupdate = None
1006 1006 l.stateupdate = state_update(self, name=b"hg.transaction")
1007 1007 l.stateupdate.enter()
1008 1008 l.releasefn = staterelease
1009 1009 except Exception as e:
1010 1010 # Swallow any errors; fire and forget
1011 1011 self.ui.log(
1012 1012 b'watchman', b'Exception in state update %s\n', e
1013 1013 )
1014 1014 return l
1015 1015
1016 1016 repo.__class__ = fsmonitorrepo
@@ -1,449 +1,449 b''
1 1 import os
2 2 import shutil
3 3 import stat
4 4 import time
5 5
6 6 from mercurial.i18n import _
7 7 from mercurial.node import bin, hex
8 8 from mercurial.pycompat import open
9 9 from mercurial import (
10 10 error,
11 11 pycompat,
12 12 util,
13 13 )
14 14 from mercurial.utils import hashutil
15 15 from . import (
16 16 constants,
17 17 shallowutil,
18 18 )
19 19
20 20
21 21 class basestore:
22 22 def __init__(self, repo, path, reponame, shared=False):
23 23 """Creates a remotefilelog store object for the given repo name.
24 24
25 25 `path` - The file path where this store keeps its data
26 26 `reponame` - The name of the repo. This is used to partition data from
27 27 many repos.
28 28 `shared` - True if this store is a shared cache of data from the central
29 29 server, for many repos on this machine. False means this store is for
30 30 the local data for one repo.
31 31 """
32 32 self.repo = repo
33 33 self.ui = repo.ui
34 34 self._path = path
35 35 self._reponame = reponame
36 36 self._shared = shared
37 37 self._uid = os.getuid() if not pycompat.iswindows else None
38 38
39 39 self._validatecachelog = self.ui.config(
40 40 b"remotefilelog", b"validatecachelog"
41 41 )
42 42 self._validatecache = self.ui.config(
43 43 b"remotefilelog", b"validatecache", b'on'
44 44 )
45 45 if self._validatecache not in (b'on', b'strict', b'off'):
46 46 self._validatecache = b'on'
47 47 if self._validatecache == b'off':
48 48 self._validatecache = False
49 49
50 50 if shared:
51 51 shallowutil.mkstickygroupdir(self.ui, path)
52 52
53 53 def getmissing(self, keys):
54 54 missing = []
55 55 for name, node in keys:
56 56 filepath = self._getfilepath(name, node)
57 57 exists = os.path.exists(filepath)
58 58 if (
59 59 exists
60 60 and self._validatecache == b'strict'
61 61 and not self._validatekey(filepath, b'contains')
62 62 ):
63 63 exists = False
64 64 if not exists:
65 65 missing.append((name, node))
66 66
67 67 return missing
68 68
69 69 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
70 70
71 71 def markledger(self, ledger, options=None):
72 72 if options and options.get(constants.OPTION_PACKSONLY):
73 73 return
74 74 if self._shared:
75 75 for filename, nodes in self._getfiles():
76 76 for node in nodes:
77 77 ledger.markdataentry(self, filename, node)
78 78 ledger.markhistoryentry(self, filename, node)
79 79
80 80 def cleanup(self, ledger):
81 81 ui = self.ui
82 82 entries = ledger.sources.get(self, [])
83 83 count = 0
84 84 progress = ui.makeprogress(
85 85 _(b"cleaning up"), unit=b"files", total=len(entries)
86 86 )
87 87 for entry in entries:
88 88 if entry.gced or (entry.datarepacked and entry.historyrepacked):
89 89 progress.update(count)
90 90 path = self._getfilepath(entry.filename, entry.node)
91 91 util.tryunlink(path)
92 92 count += 1
93 93 progress.complete()
94 94
95 95 # Clean up the repo cache directory.
96 96 self._cleanupdirectory(self._getrepocachepath())
97 97
98 98 # BELOW THIS ARE NON-STANDARD APIS
99 99
100 100 def _cleanupdirectory(self, rootdir):
101 101 """Removes the empty directories and unnecessary files within the root
102 102 directory recursively. Note that this method does not remove the root
103 103 directory itself."""
104 104
105 105 oldfiles = set()
106 106 otherfiles = set()
107 107 # osutil.listdir returns stat information which saves some rmdir/listdir
108 108 # syscalls.
109 109 for name, mode in util.osutil.listdir(rootdir):
110 110 if stat.S_ISDIR(mode):
111 111 dirpath = os.path.join(rootdir, name)
112 112 self._cleanupdirectory(dirpath)
113 113
114 114 # Now that the directory specified by dirpath is potentially
115 115 # empty, try and remove it.
116 116 try:
117 117 os.rmdir(dirpath)
118 118 except OSError:
119 119 pass
120 120
121 121 elif stat.S_ISREG(mode):
122 122 if name.endswith(b'_old'):
123 123 oldfiles.add(name[:-4])
124 124 else:
125 125 otherfiles.add(name)
126 126
127 127 # Remove the files which end with suffix '_old' and have no
128 128 # corresponding file without the suffix '_old'. See addremotefilelognode
129 129 # method for the generation/purpose of files with '_old' suffix.
130 130 for filename in oldfiles - otherfiles:
131 131 filepath = os.path.join(rootdir, filename + b'_old')
132 132 util.tryunlink(filepath)
133 133
134 134 def _getfiles(self):
135 135 """Return a list of (filename, [node,...]) for all the revisions that
136 136 exist in the store.
137 137
138 138 This is useful for obtaining a list of all the contents of the store
139 139 when performing a repack to another store, since the store API requires
140 140 name+node keys and not namehash+node keys.
141 141 """
142 142 existing = {}
143 143 for filenamehash, node in self._listkeys():
144 144 existing.setdefault(filenamehash, []).append(node)
145 145
146 146 filenamemap = self._resolvefilenames(existing.keys())
147 147
148 148 for filename, sha in filenamemap.items():
149 149 yield (filename, existing[sha])
150 150
151 151 def _resolvefilenames(self, hashes):
152 152 """Given a list of filename hashes that are present in the
153 153 remotefilelog store, return a mapping from filename->hash.
154 154
155 155 This is useful when converting remotefilelog blobs into other storage
156 156 formats.
157 157 """
158 158 if not hashes:
159 159 return {}
160 160
161 161 filenames = {}
162 162 missingfilename = set(hashes)
163 163
164 164 # Start with a full manifest, since it'll cover the majority of files
165 165 for filename in self.repo[b'tip'].manifest():
166 166 sha = hashutil.sha1(filename).digest()
167 167 if sha in missingfilename:
168 168 filenames[filename] = sha
169 169 missingfilename.discard(sha)
170 170
171 171 # Scan the changelog until we've found every file name
172 172 cl = self.repo.unfiltered().changelog
173 173 for rev in range(len(cl) - 1, -1, -1):
174 174 if not missingfilename:
175 175 break
176 176 files = cl.readfiles(cl.node(rev))
177 177 for filename in files:
178 178 sha = hashutil.sha1(filename).digest()
179 179 if sha in missingfilename:
180 180 filenames[filename] = sha
181 181 missingfilename.discard(sha)
182 182
183 183 return filenames
184 184
185 185 def _getrepocachepath(self):
186 186 return (
187 187 os.path.join(self._path, self._reponame)
188 188 if self._shared
189 189 else self._path
190 190 )
191 191
192 192 def _listkeys(self):
193 193 """List all the remotefilelog keys that exist in the store.
194 194
195 195 Returns a iterator of (filename hash, filecontent hash) tuples.
196 196 """
197 197
198 198 for root, dirs, files in os.walk(self._getrepocachepath()):
199 199 for filename in files:
200 200 if len(filename) != 40:
201 201 continue
202 202 node = filename
203 203 if self._shared:
204 204 # .../1a/85ffda..be21
205 205 filenamehash = root[-41:-39] + root[-38:]
206 206 else:
207 207 filenamehash = root[-40:]
208 208 yield (bin(filenamehash), bin(node))
209 209
210 210 def _getfilepath(self, name, node):
211 211 node = hex(node)
212 212 if self._shared:
213 213 key = shallowutil.getcachekey(self._reponame, name, node)
214 214 else:
215 215 key = shallowutil.getlocalkey(name, node)
216 216
217 217 return os.path.join(self._path, key)
218 218
219 219 def _getdata(self, name, node):
220 220 filepath = self._getfilepath(name, node)
221 221 try:
222 222 data = shallowutil.readfile(filepath)
223 223 if self._validatecache and not self._validatedata(data, filepath):
224 224 if self._validatecachelog:
225 225 with open(self._validatecachelog, b'ab+') as f:
226 226 f.write(b"corrupt %s during read\n" % filepath)
227 227 os.rename(filepath, filepath + b".corrupt")
228 228 raise KeyError(b"corrupt local cache file %s" % filepath)
229 229 except IOError:
230 230 raise KeyError(
231 231 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
232 232 )
233 233
234 234 return data
235 235
236 236 def addremotefilelognode(self, name, node, data):
237 237 filepath = self._getfilepath(name, node)
238 238
239 239 oldumask = os.umask(0o002)
240 240 try:
241 241 # if this node already exists, save the old version for
242 242 # recovery/debugging purposes.
243 243 if os.path.exists(filepath):
244 244 newfilename = filepath + b'_old'
245 245 # newfilename can be read-only and shutil.copy will fail.
246 246 # Delete newfilename to avoid it
247 247 if os.path.exists(newfilename):
248 248 shallowutil.unlinkfile(newfilename)
249 249 shutil.copy(filepath, newfilename)
250 250
251 251 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
252 252 shallowutil.writefile(filepath, data, readonly=True)
253 253
254 254 if self._validatecache:
255 255 if not self._validatekey(filepath, b'write'):
256 256 raise error.Abort(
257 257 _(b"local cache write was corrupted %s") % filepath
258 258 )
259 259 finally:
260 260 os.umask(oldumask)
261 261
262 262 def markrepo(self, path):
263 263 """Call this to add the given repo path to the store's list of
264 264 repositories that are using it. This is useful later when doing garbage
265 265 collection, since it allows us to insecpt the repos to see what nodes
266 266 they want to be kept alive in the store.
267 267 """
268 268 repospath = os.path.join(self._path, b"repos")
269 269 with open(repospath, b'ab') as reposfile:
270 270 reposfile.write(os.path.dirname(path) + b"\n")
271 271
272 272 repospathstat = os.stat(repospath)
273 273 if repospathstat.st_uid == self._uid:
274 274 os.chmod(repospath, 0o0664)
275 275
276 276 def _validatekey(self, path, action):
277 277 with open(path, b'rb') as f:
278 278 data = f.read()
279 279
280 280 if self._validatedata(data, path):
281 281 return True
282 282
283 283 if self._validatecachelog:
284 284 with open(self._validatecachelog, b'ab+') as f:
285 285 f.write(b"corrupt %s during %s\n" % (path, action))
286 286
287 287 os.rename(path, path + b".corrupt")
288 288 return False
289 289
290 290 def _validatedata(self, data, path):
291 291 try:
292 292 if len(data) > 0:
293 293 # see remotefilelogserver.createfileblob for the format
294 294 offset, size, flags = shallowutil.parsesizeflags(data)
295 295 if len(data) <= size:
296 296 # it is truncated
297 297 return False
298 298
299 299 # extract the node from the metadata
300 300 offset += size
301 301 datanode = data[offset : offset + 20]
302 302
303 303 # and compare against the path
304 304 if os.path.basename(path) == hex(datanode):
305 305 # Content matches the intended path
306 306 return True
307 307 return False
308 308 except (ValueError, shallowutil.BadRemotefilelogHeader):
309 309 pass
310 310
311 311 return False
312 312
313 313 def gc(self, keepkeys):
314 314 ui = self.ui
315 315 cachepath = self._path
316 316
317 317 # prune cache
318 318 queue = pycompat.queue.PriorityQueue()
319 319 originalsize = 0
320 320 size = 0
321 321 count = 0
322 322 removed = 0
323 323
324 324 # keep files newer than a day even if they aren't needed
325 325 limit = time.time() - (60 * 60 * 24)
326 326
327 327 progress = ui.makeprogress(
328 328 _(b"removing unnecessary files"), unit=b"files"
329 329 )
330 330 progress.update(0)
331 331 for root, dirs, files in os.walk(cachepath):
332 332 for file in files:
333 333 if file == b'repos':
334 334 continue
335 335
336 336 # Don't delete pack files
337 337 if b'/packs/' in root:
338 338 continue
339 339
340 340 progress.update(count)
341 341 path = os.path.join(root, file)
342 342 key = os.path.relpath(path, cachepath)
343 343 count += 1
344 344 try:
345 345 pathstat = os.stat(path)
346 346 except FileNotFoundError:
347 347 msg = _(
348 348 b"warning: file %s was removed by another process\n"
349 349 )
350 350 ui.warn(msg % path)
351 351 continue
352 352
353 353 originalsize += pathstat.st_size
354 354
355 355 if key in keepkeys or pathstat.st_atime > limit:
356 356 queue.put((pathstat.st_atime, path, pathstat))
357 357 size += pathstat.st_size
358 358 else:
359 359 try:
360 360 shallowutil.unlinkfile(path)
361 361 except FileNotFoundError:
362 362 msg = _(
363 363 b"warning: file %s was removed by another "
364 364 b"process\n"
365 365 )
366 366 ui.warn(msg % path)
367 367 continue
368 368 removed += 1
369 369 progress.complete()
370 370
371 371 # remove oldest files until under limit
372 372 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
373 373 if size > limit:
374 374 excess = size - limit
375 375 progress = ui.makeprogress(
376 376 _(b"enforcing cache limit"), unit=b"bytes", total=excess
377 377 )
378 378 removedexcess = 0
379 379 while queue and size > limit and size > 0:
380 380 progress.update(removedexcess)
381 381 atime, oldpath, oldpathstat = queue.get()
382 382 try:
383 383 shallowutil.unlinkfile(oldpath)
384 384 except FileNotFoundError:
385 385 msg = _(
386 386 b"warning: file %s was removed by another process\n"
387 387 )
388 388 ui.warn(msg % oldpath)
389 389 size -= oldpathstat.st_size
390 390 removed += 1
391 391 removedexcess += oldpathstat.st_size
392 392 progress.complete()
393 393
394 394 ui.status(
395 395 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
396 396 % (
397 397 removed,
398 398 count,
399 399 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
400 400 float(size) / 1024.0 / 1024.0 / 1024.0,
401 401 )
402 402 )
403 403
404 404
405 405 class baseunionstore:
406 406 def __init__(self, *args, **kwargs):
407 407 # If one of the functions that iterates all of the stores is about to
408 408 # throw a KeyError, try this many times with a full refresh between
409 409 # attempts. A repack operation may have moved data from one store to
410 410 # another while we were running.
411 411 self.numattempts = kwargs.get('numretries', 0) + 1
412 412 # If not-None, call this function on every retry and if the attempts are
413 413 # exhausted.
414 414 self.retrylog = kwargs.get('retrylog', None)
415 415
416 416 def markforrefresh(self):
417 417 for store in self.stores:
418 if hasattr(store, b'markforrefresh'):
418 if hasattr(store, 'markforrefresh'):
419 419 store.markforrefresh()
420 420
421 421 @staticmethod
422 422 def retriable(fn):
423 423 def noop(*args):
424 424 pass
425 425
426 426 def wrapped(self, *args, **kwargs):
427 427 retrylog = self.retrylog or noop
428 428 funcname = fn.__name__
429 429 i = 0
430 430 while i < self.numattempts:
431 431 if i > 0:
432 432 retrylog(
433 433 b're-attempting (n=%d) %s\n'
434 434 % (i, pycompat.sysbytes(funcname))
435 435 )
436 436 self.markforrefresh()
437 437 i += 1
438 438 try:
439 439 return fn(self, *args, **kwargs)
440 440 except KeyError:
441 441 if i == self.numattempts:
442 442 # retries exhausted
443 443 retrylog(
444 444 b'retries exhausted in %s, raising KeyError\n'
445 445 % pycompat.sysbytes(funcname)
446 446 )
447 447 raise
448 448
449 449 return wrapped
@@ -1,670 +1,670 b''
1 1 # fileserverclient.py - client for communicating with the cache process
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import io
10 10 import os
11 11 import threading
12 12 import time
13 13 import zlib
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial.node import bin, hex
17 17 from mercurial import (
18 18 error,
19 19 pycompat,
20 20 revlog,
21 21 sshpeer,
22 22 util,
23 23 wireprotov1peer,
24 24 )
25 25 from mercurial.utils import (
26 26 hashutil,
27 27 procutil,
28 28 )
29 29
30 30 from . import (
31 31 constants,
32 32 contentstore,
33 33 metadatastore,
34 34 )
35 35
36 36 _sshv1peer = sshpeer.sshv1peer
37 37
38 38 # Statistics for debugging
39 39 fetchcost = 0
40 40 fetches = 0
41 41 fetched = 0
42 42 fetchmisses = 0
43 43
44 44 _lfsmod = None
45 45
46 46
47 47 def getcachekey(reponame, file, id):
48 48 pathhash = hex(hashutil.sha1(file).digest())
49 49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50 50
51 51
52 52 def getlocalkey(file, id):
53 53 pathhash = hex(hashutil.sha1(file).digest())
54 54 return os.path.join(pathhash, id)
55 55
56 56
57 57 def peersetup(ui, peer):
58 58 class remotefilepeer(peer.__class__):
59 59 @wireprotov1peer.batchable
60 60 def x_rfl_getfile(self, file, node):
61 61 if not self.capable(b'x_rfl_getfile'):
62 62 raise error.Abort(
63 63 b'configured remotefile server does not support getfile'
64 64 )
65 65
66 66 def decode(d):
67 67 code, data = d.split(b'\0', 1)
68 68 if int(code):
69 69 raise error.LookupError(file, node, data)
70 70 return data
71 71
72 72 return {b'file': file, b'node': node}, decode
73 73
74 74 @wireprotov1peer.batchable
75 75 def x_rfl_getflogheads(self, path):
76 76 if not self.capable(b'x_rfl_getflogheads'):
77 77 raise error.Abort(
78 78 b'configured remotefile server does not '
79 79 b'support getflogheads'
80 80 )
81 81
82 82 def decode(d):
83 83 return d.split(b'\n') if d else []
84 84
85 85 return {b'path': path}, decode
86 86
87 87 def _updatecallstreamopts(self, command, opts):
88 88 if command != b'getbundle':
89 89 return
90 90 if (
91 91 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
92 92 not in self.capabilities()
93 93 ):
94 94 return
95 95 if not hasattr(self, '_localrepo'):
96 96 return
97 97 if (
98 98 constants.SHALLOWREPO_REQUIREMENT
99 99 not in self._localrepo.requirements
100 100 ):
101 101 return
102 102
103 103 bundlecaps = opts.get(b'bundlecaps')
104 104 if bundlecaps:
105 105 bundlecaps = [bundlecaps]
106 106 else:
107 107 bundlecaps = []
108 108
109 109 # shallow, includepattern, and excludepattern are a hacky way of
110 110 # carrying over data from the local repo to this getbundle
111 111 # command. We need to do it this way because bundle1 getbundle
112 112 # doesn't provide any other place we can hook in to manipulate
113 113 # getbundle args before it goes across the wire. Once we get rid
114 114 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
115 115 # do this more cleanly.
116 116 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
117 117 if self._localrepo.includepattern:
118 118 patterns = b'\0'.join(self._localrepo.includepattern)
119 119 includecap = b"includepattern=" + patterns
120 120 bundlecaps.append(includecap)
121 121 if self._localrepo.excludepattern:
122 122 patterns = b'\0'.join(self._localrepo.excludepattern)
123 123 excludecap = b"excludepattern=" + patterns
124 124 bundlecaps.append(excludecap)
125 125 opts[b'bundlecaps'] = b','.join(bundlecaps)
126 126
127 127 def _sendrequest(self, command, args, **opts):
128 128 self._updatecallstreamopts(command, args)
129 129 return super(remotefilepeer, self)._sendrequest(
130 130 command, args, **opts
131 131 )
132 132
133 133 def _callstream(self, command, **opts):
134 134 supertype = super(remotefilepeer, self)
135 135 if not hasattr(supertype, '_sendrequest'):
136 136 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
137 137 return super(remotefilepeer, self)._callstream(command, **opts)
138 138
139 139 peer.__class__ = remotefilepeer
140 140
141 141
142 142 class cacheconnection:
143 143 """The connection for communicating with the remote cache. Performs
144 144 gets and sets by communicating with an external process that has the
145 145 cache-specific implementation.
146 146 """
147 147
148 148 def __init__(self):
149 149 self.pipeo = self.pipei = self.pipee = None
150 150 self.subprocess = None
151 151 self.connected = False
152 152
153 153 def connect(self, cachecommand):
154 154 if self.pipeo:
155 155 raise error.Abort(_(b"cache connection already open"))
156 156 self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4(
157 157 cachecommand
158 158 )
159 159 self.connected = True
160 160
161 161 def close(self):
162 162 def tryclose(pipe):
163 163 try:
164 164 pipe.close()
165 165 except Exception:
166 166 pass
167 167
168 168 if self.connected:
169 169 try:
170 170 self.pipei.write(b"exit\n")
171 171 except Exception:
172 172 pass
173 173 tryclose(self.pipei)
174 174 self.pipei = None
175 175 tryclose(self.pipeo)
176 176 self.pipeo = None
177 177 tryclose(self.pipee)
178 178 self.pipee = None
179 179 try:
180 180 # Wait for process to terminate, making sure to avoid deadlock.
181 181 # See https://docs.python.org/2/library/subprocess.html for
182 182 # warnings about wait() and deadlocking.
183 183 self.subprocess.communicate()
184 184 except Exception:
185 185 pass
186 186 self.subprocess = None
187 187 self.connected = False
188 188
189 189 def request(self, request, flush=True):
190 190 if self.connected:
191 191 try:
192 192 self.pipei.write(request)
193 193 if flush:
194 194 self.pipei.flush()
195 195 except IOError:
196 196 self.close()
197 197
198 198 def receiveline(self):
199 199 if not self.connected:
200 200 return None
201 201 try:
202 202 result = self.pipeo.readline()[:-1]
203 203 if not result:
204 204 self.close()
205 205 except IOError:
206 206 self.close()
207 207
208 208 return result
209 209
210 210
211 211 def _getfilesbatch(
212 212 remote, receivemissing, progresstick, missed, idmap, batchsize
213 213 ):
214 214 # Over http(s), iterbatch is a streamy method and we can start
215 215 # looking at results early. This means we send one (potentially
216 216 # large) request, but then we show nice progress as we process
217 217 # file results, rather than showing chunks of $batchsize in
218 218 # progress.
219 219 #
220 220 # Over ssh, iterbatch isn't streamy because batch() wasn't
221 221 # explicitly designed as a streaming method. In the future we
222 222 # should probably introduce a streambatch() method upstream and
223 223 # use that for this.
224 224 with remote.commandexecutor() as e:
225 225 futures = []
226 226 for m in missed:
227 227 futures.append(
228 228 e.callcommand(
229 229 b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]}
230 230 )
231 231 )
232 232
233 233 for i, m in enumerate(missed):
234 234 r = futures[i].result()
235 235 futures[i] = None # release memory
236 236 file_ = idmap[m]
237 237 node = m[-40:]
238 238 receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node)
239 239 progresstick()
240 240
241 241
242 242 def _getfiles_optimistic(
243 243 remote, receivemissing, progresstick, missed, idmap, step
244 244 ):
245 245 remote._callstream(b"x_rfl_getfiles")
246 246 i = 0
247 247 pipeo = remote._pipeo
248 248 pipei = remote._pipei
249 249 while i < len(missed):
250 250 # issue a batch of requests
251 251 start = i
252 252 end = min(len(missed), start + step)
253 253 i = end
254 254 for missingid in missed[start:end]:
255 255 # issue new request
256 256 versionid = missingid[-40:]
257 257 file = idmap[missingid]
258 258 sshrequest = b"%s%s\n" % (versionid, file)
259 259 pipeo.write(sshrequest)
260 260 pipeo.flush()
261 261
262 262 # receive batch results
263 263 for missingid in missed[start:end]:
264 264 versionid = missingid[-40:]
265 265 file = idmap[missingid]
266 266 receivemissing(pipei, file, versionid)
267 267 progresstick()
268 268
269 269 # End the command
270 270 pipeo.write(b'\n')
271 271 pipeo.flush()
272 272
273 273
274 274 def _getfiles_threaded(
275 275 remote, receivemissing, progresstick, missed, idmap, step
276 276 ):
277 277 remote._callstream(b"x_rfl_getfiles")
278 278 pipeo = remote._pipeo
279 279 pipei = remote._pipei
280 280
281 281 def writer():
282 282 for missingid in missed:
283 283 versionid = missingid[-40:]
284 284 file = idmap[missingid]
285 285 sshrequest = b"%s%s\n" % (versionid, file)
286 286 pipeo.write(sshrequest)
287 287 pipeo.flush()
288 288
289 289 writerthread = threading.Thread(target=writer)
290 290 writerthread.daemon = True
291 291 writerthread.start()
292 292
293 293 for missingid in missed:
294 294 versionid = missingid[-40:]
295 295 file = idmap[missingid]
296 296 receivemissing(pipei, file, versionid)
297 297 progresstick()
298 298
299 299 writerthread.join()
300 300 # End the command
301 301 pipeo.write(b'\n')
302 302 pipeo.flush()
303 303
304 304
305 305 class fileserverclient:
306 306 """A client for requesting files from the remote file server."""
307 307
308 308 def __init__(self, repo):
309 309 ui = repo.ui
310 310 self.repo = repo
311 311 self.ui = ui
312 312 self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess")
313 313 if self.cacheprocess:
314 314 self.cacheprocess = util.expandpath(self.cacheprocess)
315 315
316 316 # This option causes remotefilelog to pass the full file path to the
317 317 # cacheprocess instead of a hashed key.
318 318 self.cacheprocesspasspath = ui.configbool(
319 319 b"remotefilelog", b"cacheprocess.includepath"
320 320 )
321 321
322 322 self.debugoutput = ui.configbool(b"remotefilelog", b"debug")
323 323
324 324 self.remotecache = cacheconnection()
325 325
326 326 def setstore(self, datastore, historystore, writedata, writehistory):
327 327 self.datastore = datastore
328 328 self.historystore = historystore
329 329 self.writedata = writedata
330 330 self.writehistory = writehistory
331 331
332 332 def _connect(self):
333 333 return self.repo.connectionpool.get(self.repo.fallbackpath)
334 334
335 335 def request(self, fileids):
336 336 """Takes a list of filename/node pairs and fetches them from the
337 337 server. Files are stored in the local cache.
338 338 A list of nodes that the server couldn't find is returned.
339 339 If the connection fails, an exception is raised.
340 340 """
341 341 if not self.remotecache.connected:
342 342 self.connect()
343 343 cache = self.remotecache
344 344 writedata = self.writedata
345 345
346 346 repo = self.repo
347 347 total = len(fileids)
348 348 request = b"get\n%d\n" % total
349 349 idmap = {}
350 350 reponame = repo.name
351 351 for file, id in fileids:
352 352 fullid = getcachekey(reponame, file, id)
353 353 if self.cacheprocesspasspath:
354 354 request += file + b'\0'
355 355 request += fullid + b"\n"
356 356 idmap[fullid] = file
357 357
358 358 cache.request(request)
359 359
360 360 progress = self.ui.makeprogress(_(b'downloading'), total=total)
361 361 progress.update(0)
362 362
363 363 missed = []
364 364 while True:
365 365 missingid = cache.receiveline()
366 366 if not missingid:
367 367 missedset = set(missed)
368 368 for missingid in idmap:
369 369 if not missingid in missedset:
370 370 missed.append(missingid)
371 371 self.ui.warn(
372 372 _(
373 373 b"warning: cache connection closed early - "
374 374 + b"falling back to server\n"
375 375 )
376 376 )
377 377 break
378 378 if missingid == b"0":
379 379 break
380 380 if missingid.startswith(b"_hits_"):
381 381 # receive progress reports
382 382 parts = missingid.split(b"_")
383 383 progress.increment(int(parts[2]))
384 384 continue
385 385
386 386 missed.append(missingid)
387 387
388 388 global fetchmisses
389 389 fetchmisses += len(missed)
390 390
391 391 fromcache = total - len(missed)
392 392 progress.update(fromcache, total=total)
393 393 self.ui.log(
394 394 b"remotefilelog",
395 395 b"remote cache hit rate is %r of %r\n",
396 396 fromcache,
397 397 total,
398 398 hit=fromcache,
399 399 total=total,
400 400 )
401 401
402 402 oldumask = os.umask(0o002)
403 403 try:
404 404 # receive cache misses from master
405 405 if missed:
406 406 # When verbose is true, sshpeer prints 'running ssh...'
407 407 # to stdout, which can interfere with some command
408 408 # outputs
409 409 verbose = self.ui.verbose
410 410 self.ui.verbose = False
411 411 try:
412 412 with self._connect() as conn:
413 413 remote = conn.peer
414 414 if remote.capable(
415 415 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
416 416 ):
417 417 if not isinstance(remote, _sshv1peer):
418 418 raise error.Abort(
419 419 b'remotefilelog requires ssh servers'
420 420 )
421 421 step = self.ui.configint(
422 422 b'remotefilelog', b'getfilesstep'
423 423 )
424 424 getfilestype = self.ui.config(
425 425 b'remotefilelog', b'getfilestype'
426 426 )
427 427 if getfilestype == b'threaded':
428 428 _getfiles = _getfiles_threaded
429 429 else:
430 430 _getfiles = _getfiles_optimistic
431 431 _getfiles(
432 432 remote,
433 433 self.receivemissing,
434 434 progress.increment,
435 435 missed,
436 436 idmap,
437 437 step,
438 438 )
439 439 elif remote.capable(b"x_rfl_getfile"):
440 440 if remote.capable(b'batch'):
441 441 batchdefault = 100
442 442 else:
443 443 batchdefault = 10
444 444 batchsize = self.ui.configint(
445 445 b'remotefilelog', b'batchsize', batchdefault
446 446 )
447 447 self.ui.debug(
448 448 b'requesting %d files from '
449 449 b'remotefilelog server...\n' % len(missed)
450 450 )
451 451 _getfilesbatch(
452 452 remote,
453 453 self.receivemissing,
454 454 progress.increment,
455 455 missed,
456 456 idmap,
457 457 batchsize,
458 458 )
459 459 else:
460 460 raise error.Abort(
461 461 b"configured remotefilelog server"
462 462 b" does not support remotefilelog"
463 463 )
464 464
465 465 self.ui.log(
466 466 b"remotefilefetchlog",
467 467 b"Success\n",
468 468 fetched_files=progress.pos - fromcache,
469 469 total_to_fetch=total - fromcache,
470 470 )
471 471 except Exception:
472 472 self.ui.log(
473 473 b"remotefilefetchlog",
474 474 b"Fail\n",
475 475 fetched_files=progress.pos - fromcache,
476 476 total_to_fetch=total - fromcache,
477 477 )
478 478 raise
479 479 finally:
480 480 self.ui.verbose = verbose
481 481 # send to memcache
482 482 request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed))
483 483 cache.request(request)
484 484
485 485 progress.complete()
486 486
487 487 # mark ourselves as a user of this cache
488 488 writedata.markrepo(self.repo.path)
489 489 finally:
490 490 os.umask(oldumask)
491 491
492 492 def receivemissing(self, pipe, filename, node):
493 493 line = pipe.readline()[:-1]
494 494 if not line:
495 495 raise error.ResponseError(
496 496 _(b"error downloading file contents:"),
497 497 _(b"connection closed early"),
498 498 )
499 499 size = int(line)
500 500 data = pipe.read(size)
501 501 if len(data) != size:
502 502 raise error.ResponseError(
503 503 _(b"error downloading file contents:"),
504 504 _(b"only received %s of %s bytes") % (len(data), size),
505 505 )
506 506
507 507 self.writedata.addremotefilelognode(
508 508 filename, bin(node), zlib.decompress(data)
509 509 )
510 510
511 511 def connect(self):
512 512 if self.cacheprocess:
513 513 cmd = b"%s %s" % (self.cacheprocess, self.writedata._path)
514 514 self.remotecache.connect(cmd)
515 515 else:
516 516 # If no cache process is specified, we fake one that always
517 517 # returns cache misses. This enables tests to run easily
518 518 # and may eventually allow us to be a drop in replacement
519 519 # for the largefiles extension.
520 520 class simplecache:
521 521 def __init__(self):
522 522 self.missingids = []
523 523 self.connected = True
524 524
525 525 def close(self):
526 526 pass
527 527
528 528 def request(self, value, flush=True):
529 529 lines = value.split(b"\n")
530 530 if lines[0] != b"get":
531 531 return
532 532 self.missingids = lines[2:-1]
533 533 self.missingids.append(b'0')
534 534
535 535 def receiveline(self):
536 536 if len(self.missingids) > 0:
537 537 return self.missingids.pop(0)
538 538 return None
539 539
540 540 self.remotecache = simplecache()
541 541
542 542 def close(self):
543 543 if fetches:
544 544 msg = (
545 545 b"%d files fetched over %d fetches - "
546 546 + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
547 547 ) % (
548 548 fetched,
549 549 fetches,
550 550 fetchmisses,
551 551 float(fetched - fetchmisses) / float(fetched) * 100.0,
552 552 fetchcost,
553 553 )
554 554 if self.debugoutput:
555 555 self.ui.warn(msg)
556 556 self.ui.log(
557 557 b"remotefilelog.prefetch",
558 558 msg.replace(b"%", b"%%"),
559 559 remotefilelogfetched=fetched,
560 560 remotefilelogfetches=fetches,
561 561 remotefilelogfetchmisses=fetchmisses,
562 562 remotefilelogfetchtime=fetchcost * 1000,
563 563 )
564 564
565 565 if self.remotecache.connected:
566 566 self.remotecache.close()
567 567
568 568 def prefetch(
569 569 self, fileids, force=False, fetchdata=True, fetchhistory=False
570 570 ):
571 571 """downloads the given file versions to the cache"""
572 572 repo = self.repo
573 573 idstocheck = []
574 574 for file, id in fileids:
575 575 # hack
576 576 # - we don't use .hgtags
577 577 # - workingctx produces ids with length 42,
578 578 # which we skip since they aren't in any cache
579 579 if (
580 580 file == b'.hgtags'
581 581 or len(id) == 42
582 582 or not repo.shallowmatch(file)
583 583 ):
584 584 continue
585 585
586 586 idstocheck.append((file, bin(id)))
587 587
588 588 datastore = self.datastore
589 589 historystore = self.historystore
590 590 if force:
591 591 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
592 592 historystore = metadatastore.unionmetadatastore(
593 593 *repo.sharedhistorystores
594 594 )
595 595
596 596 missingids = set()
597 597 if fetchdata:
598 598 missingids.update(datastore.getmissing(idstocheck))
599 599 if fetchhistory:
600 600 missingids.update(historystore.getmissing(idstocheck))
601 601
602 602 # partition missing nodes into nullid and not-nullid so we can
603 603 # warn about this filtering potentially shadowing bugs.
604 604 nullids = len(
605 605 [None for unused, id in missingids if id == self.repo.nullid]
606 606 )
607 607 if nullids:
608 608 missingids = [
609 609 (f, id) for f, id in missingids if id != self.repo.nullid
610 610 ]
611 611 repo.ui.develwarn(
612 612 (
613 613 b'remotefilelog not fetching %d null revs'
614 614 b' - this is likely hiding bugs' % nullids
615 615 ),
616 616 config=b'remotefilelog-ext',
617 617 )
618 618 if missingids:
619 619 global fetches, fetched, fetchcost
620 620 fetches += 1
621 621
622 622 # We want to be able to detect excess individual file downloads, so
623 623 # let's log that information for debugging.
624 624 if fetches >= 15 and fetches < 18:
625 625 if fetches == 15:
626 626 fetchwarning = self.ui.config(
627 627 b'remotefilelog', b'fetchwarning'
628 628 )
629 629 if fetchwarning:
630 630 self.ui.warn(fetchwarning + b'\n')
631 631 self.logstacktrace()
632 632 missingids = [(file, hex(id)) for file, id in sorted(missingids)]
633 633 fetched += len(missingids)
634 634 start = time.time()
635 635 missingids = self.request(missingids)
636 636 if missingids:
637 637 raise error.Abort(
638 638 _(b"unable to download %d files") % len(missingids)
639 639 )
640 640 fetchcost += time.time() - start
641 641 self._lfsprefetch(fileids)
642 642
643 643 def _lfsprefetch(self, fileids):
644 if not _lfsmod or not hasattr(self.repo.svfs, b'lfslocalblobstore'):
644 if not _lfsmod or not hasattr(self.repo.svfs, 'lfslocalblobstore'):
645 645 return
646 646 if not _lfsmod.wrapper.candownload(self.repo):
647 647 return
648 648 pointers = []
649 649 store = self.repo.svfs.lfslocalblobstore
650 650 for file, id in fileids:
651 651 node = bin(id)
652 652 rlog = self.repo.file(file)
653 653 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
654 654 text = rlog.rawdata(node)
655 655 p = _lfsmod.pointer.deserialize(text)
656 656 oid = p.oid()
657 657 if not store.has(oid):
658 658 pointers.append(p)
659 659 if len(pointers) > 0:
660 660 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
661 661 assert all(store.has(p.oid()) for p in pointers)
662 662
663 663 def logstacktrace(self):
664 664 import traceback
665 665
666 666 self.ui.log(
667 667 b'remotefilelog',
668 668 b'excess remotefilelog fetching:\n%s\n',
669 669 b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()),
670 670 )
General Comments 0
You need to be logged in to leave comments. Login now