safehasattr: drop usage in favor of hasattr...
marmoute
r51821:d718eddf default
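For context (this note and the sketch below are not part of the commit): Mercurial's util.safehasattr() dates from Python 2, where the builtin hasattr() swallowed every exception raised during attribute lookup. On Python 3, hasattr() only catches AttributeError, so the helper is redundant and this series simply switches call sites to the builtin, as the changed lines in the hunks below show. A minimal sketch, assuming the historical safehasattr() implementation:

_notset = object()


def safehasattr(thing, attr):
    # rough historical shape of mercurial.util.safehasattr
    return getattr(thing, attr, _notset) is not _notset


class Flaky:
    @property
    def attr(self):
        raise RuntimeError("lookup blew up")


# On Python 3 both spellings behave the same: a failing property propagates
# its error instead of being silently reported as "attribute missing".
for probe in (safehasattr, hasattr):
    try:
        probe(Flaky(), "attr")
    except RuntimeError:
        print("%s propagated the lookup error" % probe.__name__)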

The requested changes are too big and the content was truncated; only part of the diff is shown.

@@ -1,125 +1,124 b''
1 1 # __init__.py - asv benchmark suite
2 2 #
3 3 # Copyright 2016 Logilab SA <contact@logilab.fr>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # "historical portability" policy of contrib/benchmarks:
9 9 #
10 10 # We have to make this code work correctly with current mercurial stable branch
11 11 # and if possible with reasonable cost with early Mercurial versions.
12 12
13 13 '''ASV (https://asv.readthedocs.io) benchmark suite
14 14
15 15 Benchmarks are parameterized against reference repositories found in the
16 16 directory pointed to by the REPOS_DIR environment variable.
17 17
18 18 Invocation example:
19 19
20 20 $ export REPOS_DIR=~/hgperf/repos
21 21 # run suite on given revision
22 22 $ asv --config contrib/asv.conf.json run REV
23 23 # run suite on new changesets found in stable and default branch
24 24 $ asv --config contrib/asv.conf.json run NEW
25 25 # display a comparative result table of benchmark results between two given
26 26 # revisions
27 27 $ asv --config contrib/asv.conf.json compare REV1 REV2
28 28 # compute regression detection and generate ASV static website
29 29 $ asv --config contrib/asv.conf.json publish
30 30 # serve the static website
31 31 $ asv --config contrib/asv.conf.json preview
32 32 '''
33 33
34 34
35 35 import functools
36 36 import os
37 37 import re
38 38
39 39 from mercurial import (
40 40 extensions,
41 41 hg,
42 42 ui as uimod,
43 util,
44 43 )
45 44
46 45 basedir = os.path.abspath(
47 46 os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)
48 47 )
49 48 reposdir = os.environ['REPOS_DIR']
50 49 reposnames = [
51 50 name
52 51 for name in os.listdir(reposdir)
53 52 if os.path.isdir(os.path.join(reposdir, name, ".hg"))
54 53 ]
55 54 if not reposnames:
56 55 raise ValueError("No repositories found in $REPOS_DIR")
57 56 outputre = re.compile(
58 57 (
59 58 r'! wall (\d+.\d+) comb \d+.\d+ user \d+.\d+ sys '
60 59 r'\d+.\d+ \(best of \d+\)'
61 60 )
62 61 )
63 62
64 63
65 64 def runperfcommand(reponame, command, *args, **kwargs):
66 65 os.environ["HGRCPATH"] = os.environ.get("ASVHGRCPATH", "")
67 66 # for "historical portability"
68 67 # ui.load() has been available since d83ca85
69 if util.safehasattr(uimod.ui, "load"):
68 if hasattr(uimod.ui, "load"):
70 69 ui = uimod.ui.load()
71 70 else:
72 71 ui = uimod.ui()
73 72 repo = hg.repository(ui, os.path.join(reposdir, reponame))
74 73 perfext = extensions.load(
75 74 ui, 'perfext', os.path.join(basedir, 'contrib', 'perf.py')
76 75 )
77 76 cmd = getattr(perfext, command)
78 77 ui.pushbuffer()
79 78 cmd(ui, repo, *args, **kwargs)
80 79 output = ui.popbuffer()
81 80 match = outputre.search(output)
82 81 if not match:
83 82 raise ValueError("Invalid output {}".format(output))
84 83 return float(match.group(1))
85 84
86 85
87 86 def perfbench(repos=reposnames, name=None, params=None):
88 87 """decorator to declare ASV benchmark based on contrib/perf.py extension
89 88
90 89 An ASV benchmark is a python function with the given attributes:
91 90
92 91 __name__: should start with track_, time_ or mem_ to be collected by ASV
93 92 params and param_name: parameter matrix to display multiple graphs on the
94 93 same page.
95 94 pretty_name: If defined it's displayed in web-ui instead of __name__
96 95 (useful for revsets)
97 96 the module name is prepended to the benchmark name and displayed as
98 97 "category" in webui.
99 98
100 99 Benchmarks are automatically parameterized with repositories found in the
101 100 REPOS_DIR environment variable.
102 101
103 102 `params` is the param matrix in the form of a list of tuples
104 103 (param_name, [value0, value1])
105 104
106 105 For example [(x, [a, b]), (y, [c, d])] declares benchmarks for
107 106 (a, c), (a, d), (b, c) and (b, d).
108 107 """
109 108 params = list(params or [])
110 109 params.insert(0, ("repo", repos))
111 110
112 111 def decorator(func):
113 112 @functools.wraps(func)
114 113 def wrapped(repo, *args):
115 114 def perf(command, *a, **kw):
116 115 return runperfcommand(repo, command, *a, **kw)
117 116
118 117 return func(perf, *args)
119 118
120 119 wrapped.params = [p[1] for p in params]
121 120 wrapped.param_names = [p[0] for p in params]
122 121 wrapped.pretty_name = name
123 122 return wrapped
124 123
125 124 return decorator
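For orientation, a hedged usage sketch (not part of this diff) of the perfbench decorator defined above, as a benchmark module under contrib/benchmarks might use it; the perfstatus command name comes from contrib/perf.py, while the function and parameter names here are illustrative.

from . import perfbench


@perfbench()
def track_status(perf):
    # runs contrib/perf.py's perfstatus against every repository in $REPOS_DIR
    return perf("perfstatus", unknown=False)


@perfbench(params=[("unknown", [False, True])], name="status-unknown")
def track_status_unknown(perf, unknown):
    # same command, additionally parameterized on listing unknown files
    return perf("perfstatus", unknown=unknown)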
@@ -1,1162 +1,1162 b''
1 1 # absorb.py
2 2 #
3 3 # Copyright 2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """apply working directory changes to changesets (EXPERIMENTAL)
9 9
10 10 The absorb extension provides a command to use annotate information to
11 11 amend modified chunks into the corresponding non-public changesets.
12 12
13 13 ::
14 14
15 15 [absorb]
16 16 # only check 50 recent non-public changesets at most
17 17 max-stack-size = 50
18 18 # whether to add noise to new commits to avoid obsolescence cycle
19 19 add-noise = 1
20 20 # make `amend --correlated` a shortcut to the main command
21 21 amend-flag = correlated
22 22
23 23 [color]
24 24 absorb.description = yellow
25 25 absorb.node = blue bold
26 26 absorb.path = bold
27 27 """
28 28
29 29 # TODO:
30 30 # * Rename config items to [commands] namespace
31 31 # * Converge getdraftstack() with other code in core
32 32 # * move many attributes on fixupstate to be private
33 33
34 34
35 35 import collections
36 36
37 37 from mercurial.i18n import _
38 38 from mercurial.node import (
39 39 hex,
40 40 short,
41 41 )
42 42 from mercurial import (
43 43 cmdutil,
44 44 commands,
45 45 context,
46 46 crecord,
47 47 error,
48 48 linelog,
49 49 mdiff,
50 50 obsolete,
51 51 patch,
52 52 phases,
53 53 pycompat,
54 54 registrar,
55 55 rewriteutil,
56 56 scmutil,
57 57 util,
58 58 )
59 59 from mercurial.utils import stringutil
60 60
61 61 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
62 62 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
63 63 # be specifying the version(s) of Mercurial they are tested with, or
64 64 # leave the attribute unspecified.
65 65 testedwith = b'ships-with-hg-core'
66 66
67 67 cmdtable = {}
68 68 command = registrar.command(cmdtable)
69 69
70 70 configtable = {}
71 71 configitem = registrar.configitem(configtable)
72 72
73 73 configitem(b'absorb', b'add-noise', default=True)
74 74 configitem(b'absorb', b'amend-flag', default=None)
75 75 configitem(b'absorb', b'max-stack-size', default=50)
76 76
77 77 colortable = {
78 78 b'absorb.description': b'yellow',
79 79 b'absorb.node': b'blue bold',
80 80 b'absorb.path': b'bold',
81 81 }
82 82
83 83 defaultdict = collections.defaultdict
84 84
85 85
86 86 class nullui:
87 87 """blank ui object doing nothing"""
88 88
89 89 debugflag = False
90 90 verbose = False
91 91 quiet = True
92 92
93 93 def __getitem__(name):
94 94 def nullfunc(*args, **kwds):
95 95 return
96 96
97 97 return nullfunc
98 98
99 99
100 100 class emptyfilecontext:
101 101 """minimal filecontext representing an empty file"""
102 102
103 103 def __init__(self, repo):
104 104 self._repo = repo
105 105
106 106 def data(self):
107 107 return b''
108 108
109 109 def node(self):
110 110 return self._repo.nullid
111 111
112 112
113 113 def uniq(lst):
114 114 """list -> list. remove duplicated items without changing the order"""
115 115 seen = set()
116 116 result = []
117 117 for x in lst:
118 118 if x not in seen:
119 119 seen.add(x)
120 120 result.append(x)
121 121 return result
122 122
123 123
124 124 def getdraftstack(headctx, limit=None):
125 125 """(ctx, int?) -> [ctx]. get a linear stack of non-public changesets.
126 126
127 127 changesets are sorted in topo order, oldest first.
128 128 return at most limit items, if limit is a positive number.
129 129
130 130 merges are considered non-draft as well, i.e. every commit
131 131 returned has exactly one parent.
132 132 """
133 133 ctx = headctx
134 134 result = []
135 135 while ctx.phase() != phases.public:
136 136 if limit and len(result) >= limit:
137 137 break
138 138 parents = ctx.parents()
139 139 if len(parents) != 1:
140 140 break
141 141 result.append(ctx)
142 142 ctx = parents[0]
143 143 result.reverse()
144 144 return result
145 145
146 146
147 147 def getfilestack(stack, path, seenfctxs=None):
148 148 """([ctx], str, set) -> [fctx], {ctx: fctx}
149 149
150 150 stack is a list of contexts, from old to new. usually they are what
151 151 "getdraftstack" returns.
152 152
153 153 follows renames, but not copies.
154 154
155 155 seenfctxs is a set of filecontexts that will be considered "immutable".
156 156 they are usually what this function returned in earlier calls, useful
157 157 to avoid issues that a file was "moved" to multiple places and was then
158 158 modified differently, like: "a" was copied to "b", "a" was also copied to
159 159 "c" and then "a" was deleted, then both "b" and "c" were "moved" from "a"
160 160 and we enforce only one of them to be able to affect "a"'s content.
161 161
162 162 return an empty list and an empty dict, if the specified path does not
163 163 exist in stack[-1] (the top of the stack).
164 164
165 165 otherwise, return a list of de-duplicated filecontexts, and the map to
166 166 convert ctx in the stack to fctx, for possible mutable fctxs. the first item
167 167 of the list would be outside the stack and should be considered immutable.
168 168 the remaining items are within the stack.
169 169
170 170 for example, given the following changelog and corresponding filelog
171 171 revisions:
172 172
173 173 changelog: 3----4----5----6----7
174 174 filelog: x 0----1----1----2 (x: no such file yet)
175 175
176 176 - if stack = [5, 6, 7], returns ([0, 1, 2], {5: 1, 6: 1, 7: 2})
177 177 - if stack = [3, 4, 5], returns ([e, 0, 1], {4: 0, 5: 1}), where "e" is a
178 178 dummy empty filecontext.
179 179 - if stack = [2], returns ([], {})
180 180 - if stack = [7], returns ([1, 2], {7: 2})
181 181 - if stack = [6, 7], returns ([1, 2], {6: 1, 7: 2}), although {6: 1} can be
182 182 removed, since 1 is immutable.
183 183 """
184 184 if seenfctxs is None:
185 185 seenfctxs = set()
186 186 assert stack
187 187
188 188 if path not in stack[-1]:
189 189 return [], {}
190 190
191 191 fctxs = []
192 192 fctxmap = {}
193 193
194 194 pctx = stack[0].p1() # the public (immutable) ctx we stop at
195 195 for ctx in reversed(stack):
196 196 if path not in ctx: # the file is added in the next commit
197 197 pctx = ctx
198 198 break
199 199 fctx = ctx[path]
200 200 fctxs.append(fctx)
201 201 if fctx in seenfctxs: # treat fctx as the immutable one
202 202 pctx = None # do not add another immutable fctx
203 203 break
204 204 fctxmap[ctx] = fctx # only for mutable fctxs
205 205 copy = fctx.copysource()
206 206 if copy:
207 207 path = copy # follow rename
208 208 if path in ctx: # but do not follow copy
209 209 pctx = ctx.p1()
210 210 break
211 211
212 212 if pctx is not None: # need an extra immutable fctx
213 213 if path in pctx:
214 214 fctxs.append(pctx[path])
215 215 else:
216 216 fctxs.append(emptyfilecontext(pctx.repo()))
217 217
218 218 fctxs.reverse()
219 219 # note: we rely on a property of hg: filerev is not reused for linear
220 220 # history. i.e. it's impossible to have:
221 221 # changelog: 4----5----6 (linear, no merges)
222 222 # filelog: 1----2----1
223 223 # ^ reuse filerev (impossible)
224 224 # because parents are part of the hash. if that's not true, we need to
225 225 # remove uniq and find a different way to identify fctxs.
226 226 return uniq(fctxs), fctxmap
227 227
228 228
229 229 class overlaystore(patch.filestore):
230 230 """read-only, hybrid store based on a dict and ctx.
231 231 memworkingcopy: {path: content}, overrides file contents.
232 232 """
233 233
234 234 def __init__(self, basectx, memworkingcopy):
235 235 self.basectx = basectx
236 236 self.memworkingcopy = memworkingcopy
237 237
238 238 def getfile(self, path):
239 239 """comply with mercurial.patch.filestore.getfile"""
240 240 if path not in self.basectx:
241 241 return None, None, None
242 242 fctx = self.basectx[path]
243 243 if path in self.memworkingcopy:
244 244 content = self.memworkingcopy[path]
245 245 else:
246 246 content = fctx.data()
247 247 mode = (fctx.islink(), fctx.isexec())
248 248 copy = fctx.copysource()
249 249 return content, mode, copy
250 250
251 251
252 252 def overlaycontext(memworkingcopy, ctx, parents=None, extra=None, desc=None):
253 253 """({path: content}, ctx, (p1node, p2node)?, {}?) -> memctx
254 254 memworkingcopy overrides file contents.
255 255 """
256 256 # parents must contain 2 items: (node1, node2)
257 257 if parents is None:
258 258 parents = ctx.repo().changelog.parents(ctx.node())
259 259 if extra is None:
260 260 extra = ctx.extra()
261 261 if desc is None:
262 262 desc = ctx.description()
263 263 date = ctx.date()
264 264 user = ctx.user()
265 265 files = set(ctx.files()).union(memworkingcopy)
266 266 store = overlaystore(ctx, memworkingcopy)
267 267 return context.memctx(
268 268 repo=ctx.repo(),
269 269 parents=parents,
270 270 text=desc,
271 271 files=files,
272 272 filectxfn=store,
273 273 user=user,
274 274 date=date,
275 275 branch=None,
276 276 extra=extra,
277 277 )
278 278
279 279
280 280 class filefixupstate:
281 281 """state needed to apply fixups to a single file
282 282
283 283 internally, it keeps file contents of several revisions and a linelog.
284 284
285 285 the linelog uses odd revision numbers for original contents (fctxs passed
286 286 to __init__), and even revision numbers for fixups, like:
287 287
288 288 linelog rev 1: self.fctxs[0] (from an immutable "public" changeset)
289 289 linelog rev 2: fixups made to self.fctxs[0]
290 290 linelog rev 3: self.fctxs[1] (a child of fctxs[0])
291 291 linelog rev 4: fixups made to self.fctxs[1]
292 292 ...
293 293
294 294 a typical use is like:
295 295
296 296 1. call diffwith, to calculate self.fixups
297 297 2. (optionally), present self.fixups to the user, or change it
298 298 3. call apply, to apply changes
299 299 4. read results from "finalcontents", or call getfinalcontent
300 300 """
301 301
302 302 def __init__(self, fctxs, path, ui=None, **opts):
303 303 """([fctx], ui or None) -> None
304 304
305 305 fctxs should be linear, and sorted by topo order - oldest first.
306 306 fctxs[0] will be considered as "immutable" and will not be changed.
307 307 """
308 308 self.fctxs = fctxs
309 309 self.path = path
310 310 self.ui = ui or nullui()
311 311 self.opts = opts
312 312
313 313 # following fields are built from fctxs. they exist for perf reason
314 314 self.contents = [f.data() for f in fctxs]
315 315 self.contentlines = pycompat.maplist(mdiff.splitnewlines, self.contents)
316 316 self.linelog = self._buildlinelog()
317 317 if self.ui.debugflag:
318 318 assert self._checkoutlinelog() == self.contents
319 319
320 320 # following fields will be filled later
321 321 self.chunkstats = [0, 0] # [adopted, total : int]
322 322 self.targetlines = [] # [str]
323 323 self.fixups = [] # [(linelog rev, a1, a2, b1, b2)]
324 324 self.finalcontents = [] # [str]
325 325 self.ctxaffected = set()
326 326
327 327 def diffwith(self, targetfctx, fm=None):
328 328 """calculate fixups needed by examining the differences between
329 329 self.fctxs[-1] and targetfctx, chunk by chunk.
330 330
331 331 targetfctx is the target state we move towards. we may or may not be
332 332 able to get there because not all modified chunks can be amended into
333 333 a non-public fctx unambiguously.
334 334
335 335 call this only once, before apply().
336 336
337 337 update self.fixups, self.chunkstats, and self.targetlines.
338 338 """
339 339 a = self.contents[-1]
340 340 alines = self.contentlines[-1]
341 341 b = targetfctx.data()
342 342 blines = mdiff.splitnewlines(b)
343 343 self.targetlines = blines
344 344
345 345 self.linelog.annotate(self.linelog.maxrev)
346 346 annotated = self.linelog.annotateresult # [(linelog rev, linenum)]
347 347 assert len(annotated) == len(alines)
348 348 # add a dummy end line to make insertion at the end easier
349 349 if annotated:
350 350 dummyendline = (annotated[-1][0], annotated[-1][1] + 1)
351 351 annotated.append(dummyendline)
352 352
353 353 # analyse diff blocks
354 354 for chunk in self._alldiffchunks(a, b, alines, blines):
355 355 newfixups = self._analysediffchunk(chunk, annotated)
356 356 self.chunkstats[0] += bool(newfixups) # 1 or 0
357 357 self.chunkstats[1] += 1
358 358 self.fixups += newfixups
359 359 if fm is not None:
360 360 self._showchanges(fm, alines, blines, chunk, newfixups)
361 361
362 362 def apply(self):
363 363 """apply self.fixups. update self.linelog, self.finalcontents.
364 364
365 365 call this only once, before getfinalcontent(), after diffwith().
366 366 """
367 367 # the following is unnecessary, as it's done by "diffwith":
368 368 # self.linelog.annotate(self.linelog.maxrev)
369 369 for rev, a1, a2, b1, b2 in reversed(self.fixups):
370 370 blines = self.targetlines[b1:b2]
371 371 if self.ui.debugflag:
372 372 idx = (max(rev - 1, 0)) // 2
373 373 self.ui.write(
374 374 _(b'%s: chunk %d:%d -> %d lines\n')
375 375 % (short(self.fctxs[idx].node()), a1, a2, len(blines))
376 376 )
377 377 self.linelog.replacelines(rev, a1, a2, b1, b2)
378 378 if self.opts.get('edit_lines', False):
379 379 self.finalcontents = self._checkoutlinelogwithedits()
380 380 else:
381 381 self.finalcontents = self._checkoutlinelog()
382 382
383 383 def getfinalcontent(self, fctx):
384 384 """(fctx) -> str. get modified file content for a given filecontext"""
385 385 idx = self.fctxs.index(fctx)
386 386 return self.finalcontents[idx]
387 387
388 388 def _analysediffchunk(self, chunk, annotated):
389 389 """analyse a different chunk and return new fixups found
390 390
391 391 return [] if no lines from the chunk can be safely applied.
392 392
393 393 the chunk (or lines) cannot be safely applied, if, for example:
394 394 - the modified (deleted) lines belong to a public changeset
395 395 (self.fctxs[0])
396 396 - the chunk is a pure insertion and the adjacent lines (at most 2
397 397 lines) belong to different non-public changesets, or do not belong
398 398 to any non-public changesets.
399 399 - the chunk is modifying lines from different changesets.
400 400 in this case, if the number of lines deleted equals the number
401 401 of lines added, assume it's a simple 1:1 map (could be wrong).
402 402 otherwise, give up.
403 403 - the chunk is modifying lines from a single non-public changeset,
404 404 but other revisions touch the area as well. i.e. the lines are
405 405 not continuous as seen from the linelog.
406 406 """
407 407 a1, a2, b1, b2 = chunk
408 408 # find involved indexes from annotate result
409 409 involved = annotated[a1:a2]
410 410 if not involved and annotated: # a1 == a2 and a is not empty
411 411 # pure insertion, check nearby lines. ignore lines belonging
412 412 # to the public (first) changeset (i.e. annotated[i][0] == 1)
413 413 nearbylinenums = {a2, max(0, a1 - 1)}
414 414 involved = [
415 415 annotated[i] for i in nearbylinenums if annotated[i][0] != 1
416 416 ]
417 417 involvedrevs = list({r for r, l in involved})
418 418 newfixups = []
419 419 if len(involvedrevs) == 1 and self._iscontinuous(a1, a2 - 1, True):
420 420 # chunk belongs to a single revision
421 421 rev = involvedrevs[0]
422 422 if rev > 1:
423 423 fixuprev = rev + 1
424 424 newfixups.append((fixuprev, a1, a2, b1, b2))
425 425 elif a2 - a1 == b2 - b1 or b1 == b2:
426 426 # 1:1 line mapping, or chunk was deleted
427 427 for i in range(a1, a2):
428 428 rev, linenum = annotated[i]
429 429 if rev > 1:
430 430 if b1 == b2: # deletion, simply remove that single line
431 431 nb1 = nb2 = 0
432 432 else: # 1:1 line mapping, change the corresponding rev
433 433 nb1 = b1 + i - a1
434 434 nb2 = nb1 + 1
435 435 fixuprev = rev + 1
436 436 newfixups.append((fixuprev, i, i + 1, nb1, nb2))
437 437 return self._optimizefixups(newfixups)
438 438
439 439 @staticmethod
440 440 def _alldiffchunks(a, b, alines, blines):
441 441 """like mdiff.allblocks, but only care about differences"""
442 442 blocks = mdiff.allblocks(a, b, lines1=alines, lines2=blines)
443 443 for chunk, btype in blocks:
444 444 if btype != b'!':
445 445 continue
446 446 yield chunk
447 447
448 448 def _buildlinelog(self):
449 449 """calculate the initial linelog based on self.content{,line}s.
450 450 this is similar to running a partial "annotate".
451 451 """
452 452 llog = linelog.linelog()
453 453 a, alines = b'', []
454 454 for i in range(len(self.contents)):
455 455 b, blines = self.contents[i], self.contentlines[i]
456 456 llrev = i * 2 + 1
457 457 chunks = self._alldiffchunks(a, b, alines, blines)
458 458 for a1, a2, b1, b2 in reversed(list(chunks)):
459 459 llog.replacelines(llrev, a1, a2, b1, b2)
460 460 a, alines = b, blines
461 461 return llog
462 462
463 463 def _checkoutlinelog(self):
464 464 """() -> [str]. check out file contents from linelog"""
465 465 contents = []
466 466 for i in range(len(self.contents)):
467 467 rev = (i + 1) * 2
468 468 self.linelog.annotate(rev)
469 469 content = b''.join(map(self._getline, self.linelog.annotateresult))
470 470 contents.append(content)
471 471 return contents
472 472
473 473 def _checkoutlinelogwithedits(self):
474 474 """() -> [str]. prompt all lines for edit"""
475 475 alllines = self.linelog.getalllines()
476 476 # header
477 477 editortext = (
478 478 _(
479 479 b'HG: editing %s\nHG: "y" means the line to the right '
480 480 b'exists in the changeset to the top\nHG:\n'
481 481 )
482 482 % self.fctxs[-1].path()
483 483 )
484 484 # [(idx, fctx)]. hide the dummy emptyfilecontext
485 485 visiblefctxs = [
486 486 (i, f)
487 487 for i, f in enumerate(self.fctxs)
488 488 if not isinstance(f, emptyfilecontext)
489 489 ]
490 490 for i, (j, f) in enumerate(visiblefctxs):
491 491 editortext += _(b'HG: %s/%s %s %s\n') % (
492 492 b'|' * i,
493 493 b'-' * (len(visiblefctxs) - i + 1),
494 494 short(f.node()),
495 495 f.description().split(b'\n', 1)[0],
496 496 )
497 497 editortext += _(b'HG: %s\n') % (b'|' * len(visiblefctxs))
498 498 # figure out the lifetime of a line, this is relatively inefficient,
499 499 # but probably fine
500 500 lineset = defaultdict(lambda: set()) # {(llrev, linenum): {llrev}}
501 501 for i, f in visiblefctxs:
502 502 self.linelog.annotate((i + 1) * 2)
503 503 for l in self.linelog.annotateresult:
504 504 lineset[l].add(i)
505 505 # append lines
506 506 for l in alllines:
507 507 editortext += b' %s : %s' % (
508 508 b''.join(
509 509 [
510 510 (b'y' if i in lineset[l] else b' ')
511 511 for i, _f in visiblefctxs
512 512 ]
513 513 ),
514 514 self._getline(l),
515 515 )
516 516 # run editor
517 517 editedtext = self.ui.edit(editortext, b'', action=b'absorb')
518 518 if not editedtext:
519 519 raise error.InputError(_(b'empty editor text'))
520 520 # parse edited result
521 521 contents = [b''] * len(self.fctxs)
522 522 leftpadpos = 4
523 523 colonpos = leftpadpos + len(visiblefctxs) + 1
524 524 for l in mdiff.splitnewlines(editedtext):
525 525 if l.startswith(b'HG:'):
526 526 continue
527 527 if l[colonpos - 1 : colonpos + 2] != b' : ':
528 528 raise error.InputError(_(b'malformed line: %s') % l)
529 529 linecontent = l[colonpos + 2 :]
530 530 for i, ch in enumerate(
531 531 pycompat.bytestr(l[leftpadpos : colonpos - 1])
532 532 ):
533 533 if ch == b'y':
534 534 contents[visiblefctxs[i][0]] += linecontent
535 535 # chunkstats is hard to calculate if anything changes, therefore
536 536 # set them to just a simple value (1, 1).
537 537 if editedtext != editortext:
538 538 self.chunkstats = [1, 1]
539 539 return contents
540 540
541 541 def _getline(self, lineinfo):
542 542 """((rev, linenum)) -> str. convert rev+line number to line content"""
543 543 rev, linenum = lineinfo
544 544 if rev & 1: # odd: original line taken from fctxs
545 545 return self.contentlines[rev // 2][linenum]
546 546 else: # even: fixup line from targetfctx
547 547 return self.targetlines[linenum]
548 548
549 549 def _iscontinuous(self, a1, a2, closedinterval=False):
550 550 """(a1, a2 : int) -> bool
551 551
552 552 check if these lines are continuous. i.e. no other insertions or
553 553 deletions (from other revisions) among these lines.
554 554
555 555 closedinterval decides whether a2 should be included or not. i.e. is
556 556 it [a1, a2), or [a1, a2] ?
557 557 """
558 558 if a1 >= a2:
559 559 return True
560 560 llog = self.linelog
561 561 offset1 = llog.getoffset(a1)
562 562 offset2 = llog.getoffset(a2) + int(closedinterval)
563 563 linesinbetween = llog.getalllines(offset1, offset2)
564 564 return len(linesinbetween) == a2 - a1 + int(closedinterval)
565 565
566 566 def _optimizefixups(self, fixups):
567 567 """[(rev, a1, a2, b1, b2)] -> [(rev, a1, a2, b1, b2)].
568 568 merge adjacent fixups to make them less fragmented.
569 569 """
570 570 result = []
571 571 pcurrentchunk = [[-1, -1, -1, -1, -1]]
572 572
573 573 def pushchunk():
574 574 if pcurrentchunk[0][0] != -1:
575 575 result.append(tuple(pcurrentchunk[0]))
576 576
577 577 for i, chunk in enumerate(fixups):
578 578 rev, a1, a2, b1, b2 = chunk
579 579 lastrev = pcurrentchunk[0][0]
580 580 lasta2 = pcurrentchunk[0][2]
581 581 lastb2 = pcurrentchunk[0][4]
582 582 if (
583 583 a1 == lasta2
584 584 and b1 == lastb2
585 585 and rev == lastrev
586 586 and self._iscontinuous(max(a1 - 1, 0), a1)
587 587 ):
588 588 # merge into currentchunk
589 589 pcurrentchunk[0][2] = a2
590 590 pcurrentchunk[0][4] = b2
591 591 else:
592 592 pushchunk()
593 593 pcurrentchunk[0] = list(chunk)
594 594 pushchunk()
595 595 return result
596 596
597 597 def _showchanges(self, fm, alines, blines, chunk, fixups):
598 598 def trim(line):
599 599 if line.endswith(b'\n'):
600 600 line = line[:-1]
601 601 return line
602 602
603 603 # this is not optimized for perf but _showchanges only gets executed
604 604 # with an extra command-line flag.
605 605 a1, a2, b1, b2 = chunk
606 606 aidxs, bidxs = [0] * (a2 - a1), [0] * (b2 - b1)
607 607 for idx, fa1, fa2, fb1, fb2 in fixups:
608 608 for i in range(fa1, fa2):
609 609 aidxs[i - a1] = (max(idx, 1) - 1) // 2
610 610 for i in range(fb1, fb2):
611 611 bidxs[i - b1] = (max(idx, 1) - 1) // 2
612 612
613 613 fm.startitem()
614 614 fm.write(
615 615 b'hunk',
616 616 b' %s\n',
617 617 b'@@ -%d,%d +%d,%d @@' % (a1, a2 - a1, b1, b2 - b1),
618 618 label=b'diff.hunk',
619 619 )
620 620 fm.data(path=self.path, linetype=b'hunk')
621 621
622 622 def writeline(idx, diffchar, line, linetype, linelabel):
623 623 fm.startitem()
624 624 node = b''
625 625 if idx:
626 626 ctx = self.fctxs[idx]
627 627 fm.context(fctx=ctx)
628 628 node = ctx.hex()
629 629 self.ctxaffected.add(ctx.changectx())
630 630 fm.write(b'node', b'%-7.7s ', node, label=b'absorb.node')
631 631 fm.write(
632 632 b'diffchar ' + linetype,
633 633 b'%s%s\n',
634 634 diffchar,
635 635 line,
636 636 label=linelabel,
637 637 )
638 638 fm.data(path=self.path, linetype=linetype)
639 639
640 640 for i in range(a1, a2):
641 641 writeline(
642 642 aidxs[i - a1],
643 643 b'-',
644 644 trim(alines[i]),
645 645 b'deleted',
646 646 b'diff.deleted',
647 647 )
648 648 for i in range(b1, b2):
649 649 writeline(
650 650 bidxs[i - b1],
651 651 b'+',
652 652 trim(blines[i]),
653 653 b'inserted',
654 654 b'diff.inserted',
655 655 )
656 656
657 657
658 658 class fixupstate:
659 659 """state needed to run absorb
660 660
661 661 internally, it keeps paths and filefixupstates.
662 662
663 663 a typical use is like filefixupstate's:
664 664
665 665 1. call diffwith, to calculate fixups
666 666 2. (optionally), present fixups to the user, or edit fixups
667 667 3. call apply, to apply changes to memory
668 668 4. call commit, to commit changes to hg database
669 669 """
670 670
671 671 def __init__(self, stack, ui=None, **opts):
672 672 """([ctx], ui or None) -> None
673 673
674 674 stack: should be linear, and sorted by topo order - oldest first.
675 675 all commits in stack are considered mutable.
676 676 """
677 677 assert stack
678 678 self.ui = ui or nullui()
679 679 self.opts = opts
680 680 self.stack = stack
681 681 self.repo = stack[-1].repo().unfiltered()
682 682
683 683 # following fields will be filled later
684 684 self.paths = [] # [str]
685 685 self.status = None # ctx.status output
686 686 self.fctxmap = {} # {path: {ctx: fctx}}
687 687 self.fixupmap = {} # {path: filefixupstate}
688 688 self.replacemap = {} # {oldnode: newnode or None}
689 689 self.finalnode = None # head after all fixups
690 690 self.ctxaffected = set() # ctx that will be absorbed into
691 691
692 692 def diffwith(self, targetctx, match=None, fm=None):
693 693 """diff and prepare fixups. update self.fixupmap, self.paths"""
694 694 # only care about modified files
695 695 self.status = self.stack[-1].status(targetctx, match)
696 696 self.paths = []
697 697 # but if --edit-lines is used, the user may want to edit files
698 698 # even if they are not modified
699 699 editopt = self.opts.get('edit_lines')
700 700 if not self.status.modified and editopt and match:
701 701 interestingpaths = match.files()
702 702 else:
703 703 interestingpaths = self.status.modified
704 704 # prepare the filefixupstate
705 705 seenfctxs = set()
706 706 # sorting is necessary to eliminate ambiguity for the "double move"
707 707 # case: "hg cp A B; hg cp A C; hg rm A", then only "B" can affect "A".
708 708 for path in sorted(interestingpaths):
709 709 self.ui.debug(b'calculating fixups for %s\n' % path)
710 710 targetfctx = targetctx[path]
711 711 fctxs, ctx2fctx = getfilestack(self.stack, path, seenfctxs)
712 712 # ignore symbolic links or binary, or unchanged files
713 713 if any(
714 714 f.islink() or stringutil.binary(f.data())
715 715 for f in [targetfctx] + fctxs
716 716 if not isinstance(f, emptyfilecontext)
717 717 ):
718 718 continue
719 719 if targetfctx.data() == fctxs[-1].data() and not editopt:
720 720 continue
721 721 seenfctxs.update(fctxs[1:])
722 722 self.fctxmap[path] = ctx2fctx
723 723 fstate = filefixupstate(fctxs, path, ui=self.ui, **self.opts)
724 724 if fm is not None:
725 725 fm.startitem()
726 726 fm.plain(b'showing changes for ')
727 727 fm.write(b'path', b'%s\n', path, label=b'absorb.path')
728 728 fm.data(linetype=b'path')
729 729 fstate.diffwith(targetfctx, fm)
730 730 self.fixupmap[path] = fstate
731 731 self.paths.append(path)
732 732 self.ctxaffected.update(fstate.ctxaffected)
733 733
734 734 def apply(self):
735 735 """apply fixups to individual filefixupstates"""
736 736 for path, state in self.fixupmap.items():
737 737 if self.ui.debugflag:
738 738 self.ui.write(_(b'applying fixups to %s\n') % path)
739 739 state.apply()
740 740
741 741 @property
742 742 def chunkstats(self):
743 743 """-> {path: chunkstats}. collect chunkstats from filefixupstates"""
744 744 return {path: state.chunkstats for path, state in self.fixupmap.items()}
745 745
746 746 def commit(self):
747 747 """commit changes. update self.finalnode, self.replacemap"""
748 748 with self.repo.transaction(b'absorb') as tr:
749 749 self._commitstack()
750 750 self._movebookmarks(tr)
751 751 if self.repo[b'.'].node() in self.replacemap:
752 752 self._moveworkingdirectoryparent()
753 753 self._cleanupoldcommits()
754 754 return self.finalnode
755 755
756 756 def printchunkstats(self):
757 757 """print things like '1 of 2 chunk(s) applied'"""
758 758 ui = self.ui
759 759 chunkstats = self.chunkstats
760 760 if ui.verbose:
761 761 # chunkstats for each file
762 762 for path, stat in chunkstats.items():
763 763 if stat[0]:
764 764 ui.write(
765 765 _(b'%s: %d of %d chunk(s) applied\n')
766 766 % (path, stat[0], stat[1])
767 767 )
768 768 elif not ui.quiet:
769 769 # a summary for all files
770 770 stats = chunkstats.values()
771 771 applied, total = (sum(s[i] for s in stats) for i in (0, 1))
772 772 ui.write(_(b'%d of %d chunk(s) applied\n') % (applied, total))
773 773
774 774 def _commitstack(self):
775 775 """make new commits. update self.finalnode, self.replacemap.
776 776 it is split from "commit" to avoid too much indentation.
777 777 """
778 778 # last node (20-char) committed by us
779 779 lastcommitted = None
780 780 # p1 which overrides the parent of the next commit, "None" means use
781 781 # the original parent unchanged
782 782 nextp1 = None
783 783 for ctx in self.stack:
784 784 memworkingcopy = self._getnewfilecontents(ctx)
785 785 if not memworkingcopy and not lastcommitted:
786 786 # nothing changed, nothing committed
787 787 nextp1 = ctx
788 788 continue
789 789 willbecomenoop = ctx.files() and self._willbecomenoop(
790 790 memworkingcopy, ctx, nextp1
791 791 )
792 792 if self.skip_empty_successor and willbecomenoop:
793 793 # changeset is no longer necessary
794 794 self.replacemap[ctx.node()] = None
795 795 msg = _(b'became empty and was dropped')
796 796 else:
797 797 # changeset needs re-commit
798 798 nodestr = self._commitsingle(memworkingcopy, ctx, p1=nextp1)
799 799 lastcommitted = self.repo[nodestr]
800 800 nextp1 = lastcommitted
801 801 self.replacemap[ctx.node()] = lastcommitted.node()
802 802 if memworkingcopy:
803 803 if willbecomenoop:
804 804 msg = _(b'%d file(s) changed, became empty as %s')
805 805 else:
806 806 msg = _(b'%d file(s) changed, became %s')
807 807 msg = msg % (
808 808 len(memworkingcopy),
809 809 self._ctx2str(lastcommitted),
810 810 )
811 811 else:
812 812 msg = _(b'became %s') % self._ctx2str(lastcommitted)
813 813 if self.ui.verbose and msg:
814 814 self.ui.write(_(b'%s: %s\n') % (self._ctx2str(ctx), msg))
815 815 self.finalnode = lastcommitted and lastcommitted.node()
816 816
817 817 def _ctx2str(self, ctx):
818 818 if self.ui.debugflag:
819 819 return b'%d:%s' % (ctx.rev(), ctx.hex())
820 820 else:
821 821 return b'%d:%s' % (ctx.rev(), short(ctx.node()))
822 822
823 823 def _getnewfilecontents(self, ctx):
824 824 """(ctx) -> {path: str}
825 825
826 826 fetch file contents from filefixupstates.
827 827 return the working copy overrides - files different from ctx.
828 828 """
829 829 result = {}
830 830 for path in self.paths:
831 831 ctx2fctx = self.fctxmap[path] # {ctx: fctx}
832 832 if ctx not in ctx2fctx:
833 833 continue
834 834 fctx = ctx2fctx[ctx]
835 835 content = fctx.data()
836 836 newcontent = self.fixupmap[path].getfinalcontent(fctx)
837 837 if content != newcontent:
838 838 result[fctx.path()] = newcontent
839 839 return result
840 840
841 841 def _movebookmarks(self, tr):
842 842 repo = self.repo
843 843 needupdate = [
844 844 (name, self.replacemap[hsh])
845 845 for name, hsh in repo._bookmarks.items()
846 846 if hsh in self.replacemap
847 847 ]
848 848 changes = []
849 849 for name, hsh in needupdate:
850 850 if hsh:
851 851 changes.append((name, hsh))
852 852 if self.ui.verbose:
853 853 self.ui.write(
854 854 _(b'moving bookmark %s to %s\n') % (name, hex(hsh))
855 855 )
856 856 else:
857 857 changes.append((name, None))
858 858 if self.ui.verbose:
859 859 self.ui.write(_(b'deleting bookmark %s\n') % name)
860 860 repo._bookmarks.applychanges(repo, tr, changes)
861 861
862 862 def _moveworkingdirectoryparent(self):
863 863 if not self.finalnode:
864 864 # Find the latest not-{obsoleted,stripped} parent.
865 865 revs = self.repo.revs(b'max(::. - %ln)', self.replacemap.keys())
866 866 ctx = self.repo[revs.first()]
867 867 self.finalnode = ctx.node()
868 868 else:
869 869 ctx = self.repo[self.finalnode]
870 870
871 871 dirstate = self.repo.dirstate
872 872 # dirstate.rebuild invalidates fsmonitorstate, causing "hg status" to
873 873 # be slow. in absorb's case, no need to invalidate fsmonitorstate.
874 874 noop = lambda: 0
875 875 restore = noop
876 if util.safehasattr(dirstate, '_fsmonitorstate'):
876 if hasattr(dirstate, '_fsmonitorstate'):
877 877 bak = dirstate._fsmonitorstate.invalidate
878 878
879 879 def restore():
880 880 dirstate._fsmonitorstate.invalidate = bak
881 881
882 882 dirstate._fsmonitorstate.invalidate = noop
883 883 try:
884 884 with dirstate.changing_parents(self.repo):
885 885 dirstate.rebuild(ctx.node(), ctx.manifest(), self.paths)
886 886 finally:
887 887 restore()
888 888
889 889 @staticmethod
890 890 def _willbecomenoop(memworkingcopy, ctx, pctx=None):
891 891 """({path: content}, ctx, ctx) -> bool. test if a commit will be noop
892 892
893 893 if it will become an empty commit (does not change anything, after the
894 894 memworkingcopy overrides), return True. otherwise return False.
895 895 """
896 896 if not pctx:
897 897 parents = ctx.parents()
898 898 if len(parents) != 1:
899 899 return False
900 900 pctx = parents[0]
901 901 if ctx.branch() != pctx.branch():
902 902 return False
903 903 if ctx.extra().get(b'close'):
904 904 return False
905 905 # ctx changes more files (not a subset of memworkingcopy)
906 906 if not set(ctx.files()).issubset(set(memworkingcopy)):
907 907 return False
908 908 for path, content in memworkingcopy.items():
909 909 if path not in pctx or path not in ctx:
910 910 return False
911 911 fctx = ctx[path]
912 912 pfctx = pctx[path]
913 913 if pfctx.flags() != fctx.flags():
914 914 return False
915 915 if pfctx.data() != content:
916 916 return False
917 917 return True
918 918
919 919 def _commitsingle(self, memworkingcopy, ctx, p1=None):
920 920 """(ctx, {path: content}, node) -> node. make a single commit
921 921
922 922 the commit is a clone from ctx, with an (optionally) different p1, and
923 923 different file contents replaced by memworkingcopy.
924 924 """
925 925 parents = p1 and (p1, self.repo.nullid)
926 926 extra = ctx.extra()
927 927 if self._useobsolete and self.ui.configbool(b'absorb', b'add-noise'):
928 928 extra[b'absorb_source'] = ctx.hex()
929 929
930 930 desc = rewriteutil.update_hash_refs(
931 931 ctx.repo(),
932 932 ctx.description(),
933 933 {
934 934 oldnode: [newnode]
935 935 for oldnode, newnode in self.replacemap.items()
936 936 },
937 937 )
938 938 mctx = overlaycontext(
939 939 memworkingcopy, ctx, parents, extra=extra, desc=desc
940 940 )
941 941 return mctx.commit()
942 942
943 943 @util.propertycache
944 944 def _useobsolete(self):
945 945 """() -> bool"""
946 946 return obsolete.isenabled(self.repo, obsolete.createmarkersopt)
947 947
948 948 def _cleanupoldcommits(self):
949 949 replacements = {
950 950 k: ([v] if v is not None else [])
951 951 for k, v in self.replacemap.items()
952 952 }
953 953 if replacements:
954 954 scmutil.cleanupnodes(
955 955 self.repo, replacements, operation=b'absorb', fixphase=True
956 956 )
957 957
958 958 @util.propertycache
959 959 def skip_empty_successor(self):
960 960 return rewriteutil.skip_empty_successor(self.ui, b'absorb')
961 961
962 962
963 963 def _parsechunk(hunk):
964 964 """(crecord.uihunk or patch.recordhunk) -> (path, (a1, a2, [bline]))"""
965 965 if type(hunk) not in (crecord.uihunk, patch.recordhunk):
966 966 return None, None
967 967 path = hunk.header.filename()
968 968 a1 = hunk.fromline + len(hunk.before) - 1
969 969 # remove before and after context
970 970 hunk.before = hunk.after = []
971 971 buf = util.stringio()
972 972 hunk.write(buf)
973 973 patchlines = mdiff.splitnewlines(buf.getvalue())
974 974 # hunk.prettystr() will update hunk.removed
975 975 a2 = a1 + hunk.removed
976 976 blines = [l[1:] for l in patchlines[1:] if not l.startswith(b'-')]
977 977 return path, (a1, a2, blines)
978 978
979 979
980 980 def overlaydiffcontext(ctx, chunks):
981 981 """(ctx, [crecord.uihunk]) -> memctx
982 982
983 983 return a memctx with some [1] patches (chunks) applied to ctx.
984 984 [1]: modifications are handled. renames, mode changes, etc. are ignored.
985 985 """
986 986 # sadly the applying-patch logic is hardly reusable, and messy:
987 987 # 1. the core logic "_applydiff" is too heavy - it writes .rej files, it
988 988 # needs a file stream of a patch and will re-parse it, while we have
989 989 # structured hunk objects at hand.
990 990 # 2. a lot of different implementations about "chunk" (patch.hunk,
991 991 # patch.recordhunk, crecord.uihunk)
992 992 # as we only care about applying changes to modified files, no mode
993 993 # change, no binary diff, and no renames, it's probably okay to
994 994 # re-invent the logic using much simpler code here.
995 995 memworkingcopy = {} # {path: content}
996 996 patchmap = defaultdict(lambda: []) # {path: [(a1, a2, [bline])]}
997 997 for path, info in map(_parsechunk, chunks):
998 998 if not path or not info:
999 999 continue
1000 1000 patchmap[path].append(info)
1001 1001 for path, patches in patchmap.items():
1002 1002 if path not in ctx or not patches:
1003 1003 continue
1004 1004 patches.sort(reverse=True)
1005 1005 lines = mdiff.splitnewlines(ctx[path].data())
1006 1006 for a1, a2, blines in patches:
1007 1007 lines[a1:a2] = blines
1008 1008 memworkingcopy[path] = b''.join(lines)
1009 1009 return overlaycontext(memworkingcopy, ctx)
1010 1010
1011 1011
1012 1012 def absorb(ui, repo, stack=None, targetctx=None, pats=None, **opts):
1013 1013 """pick fixup chunks from targetctx, apply them to stack.
1014 1014
1015 1015 if targetctx is None, the working copy context will be used.
1016 1016 if stack is None, the current draft stack will be used.
1017 1017 return fixupstate.
1018 1018 """
1019 1019 if stack is None:
1020 1020 limit = ui.configint(b'absorb', b'max-stack-size')
1021 1021 headctx = repo[b'.']
1022 1022 if len(headctx.parents()) > 1:
1023 1023 raise error.InputError(_(b'cannot absorb into a merge'))
1024 1024 stack = getdraftstack(headctx, limit)
1025 1025 if limit and len(stack) >= limit:
1026 1026 ui.warn(
1027 1027 _(
1028 1028 b'absorb: only the recent %d changesets will '
1029 1029 b'be analysed\n'
1030 1030 )
1031 1031 % limit
1032 1032 )
1033 1033 if not stack:
1034 1034 raise error.InputError(_(b'no mutable changeset to change'))
1035 1035 if targetctx is None: # default to working copy
1036 1036 targetctx = repo[None]
1037 1037 if pats is None:
1038 1038 pats = ()
1039 1039
1040 1040 state = fixupstate(stack, ui=ui, **opts)
1041 1041 matcher = scmutil.match(targetctx, pats, pycompat.byteskwargs(opts))
1042 1042 if opts.get('interactive'):
1043 1043 diff = patch.diff(repo, stack[-1].node(), targetctx.node(), matcher)
1044 1044 origchunks = patch.parsepatch(diff)
1045 1045 chunks = cmdutil.recordfilter(ui, origchunks, matcher)[0]
1046 1046 targetctx = overlaydiffcontext(stack[-1], chunks)
1047 1047 if opts.get('edit_lines'):
1048 1048 # If we're going to open the editor, don't ask the user to confirm
1049 1049 # first
1050 1050 opts['apply_changes'] = True
1051 1051 fm = None
1052 1052 if opts.get('print_changes') or not opts.get('apply_changes'):
1053 1053 fm = ui.formatter(b'absorb', pycompat.byteskwargs(opts))
1054 1054 state.diffwith(targetctx, matcher, fm)
1055 1055 if fm is not None:
1056 1056 fm.startitem()
1057 1057 fm.write(
1058 1058 b"count", b"\n%d changesets affected\n", len(state.ctxaffected)
1059 1059 )
1060 1060 fm.data(linetype=b'summary')
1061 1061 for ctx in reversed(stack):
1062 1062 if ctx not in state.ctxaffected:
1063 1063 continue
1064 1064 fm.startitem()
1065 1065 fm.context(ctx=ctx)
1066 1066 fm.data(linetype=b'changeset')
1067 1067 fm.write(b'node', b'%-7.7s ', ctx.hex(), label=b'absorb.node')
1068 1068 descfirstline = stringutil.firstline(ctx.description())
1069 1069 fm.write(
1070 1070 b'descfirstline',
1071 1071 b'%s\n',
1072 1072 descfirstline,
1073 1073 label=b'absorb.description',
1074 1074 )
1075 1075 fm.end()
1076 1076 if not opts.get('dry_run'):
1077 1077 if (
1078 1078 not opts.get('apply_changes')
1079 1079 and state.ctxaffected
1080 1080 and ui.promptchoice(
1081 1081 b"apply changes (y/N)? $$ &Yes $$ &No", default=1
1082 1082 )
1083 1083 ):
1084 1084 raise error.CanceledError(_(b'absorb cancelled\n'))
1085 1085
1086 1086 state.apply()
1087 1087 if state.commit():
1088 1088 state.printchunkstats()
1089 1089 elif not ui.quiet:
1090 1090 ui.write(_(b'nothing applied\n'))
1091 1091 return state
1092 1092
1093 1093
1094 1094 @command(
1095 1095 b'absorb',
1096 1096 [
1097 1097 (
1098 1098 b'a',
1099 1099 b'apply-changes',
1100 1100 None,
1101 1101 _(b'apply changes without prompting for confirmation'),
1102 1102 ),
1103 1103 (
1104 1104 b'p',
1105 1105 b'print-changes',
1106 1106 None,
1107 1107 _(b'always print which changesets are modified by which changes'),
1108 1108 ),
1109 1109 (
1110 1110 b'i',
1111 1111 b'interactive',
1112 1112 None,
1113 1113 _(b'interactively select which chunks to apply'),
1114 1114 ),
1115 1115 (
1116 1116 b'e',
1117 1117 b'edit-lines',
1118 1118 None,
1119 1119 _(
1120 1120 b'edit what lines belong to which changesets before commit '
1121 1121 b'(EXPERIMENTAL)'
1122 1122 ),
1123 1123 ),
1124 1124 ]
1125 1125 + commands.dryrunopts
1126 1126 + commands.templateopts
1127 1127 + commands.walkopts,
1128 1128 _(b'hg absorb [OPTION] [FILE]...'),
1129 1129 helpcategory=command.CATEGORY_COMMITTING,
1130 1130 helpbasic=True,
1131 1131 )
1132 1132 def absorbcmd(ui, repo, *pats, **opts):
1133 1133 """incorporate corrections into the stack of draft changesets
1134 1134
1135 1135 absorb analyzes each change in your working directory and attempts to
1136 1136 amend the changed lines into the changesets in your stack that first
1137 1137 introduced those lines.
1138 1138
1139 1139 If absorb cannot find an unambiguous changeset to amend for a change,
1140 1140 that change will be left in the working directory, untouched. They can be
1141 1141 observed by :hg:`status` or :hg:`diff` afterwards. In other words,
1142 1142 absorb does not write to the working directory.
1143 1143
1144 1144 Changesets outside the revset `::. and not public() and not merge()` will
1145 1145 not be changed.
1146 1146
1147 1147 Changesets that become empty after applying the changes will be deleted.
1148 1148
1149 1149 By default, absorb will show what it plans to do and prompt for
1150 1150 confirmation. If you are confident that the changes will be absorbed
1151 1151 to the correct place, run :hg:`absorb -a` to apply the changes
1152 1152 immediately.
1153 1153
1154 1154 Returns 0 on success, 1 if all chunks were ignored and nothing amended.
1155 1155 """
1156 1156 with repo.wlock(), repo.lock():
1157 1157 if not opts['dry_run']:
1158 1158 cmdutil.checkunfinished(repo)
1159 1159
1160 1160 state = absorb(ui, repo, pats=pats, **opts)
1161 1161 if sum(s[0] for s in state.chunkstats.values()) == 0:
1162 1162 return 1
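As a reading aid, a hedged sketch (not part of this diff) of the diffwith/apply/commit flow that the fixupstate docstring above describes; it assumes `ui` and `repo` objects already exist and that the caller holds the working-copy and store locks, as absorbcmd does.

def absorb_working_copy(ui, repo):
    # 1. collect the linear stack of non-public changesets ending at "."
    stack = getdraftstack(repo[b'.'], limit=50)
    state = fixupstate(stack, ui=ui)
    # 2. compute fixups against the working-copy context
    state.diffwith(repo[None])
    # 3. apply them in memory, then rewrite the draft changesets
    state.apply()
    state.commit()
    state.printchunkstats()
    return state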
@@ -1,1251 +1,1251 b''
1 1 # bugzilla.py - bugzilla integration for mercurial
2 2 #
3 3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
4 4 # Copyright 2011-4 Jim Hague <jim.hague@acm.org>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''hooks for integrating with the Bugzilla bug tracker
10 10
11 11 This hook extension adds comments on bugs in Bugzilla when changesets
12 12 that refer to bugs by Bugzilla ID are seen. The comment is formatted using
13 13 the Mercurial template mechanism.
14 14
15 15 The bug references can optionally include an update for Bugzilla of the
16 16 hours spent working on the bug. Bugs can also be marked fixed.
17 17
18 18 Four basic modes of access to Bugzilla are provided:
19 19
20 20 1. Access via the Bugzilla REST-API. Requires bugzilla 5.0 or later.
21 21
22 22 2. Access via the Bugzilla XMLRPC interface. Requires Bugzilla 3.4 or later.
23 23
24 24 3. Check data via the Bugzilla XMLRPC interface and submit bug change
25 25 via email to Bugzilla email interface. Requires Bugzilla 3.4 or later.
26 26
27 27 4. Writing directly to the Bugzilla database. Only Bugzilla installations
28 28 using MySQL are supported. Requires Python MySQLdb.
29 29
30 30 Writing directly to the database is susceptible to schema changes, and
31 31 relies on a Bugzilla contrib script to send out bug change
32 32 notification emails. This script runs as the user running Mercurial,
33 33 must be run on the host with the Bugzilla install, and requires
34 34 permission to read Bugzilla configuration details and the necessary
35 35 MySQL user and password to have full access rights to the Bugzilla
36 36 database. For these reasons this access mode is now considered
37 37 deprecated, and will not be updated for new Bugzilla versions going
38 38 forward. Only adding comments is supported in this access mode.
39 39
40 40 Access via XMLRPC needs a Bugzilla username and password to be specified
41 41 in the configuration. Comments are added under that username. Since the
42 42 configuration must be readable by all Mercurial users, it is recommended
43 43 that the rights of that user are restricted in Bugzilla to the minimum
44 44 necessary to add comments. Marking bugs fixed requires Bugzilla 4.0 and later.
45 45
46 46 Access via XMLRPC/email uses XMLRPC to query Bugzilla, but sends
47 47 email to the Bugzilla email interface to submit comments to bugs.
48 48 The From: address in the email is set to the email address of the Mercurial
49 49 user, so the comment appears to come from the Mercurial user. In the event
50 50 that the Mercurial user email is not recognized by Bugzilla as a Bugzilla
51 51 user, the email associated with the Bugzilla username used to log into
52 52 Bugzilla is used instead as the source of the comment. Marking bugs fixed
53 53 works on all supported Bugzilla versions.
54 54
55 55 Access via the REST-API needs either a Bugzilla username and password
56 56 or an apikey specified in the configuration. Comments are made under
57 57 the given username or the user associated with the apikey in Bugzilla.
58 58
59 59 Configuration items common to all access modes:
60 60
61 61 bugzilla.version
62 62 The access type to use. Values recognized are:
63 63
64 64 :``restapi``: Bugzilla REST-API, Bugzilla 5.0 and later.
65 65 :``xmlrpc``: Bugzilla XMLRPC interface.
66 66 :``xmlrpc+email``: Bugzilla XMLRPC and email interfaces.
67 67 :``3.0``: MySQL access, Bugzilla 3.0 and later.
68 68 :``2.18``: MySQL access, Bugzilla 2.18 and up to but not
69 69 including 3.0.
70 70 :``2.16``: MySQL access, Bugzilla 2.16 and up to but not
71 71 including 2.18.
72 72
73 73 bugzilla.regexp
74 74 Regular expression to match bug IDs for update in changeset commit message.
75 75 It must contain one "()" named group ``<ids>`` containing the bug
76 76 IDs separated by non-digit characters. It may also contain
77 77 a named group ``<hours>`` with a floating-point number giving the
78 78 hours worked on the bug. If no named groups are present, the first
79 79 "()" group is assumed to contain the bug IDs, and work time is not
80 80 updated. The default expression matches ``Bug 1234``, ``Bug no. 1234``,
81 81 ``Bug number 1234``, ``Bugs 1234,5678``, ``Bug 1234 and 5678`` and
82 82 variations thereof, followed by an hours number prefixed by ``h`` or
83 83 ``hours``, e.g. ``hours 1.5``. Matching is case insensitive.
84 84
85 85 bugzilla.fixregexp
86 86 Regular expression to match bug IDs for marking fixed in changeset
87 87 commit message. This must contain a "()" named group ``<ids>`` containing
88 88 the bug IDs separated by non-digit characters. It may also contain
89 89 a named group ``<hours>`` with a floating-point number giving the
90 90 hours worked on the bug. If no named groups are present, the first
91 91 "()" group is assumed to contain the bug IDs, and work time is not
92 92 updated. The default expression matches ``Fixes 1234``, ``Fixes bug 1234``,
93 93 ``Fixes bugs 1234,5678``, ``Fixes 1234 and 5678`` and
94 94 variations thereof, followed by an hours number prefixed by ``h`` or
95 95 ``hours``, e.g. ``hours 1.5``. Matching is case insensitive.
96 96
97 97 bugzilla.fixstatus
98 98 The status to set a bug to when marking fixed. Default ``RESOLVED``.
99 99
100 100 bugzilla.fixresolution
101 101 The resolution to set a bug to when marking fixed. Default ``FIXED``.
102 102
103 103 bugzilla.style
104 104 The style file to use when formatting comments.
105 105
106 106 bugzilla.template
107 107 Template to use when formatting comments. Overrides style if
108 108 specified. In addition to the usual Mercurial keywords, the
109 109 extension specifies:
110 110
111 111 :``{bug}``: The Bugzilla bug ID.
112 112 :``{root}``: The full pathname of the Mercurial repository.
113 113 :``{webroot}``: Stripped pathname of the Mercurial repository.
114 114 :``{hgweb}``: Base URL for browsing Mercurial repositories.
115 115
116 116 Default ``changeset {node|short} in repo {root} refers to bug
117 117 {bug}.\\ndetails:\\n\\t{desc|tabindent}``
118 118
119 119 bugzilla.strip
120 120 The number of path separator characters to strip from the front of
121 121 the Mercurial repository path (``{root}`` in templates) to produce
122 122 ``{webroot}``. For example, a repository with ``{root}``
123 123 ``/var/local/my-project`` with a strip of 2 gives a value for
124 124 ``{webroot}`` of ``my-project``. Default 0.
125 125
126 126 web.baseurl
127 127 Base URL for browsing Mercurial repositories. Referenced from
128 128 templates as ``{hgweb}``.
129 129
130 130 Configuration items common to XMLRPC+email and MySQL access modes:
131 131
132 132 bugzilla.usermap
133 133 Path of file containing Mercurial committer email to Bugzilla user email
134 134 mappings. If specified, the file should contain one mapping per
135 135 line::
136 136
137 137 committer = Bugzilla user
138 138
139 139 See also the ``[usermap]`` section.
140 140
141 141 The ``[usermap]`` section is used to specify mappings of Mercurial
142 142 committer email to Bugzilla user email. See also ``bugzilla.usermap``.
143 143 Contains entries of the form ``committer = Bugzilla user``.
144 144
145 145 XMLRPC and REST-API access mode configuration:
146 146
147 147 bugzilla.bzurl
148 148 The base URL for the Bugzilla installation.
149 149 Default ``http://localhost/bugzilla``.
150 150
151 151 bugzilla.user
152 152 The username to use to log into Bugzilla via XMLRPC. Default
153 153 ``bugs``.
154 154
155 155 bugzilla.password
156 156 The password for Bugzilla login.
157 157
158 158 REST-API access mode uses the options listed above as well as:
159 159
160 160 bugzilla.apikey
161 161 An apikey generated on the Bugzilla instance for api access.
162 162 Using an apikey removes the need to store the user and password
163 163 options.
164 164
165 165 XMLRPC+email access mode uses the XMLRPC access mode configuration items,
166 166 and also:
167 167
168 168 bugzilla.bzemail
169 169 The Bugzilla email address.
170 170
171 171 In addition, the Mercurial email settings must be configured. See the
172 172 documentation in hgrc(5), sections ``[email]`` and ``[smtp]``.
173 173
174 174 MySQL access mode configuration:
175 175
176 176 bugzilla.host
177 177 Hostname of the MySQL server holding the Bugzilla database.
178 178 Default ``localhost``.
179 179
180 180 bugzilla.db
181 181 Name of the Bugzilla database in MySQL. Default ``bugs``.
182 182
183 183 bugzilla.user
184 184 Username to use to access MySQL server. Default ``bugs``.
185 185
186 186 bugzilla.password
187 187 Password to use to access MySQL server.
188 188
189 189 bugzilla.timeout
190 190 Database connection timeout (seconds). Default 5.
191 191
192 192 bugzilla.bzuser
193 193 Fallback Bugzilla user name to record comments with, if changeset
194 194 committer cannot be found as a Bugzilla user.
195 195
196 196 bugzilla.bzdir
197 197 Bugzilla install directory. Used by default notify. Default
198 198 ``/var/www/html/bugzilla``.
199 199
200 200 bugzilla.notify
201 201 The command to run to get Bugzilla to send bug change notification
202 202 emails. Substitutes from a map with 3 keys, ``bzdir``, ``id`` (bug
203 203 id) and ``user`` (committer bugzilla email). Default depends on
204 204 version; from 2.18 it is "cd %(bzdir)s && perl -T
205 205 contrib/sendbugmail.pl %(id)s %(user)s".
206 206
207 207 Activating the extension::
208 208
209 209 [extensions]
210 210 bugzilla =
211 211
212 212 [hooks]
213 213 # run bugzilla hook on every change pulled or pushed in here
214 214 incoming.bugzilla = python:hgext.bugzilla.hook
215 215
216 216 Example configurations:
217 217
218 218 XMLRPC example configuration. This uses the Bugzilla at
219 219 ``http://my-project.org/bugzilla``, logging in as user
220 220 ``bugmail@my-project.org`` with password ``plugh``. It is used with a
221 221 collection of Mercurial repositories in ``/var/local/hg/repos/``,
222 222 with a web interface at ``http://my-project.org/hg``. ::
223 223
224 224 [bugzilla]
225 225 bzurl=http://my-project.org/bugzilla
226 226 user=bugmail@my-project.org
227 227 password=plugh
228 228 version=xmlrpc
229 229 template=Changeset {node|short} in {root|basename}.
230 230 {hgweb}/{webroot}/rev/{node|short}\\n
231 231 {desc}\\n
232 232 strip=5
233 233
234 234 [web]
235 235 baseurl=http://my-project.org/hg
236 236
237 237 XMLRPC+email example configuration. This uses the Bugzilla at
238 238 ``http://my-project.org/bugzilla``, logging in as user
239 239 ``bugmail@my-project.org`` with password ``plugh``. It is used with a
240 240 collection of Mercurial repositories in ``/var/local/hg/repos/``,
241 241 with a web interface at ``http://my-project.org/hg``. Bug comments
242 242 are sent to the Bugzilla email address
243 243 ``bugzilla@my-project.org``. ::
244 244
245 245 [bugzilla]
246 246 bzurl=http://my-project.org/bugzilla
247 247 user=bugmail@my-project.org
248 248 password=plugh
249 249 version=xmlrpc+email
250 250 bzemail=bugzilla@my-project.org
251 251 template=Changeset {node|short} in {root|basename}.
252 252 {hgweb}/{webroot}/rev/{node|short}\\n
253 253 {desc}\\n
254 254 strip=5
255 255
256 256 [web]
257 257 baseurl=http://my-project.org/hg
258 258
259 259 [usermap]
260 260 user@emaildomain.com=user.name@bugzilladomain.com
261 261
262 262 MySQL example configuration. This has a local Bugzilla 3.2 installation
263 263 in ``/opt/bugzilla-3.2``. The MySQL database is on ``localhost``,
264 264 the Bugzilla database name is ``bugs`` and MySQL is
265 265 accessed with MySQL username ``bugs`` password ``XYZZY``. It is used
266 266 with a collection of Mercurial repositories in ``/var/local/hg/repos/``,
267 267 with a web interface at ``http://my-project.org/hg``. ::
268 268
269 269 [bugzilla]
270 270 host=localhost
271 271 password=XYZZY
272 272 version=3.0
273 273 bzuser=unknown@domain.com
274 274 bzdir=/opt/bugzilla-3.2
275 275 template=Changeset {node|short} in {root|basename}.
276 276 {hgweb}/{webroot}/rev/{node|short}\\n
277 277 {desc}\\n
278 278 strip=5
279 279
280 280 [web]
281 281 baseurl=http://my-project.org/hg
282 282
283 283 [usermap]
284 284 user@emaildomain.com=user.name@bugzilladomain.com
285 285
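REST-API example configuration (illustrative; it assumes a Bugzilla 5.0 or
later instance at ``http://my-project.org/bugzilla`` with an API key generated
there, and otherwise mirrors the XMLRPC example above). ::

    [bugzilla]
    bzurl=http://my-project.org/bugzilla
    apikey=0123456789abcdef
    version=restapi
    template=Changeset {node|short} in {root|basename}.
             {hgweb}/{webroot}/rev/{node|short}\\n
             {desc}\\n
    strip=5

    [web]
    baseurl=http://my-project.org/hg
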
286 286 All the above add a comment to the Bugzilla bug record of the form::
287 287
288 288 Changeset 3b16791d6642 in repository-name.
289 289 http://my-project.org/hg/repository-name/rev/3b16791d6642
290 290
291 291 Changeset commit comment. Bug 1234.
292 292 '''
293 293
294 294
295 295 import json
296 296 import re
297 297 import time
298 298
299 299 from mercurial.i18n import _
300 300 from mercurial.node import short
301 301 from mercurial import (
302 302 error,
303 303 logcmdutil,
304 304 mail,
305 305 pycompat,
306 306 registrar,
307 307 url,
308 308 util,
309 309 )
310 310 from mercurial.utils import (
311 311 procutil,
312 312 stringutil,
313 313 )
314 314
315 315 xmlrpclib = util.xmlrpclib
316 316
317 317 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
318 318 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
319 319 # be specifying the version(s) of Mercurial they are tested with, or
320 320 # leave the attribute unspecified.
321 321 testedwith = b'ships-with-hg-core'
322 322
323 323 configtable = {}
324 324 configitem = registrar.configitem(configtable)
325 325
326 326 configitem(
327 327 b'bugzilla',
328 328 b'apikey',
329 329 default=b'',
330 330 )
331 331 configitem(
332 332 b'bugzilla',
333 333 b'bzdir',
334 334 default=b'/var/www/html/bugzilla',
335 335 )
336 336 configitem(
337 337 b'bugzilla',
338 338 b'bzemail',
339 339 default=None,
340 340 )
341 341 configitem(
342 342 b'bugzilla',
343 343 b'bzurl',
344 344 default=b'http://localhost/bugzilla/',
345 345 )
346 346 configitem(
347 347 b'bugzilla',
348 348 b'bzuser',
349 349 default=None,
350 350 )
351 351 configitem(
352 352 b'bugzilla',
353 353 b'db',
354 354 default=b'bugs',
355 355 )
356 356 configitem(
357 357 b'bugzilla',
358 358 b'fixregexp',
359 359 default=(
360 360 br'fix(?:es)?\s*(?:bugs?\s*)?,?\s*'
361 361 br'(?:nos?\.?|num(?:ber)?s?)?\s*'
362 362 br'(?P<ids>(?:#?\d+\s*(?:,?\s*(?:and)?)?\s*)+)'
363 363 br'\.?\s*(?:h(?:ours?)?\s*(?P<hours>\d*(?:\.\d+)?))?'
364 364 ),
365 365 )
366 366 configitem(
367 367 b'bugzilla',
368 368 b'fixresolution',
369 369 default=b'FIXED',
370 370 )
371 371 configitem(
372 372 b'bugzilla',
373 373 b'fixstatus',
374 374 default=b'RESOLVED',
375 375 )
376 376 configitem(
377 377 b'bugzilla',
378 378 b'host',
379 379 default=b'localhost',
380 380 )
381 381 configitem(
382 382 b'bugzilla',
383 383 b'notify',
384 384 default=configitem.dynamicdefault,
385 385 )
386 386 configitem(
387 387 b'bugzilla',
388 388 b'password',
389 389 default=None,
390 390 )
391 391 configitem(
392 392 b'bugzilla',
393 393 b'regexp',
394 394 default=(
395 395 br'bugs?\s*,?\s*(?:#|nos?\.?|num(?:ber)?s?)?\s*'
396 396 br'(?P<ids>(?:\d+\s*(?:,?\s*(?:and)?)?\s*)+)'
397 397 br'\.?\s*(?:h(?:ours?)?\s*(?P<hours>\d*(?:\.\d+)?))?'
398 398 ),
399 399 )
400 400 configitem(
401 401 b'bugzilla',
402 402 b'strip',
403 403 default=0,
404 404 )
405 405 configitem(
406 406 b'bugzilla',
407 407 b'style',
408 408 default=None,
409 409 )
410 410 configitem(
411 411 b'bugzilla',
412 412 b'template',
413 413 default=None,
414 414 )
415 415 configitem(
416 416 b'bugzilla',
417 417 b'timeout',
418 418 default=5,
419 419 )
420 420 configitem(
421 421 b'bugzilla',
422 422 b'user',
423 423 default=b'bugs',
424 424 )
425 425 configitem(
426 426 b'bugzilla',
427 427 b'usermap',
428 428 default=None,
429 429 )
430 430 configitem(
431 431 b'bugzilla',
432 432 b'version',
433 433 default=None,
434 434 )
435 435
436 436
437 437 class bzaccess:
438 438 '''Base class for access to Bugzilla.'''
439 439
440 440 def __init__(self, ui):
441 441 self.ui = ui
442 442 usermap = self.ui.config(b'bugzilla', b'usermap')
443 443 if usermap:
444 444 self.ui.readconfig(usermap, sections=[b'usermap'])
445 445
446 446 def map_committer(self, user):
447 447 '''map name of committer to Bugzilla user name.'''
448 448 for committer, bzuser in self.ui.configitems(b'usermap'):
449 449 if committer.lower() == user.lower():
450 450 return bzuser
451 451 return user
452 452
453 453 # Methods to be implemented by access classes.
454 454 #
455 455 # 'bugs' is a dict keyed on bug id, where values are a dict holding
456 456 # updates to bug state. Recognized dict keys are:
457 457 #
458 458 # 'hours': Value, float containing work hours to be updated.
459 459 # 'fix': If key present, bug is to be marked fixed. Value ignored.
460 460
461 461 def filter_real_bug_ids(self, bugs):
462 462 '''remove bug IDs that do not exist in Bugzilla from bugs.'''
463 463
464 464 def filter_cset_known_bug_ids(self, node, bugs):
465 465 '''remove bug IDs where node occurs in comment text from bugs.'''
466 466
467 467 def updatebug(self, bugid, newstate, text, committer):
468 468 """update the specified bug. Add comment text and set new states.
469 469
470 470 If possible add the comment as being from the committer of
471 471 the changeset. Otherwise use the default Bugzilla user.
472 472 """
473 473
474 474 def notify(self, bugs, committer):
475 475 """Force sending of Bugzilla notification emails.
476 476
477 477 Only required if the access method does not trigger notification
478 478 emails automatically.
479 479 """
480 480
481 481
482 482 # Bugzilla via direct access to MySQL database.
483 483 class bzmysql(bzaccess):
484 484 """Support for direct MySQL access to Bugzilla.
485 485
486 486 The earliest Bugzilla version this is tested with is version 2.16.
487 487
488 488 If your Bugzilla is version 3.4 or above, you are strongly
489 489 recommended to use the XMLRPC access method instead.
490 490 """
491 491
492 492 @staticmethod
493 493 def sql_buglist(ids):
494 494 '''return SQL-friendly list of bug ids'''
495 495 return b'(' + b','.join(map(str, ids)) + b')'
496 496
497 497 _MySQLdb = None
498 498
499 499 def __init__(self, ui):
500 500 try:
501 501 import MySQLdb as mysql
502 502
503 503 bzmysql._MySQLdb = mysql
504 504 except ImportError as err:
505 505 raise error.Abort(
506 506 _(b'python mysql support not available: %s') % err
507 507 )
508 508
509 509 bzaccess.__init__(self, ui)
510 510
511 511 host = self.ui.config(b'bugzilla', b'host')
512 512 user = self.ui.config(b'bugzilla', b'user')
513 513 passwd = self.ui.config(b'bugzilla', b'password')
514 514 db = self.ui.config(b'bugzilla', b'db')
515 515 timeout = int(self.ui.config(b'bugzilla', b'timeout'))
516 516 self.ui.note(
517 517 _(b'connecting to %s:%s as %s, password %s\n')
518 518 % (host, db, user, b'*' * len(passwd))
519 519 )
520 520 self.conn = bzmysql._MySQLdb.connect(
521 521 host=host, user=user, passwd=passwd, db=db, connect_timeout=timeout
522 522 )
523 523 self.cursor = self.conn.cursor()
524 524 self.longdesc_id = self.get_longdesc_id()
525 525 self.user_ids = {}
526 526 self.default_notify = b"cd %(bzdir)s && ./processmail %(id)s %(user)s"
527 527
528 528 def run(self, *args, **kwargs):
529 529 '''run a query.'''
530 530 self.ui.note(_(b'query: %s %s\n') % (args, kwargs))
531 531 try:
532 532 self.cursor.execute(*args, **kwargs)
533 533 except bzmysql._MySQLdb.MySQLError:
534 534 self.ui.note(_(b'failed query: %s %s\n') % (args, kwargs))
535 535 raise
536 536
537 537 def get_longdesc_id(self):
538 538 '''get identity of longdesc field'''
539 539 self.run(b'select fieldid from fielddefs where name = "longdesc"')
540 540 ids = self.cursor.fetchall()
541 541 if len(ids) != 1:
542 542 raise error.Abort(_(b'unknown database schema'))
543 543 return ids[0][0]
544 544
545 545 def filter_real_bug_ids(self, bugs):
546 546 '''filter not-existing bugs from set.'''
547 547 self.run(
548 548 b'select bug_id from bugs where bug_id in %s'
549 549 % bzmysql.sql_buglist(bugs.keys())
550 550 )
551 551 existing = [id for (id,) in self.cursor.fetchall()]
552 552 for id in bugs.keys():
553 553 if id not in existing:
554 554 self.ui.status(_(b'bug %d does not exist\n') % id)
555 555 del bugs[id]
556 556
557 557 def filter_cset_known_bug_ids(self, node, bugs):
558 558 '''filter bug ids that already refer to this changeset from set.'''
559 559 self.run(
560 560 '''select bug_id from longdescs where
561 561 bug_id in %s and thetext like "%%%s%%"'''
562 562 % (bzmysql.sql_buglist(bugs.keys()), short(node))
563 563 )
564 564 for (id,) in self.cursor.fetchall():
565 565 self.ui.status(
566 566 _(b'bug %d already knows about changeset %s\n')
567 567 % (id, short(node))
568 568 )
569 569 del bugs[id]
570 570
571 571 def notify(self, bugs, committer):
572 572 '''tell bugzilla to send mail.'''
573 573 self.ui.status(_(b'telling bugzilla to send mail:\n'))
574 574 (user, userid) = self.get_bugzilla_user(committer)
575 575 for id in bugs.keys():
576 576 self.ui.status(_(b' bug %s\n') % id)
577 577 cmdfmt = self.ui.config(b'bugzilla', b'notify', self.default_notify)
578 578 bzdir = self.ui.config(b'bugzilla', b'bzdir')
579 579 try:
580 580 # Backwards-compatible with old notify string, which
581 581 # took one string. This will throw with a new format
582 582 # string.
583 583 cmd = cmdfmt % id
584 584 except TypeError:
585 585 cmd = cmdfmt % {b'bzdir': bzdir, b'id': id, b'user': user}
586 586 self.ui.note(_(b'running notify command %s\n') % cmd)
587 587 fp = procutil.popen(b'(%s) 2>&1' % cmd, b'rb')
588 588 out = util.fromnativeeol(fp.read())
589 589 ret = fp.close()
590 590 if ret:
591 591 self.ui.warn(out)
592 592 raise error.Abort(
593 593 _(b'bugzilla notify command %s') % procutil.explainexit(ret)
594 594 )
595 595 self.ui.status(_(b'done\n'))
596 596
597 597 def get_user_id(self, user):
598 598 '''look up numeric bugzilla user id.'''
599 599 try:
600 600 return self.user_ids[user]
601 601 except KeyError:
602 602 try:
603 603 userid = int(user)
604 604 except ValueError:
605 605 self.ui.note(_(b'looking up user %s\n') % user)
606 606 self.run(
607 607 '''select userid from profiles
608 608 where login_name like %s''',
609 609 user,
610 610 )
611 611 all = self.cursor.fetchall()
612 612 if len(all) != 1:
613 613 raise KeyError(user)
614 614 userid = int(all[0][0])
615 615 self.user_ids[user] = userid
616 616 return userid
617 617
618 618 def get_bugzilla_user(self, committer):
619 619 """See if committer is a registered bugzilla user. Return
620 620 bugzilla username and userid if so. If not, return default
621 621 bugzilla username and userid."""
622 622 user = self.map_committer(committer)
623 623 try:
624 624 userid = self.get_user_id(user)
625 625 except KeyError:
626 626 try:
627 627 defaultuser = self.ui.config(b'bugzilla', b'bzuser')
628 628 if not defaultuser:
629 629 raise error.Abort(
630 630 _(b'cannot find bugzilla user id for %s') % user
631 631 )
632 632 userid = self.get_user_id(defaultuser)
633 633 user = defaultuser
634 634 except KeyError:
635 635 raise error.Abort(
636 636 _(b'cannot find bugzilla user id for %s or %s')
637 637 % (user, defaultuser)
638 638 )
639 639 return (user, userid)
640 640
641 641 def updatebug(self, bugid, newstate, text, committer):
642 642 """update bug state with comment text.
643 643
644 644 Try adding comment as committer of changeset, otherwise as
645 645 default bugzilla user."""
646 646 if len(newstate) > 0:
647 647 self.ui.warn(_(b"Bugzilla/MySQL cannot update bug state\n"))
648 648
649 649 (user, userid) = self.get_bugzilla_user(committer)
650 650 now = time.strftime('%Y-%m-%d %H:%M:%S')
651 651 self.run(
652 652 '''insert into longdescs
653 653 (bug_id, who, bug_when, thetext)
654 654 values (%s, %s, %s, %s)''',
655 655 (bugid, userid, now, text),
656 656 )
657 657 self.run(
658 658 '''insert into bugs_activity (bug_id, who, bug_when, fieldid)
659 659 values (%s, %s, %s, %s)''',
660 660 (bugid, userid, now, self.longdesc_id),
661 661 )
662 662 self.conn.commit()
663 663
664 664
665 665 class bzmysql_2_18(bzmysql):
666 666 '''support for bugzilla 2.18 series.'''
667 667
668 668 def __init__(self, ui):
669 669 bzmysql.__init__(self, ui)
670 670 self.default_notify = (
671 671 b"cd %(bzdir)s && perl -T contrib/sendbugmail.pl %(id)s %(user)s"
672 672 )
673 673
674 674
675 675 class bzmysql_3_0(bzmysql_2_18):
676 676 '''support for bugzilla 3.0 series.'''
677 677
678 678 def __init__(self, ui):
679 679 bzmysql_2_18.__init__(self, ui)
680 680
681 681 def get_longdesc_id(self):
682 682 '''get identity of longdesc field'''
683 683 self.run(b'select id from fielddefs where name = "longdesc"')
684 684 ids = self.cursor.fetchall()
685 685 if len(ids) != 1:
686 686 raise error.Abort(_(b'unknown database schema'))
687 687 return ids[0][0]
688 688
689 689
690 690 # Bugzilla via XMLRPC interface.
691 691
692 692
693 693 class cookietransportrequest:
694 694 """A Transport request method that retains cookies over its lifetime.
695 695
696 696 The regular xmlrpclib transports ignore cookies. Which causes
697 697 a bit of a problem when you need a cookie-based login, as with
698 698 the Bugzilla XMLRPC interface prior to 4.4.3.
699 699
700 700 So this is a helper for defining a Transport which looks for
701 701 cookies being set in responses and saves them to add to all future
702 702 requests.
703 703 """
704 704
705 705 # Inspiration drawn from
706 706 # http://blog.godson.in/2010/09/how-to-make-python-xmlrpclib-client.html
707 707 # http://www.itkovian.net/base/transport-class-for-pythons-xml-rpc-lib/
708 708
709 709 cookies = []
710 710
711 711 def send_cookies(self, connection):
712 712 if self.cookies:
713 713 for cookie in self.cookies:
714 714 connection.putheader(b"Cookie", cookie)
715 715
716 716 def request(self, host, handler, request_body, verbose=0):
717 717 self.verbose = verbose
718 718 self.accept_gzip_encoding = False
719 719
720 720 # issue XML-RPC request
721 721 h = self.make_connection(host)
722 722 if verbose:
723 723 h.set_debuglevel(1)
724 724
725 725 self.send_request(h, handler, request_body)
726 726 self.send_host(h, host)
727 727 self.send_cookies(h)
728 728 self.send_user_agent(h)
729 729 self.send_content(h, request_body)
730 730
731 731 # Deal with differences between Python 2.6 and 2.7.
732 732 # In the former h is a HTTP(S). In the latter it's a
733 733 # HTTP(S)Connection. Luckily, the 2.6 implementation of
734 734 # HTTP(S) has an underlying HTTP(S)Connection, so extract
735 735 # that and use it.
736 736 try:
737 737 response = h.getresponse()
738 738 except AttributeError:
739 739 response = h._conn.getresponse()
740 740
741 741 # Add any cookie definitions to our list.
742 742 for header in response.msg.getallmatchingheaders(b"Set-Cookie"):
743 743 val = header.split(b": ", 1)[1]
744 744 cookie = val.split(b";", 1)[0]
745 745 self.cookies.append(cookie)
746 746
747 747 if response.status != 200:
748 748 raise xmlrpclib.ProtocolError(
749 749 host + handler,
750 750 response.status,
751 751 response.reason,
752 752 response.msg.headers,
753 753 )
754 754
755 755 payload = response.read()
756 756 parser, unmarshaller = self.getparser()
757 757 parser.feed(payload)
758 758 parser.close()
759 759
760 760 return unmarshaller.close()
761 761
762 762
763 763 # The explicit calls to the underlying xmlrpclib __init__() methods are
764 764 # necessary. The xmlrpclib.Transport classes are old-style classes, and
765 765 # it turns out their __init__() doesn't get called when doing multiple
766 766 # inheritance with a new-style class.
767 767 class cookietransport(cookietransportrequest, xmlrpclib.Transport):
768 768 def __init__(self, use_datetime=0):
769 if util.safehasattr(xmlrpclib.Transport, "__init__"):
769 if hasattr(xmlrpclib.Transport, "__init__"):
770 770 xmlrpclib.Transport.__init__(self, use_datetime)
771 771
772 772
773 773 class cookiesafetransport(cookietransportrequest, xmlrpclib.SafeTransport):
774 774 def __init__(self, use_datetime=0):
775 if util.safehasattr(xmlrpclib.Transport, "__init__"):
775 if hasattr(xmlrpclib.Transport, "__init__"):
776 776 xmlrpclib.SafeTransport.__init__(self, use_datetime)
777 777
778 778
779 779 class bzxmlrpc(bzaccess):
780 780 """Support for access to Bugzilla via the Bugzilla XMLRPC API.
781 781
782 782 Requires a minimum Bugzilla version 3.4.
783 783 """
784 784
785 785 def __init__(self, ui):
786 786 bzaccess.__init__(self, ui)
787 787
788 788 bzweb = self.ui.config(b'bugzilla', b'bzurl')
789 789 bzweb = bzweb.rstrip(b"/") + b"/xmlrpc.cgi"
790 790
791 791 user = self.ui.config(b'bugzilla', b'user')
792 792 passwd = self.ui.config(b'bugzilla', b'password')
793 793
794 794 self.fixstatus = self.ui.config(b'bugzilla', b'fixstatus')
795 795 self.fixresolution = self.ui.config(b'bugzilla', b'fixresolution')
796 796
797 797 self.bzproxy = xmlrpclib.ServerProxy(
798 798 pycompat.strurl(bzweb), self.transport(bzweb)
799 799 )
800 800 ver = self.bzproxy.Bugzilla.version()[b'version'].split(b'.')
801 801 self.bzvermajor = int(ver[0])
802 802 self.bzverminor = int(ver[1])
803 803 login = self.bzproxy.User.login(
804 804 {b'login': user, b'password': passwd, b'restrict_login': True}
805 805 )
806 806 self.bztoken = login.get(b'token', b'')
807 807
808 808 def transport(self, uri):
809 809 if util.urlreq.urlparse(uri, b"http")[0] == b"https":
810 810 return cookiesafetransport()
811 811 else:
812 812 return cookietransport()
813 813
814 814 def get_bug_comments(self, id):
815 815 """Return a string with all comment text for a bug."""
816 816 c = self.bzproxy.Bug.comments(
817 817 {b'ids': [id], b'include_fields': [b'text'], b'token': self.bztoken}
818 818 )
819 819 return b''.join(
820 820 [t[b'text'] for t in c[b'bugs'][b'%d' % id][b'comments']]
821 821 )
822 822
823 823 def filter_real_bug_ids(self, bugs):
824 824 probe = self.bzproxy.Bug.get(
825 825 {
826 826 b'ids': sorted(bugs.keys()),
827 827 b'include_fields': [],
828 828 b'permissive': True,
829 829 b'token': self.bztoken,
830 830 }
831 831 )
832 832 for badbug in probe[b'faults']:
833 833 id = badbug[b'id']
834 834 self.ui.status(_(b'bug %d does not exist\n') % id)
835 835 del bugs[id]
836 836
837 837 def filter_cset_known_bug_ids(self, node, bugs):
838 838 for id in sorted(bugs.keys()):
839 839 if self.get_bug_comments(id).find(short(node)) != -1:
840 840 self.ui.status(
841 841 _(b'bug %d already knows about changeset %s\n')
842 842 % (id, short(node))
843 843 )
844 844 del bugs[id]
845 845
846 846 def updatebug(self, bugid, newstate, text, committer):
847 847 args = {}
848 848 if b'hours' in newstate:
849 849 args[b'work_time'] = newstate[b'hours']
850 850
851 851 if self.bzvermajor >= 4:
852 852 args[b'ids'] = [bugid]
853 853 args[b'comment'] = {b'body': text}
854 854 if b'fix' in newstate:
855 855 args[b'status'] = self.fixstatus
856 856 args[b'resolution'] = self.fixresolution
857 857 args[b'token'] = self.bztoken
858 858 self.bzproxy.Bug.update(args)
859 859 else:
860 860 if b'fix' in newstate:
861 861 self.ui.warn(
862 862 _(
863 863 b"Bugzilla/XMLRPC needs Bugzilla 4.0 or later "
864 864 b"to mark bugs fixed\n"
865 865 )
866 866 )
867 867 args[b'id'] = bugid
868 868 args[b'comment'] = text
869 869 self.bzproxy.Bug.add_comment(args)
870 870
871 871
872 872 class bzxmlrpcemail(bzxmlrpc):
873 873 """Read data from Bugzilla via XMLRPC, send updates via email.
874 874
875 875 Advantages of sending updates via email:
876 876 1. Comments can be added as any user, not just logged in user.
877 877 2. Bug statuses or other fields not accessible via XMLRPC can
878 878 potentially be updated.
879 879
880 880 There is no XMLRPC function to change bug status before Bugzilla
881 881 4.0, so bugs cannot be marked fixed via XMLRPC before Bugzilla 4.0.
882 882 But bugs can be marked fixed via email from 3.4 onwards.
883 883 """
884 884
885 885 # The email interface changes subtly between 3.4 and 3.6. In 3.4,
886 886 # in-email fields are specified as '@<fieldname> = <value>'. In
887 887 # 3.6 this becomes '@<fieldname> <value>'. And fieldname @bug_id
888 888 # in 3.4 becomes @id in 3.6. 3.6 and 4.0 both maintain backwards
889 889 # compatibility, but rather than rely on this use the new format for
890 890 # 4.0 onwards.
891 891
892 892 def __init__(self, ui):
893 893 bzxmlrpc.__init__(self, ui)
894 894
895 895 self.bzemail = self.ui.config(b'bugzilla', b'bzemail')
896 896 if not self.bzemail:
897 897 raise error.Abort(_(b"configuration 'bzemail' missing"))
898 898 mail.validateconfig(self.ui)
899 899
900 900 def makecommandline(self, fieldname, value):
901 901 if self.bzvermajor >= 4:
902 902 return b"@%s %s" % (fieldname, pycompat.bytestr(value))
903 903 else:
904 904 if fieldname == b"id":
905 905 fieldname = b"bug_id"
906 906 return b"@%s = %s" % (fieldname, pycompat.bytestr(value))
907 907
908 908 def send_bug_modify_email(self, bugid, commands, comment, committer):
909 909 """send modification message to Bugzilla bug via email.
910 910
911 911 The message format is documented in the Bugzilla email_in.pl
912 912 specification. commands is a list of command lines, comment is the
913 913 comment text.
914 914
915 915 To stop users from crafting commit comments with
916 916 Bugzilla commands, specify the bug ID via the message body, rather
917 917 than the subject line, and leave a blank line after it.
918 918 """
919 919 user = self.map_committer(committer)
920 920 matches = self.bzproxy.User.get(
921 921 {b'match': [user], b'token': self.bztoken}
922 922 )
923 923 if not matches[b'users']:
924 924 user = self.ui.config(b'bugzilla', b'user')
925 925 matches = self.bzproxy.User.get(
926 926 {b'match': [user], b'token': self.bztoken}
927 927 )
928 928 if not matches[b'users']:
929 929 raise error.Abort(
930 930 _(b"default bugzilla user %s email not found") % user
931 931 )
932 932 user = matches[b'users'][0][b'email']
933 933 commands.append(self.makecommandline(b"id", bugid))
934 934
935 935 text = b"\n".join(commands) + b"\n\n" + comment
936 936
937 937 _charsets = mail._charsets(self.ui)
938 938 user = mail.addressencode(self.ui, user, _charsets)
939 939 bzemail = mail.addressencode(self.ui, self.bzemail, _charsets)
940 940 msg = mail.mimeencode(self.ui, text, _charsets)
941 941 msg[b'From'] = user
942 942 msg[b'To'] = bzemail
943 943 msg[b'Subject'] = mail.headencode(
944 944 self.ui, b"Bug modification", _charsets
945 945 )
946 946 sendmail = mail.connect(self.ui)
947 947 sendmail(user, bzemail, msg.as_string())
948 948
949 949 def updatebug(self, bugid, newstate, text, committer):
950 950 cmds = []
951 951 if b'hours' in newstate:
952 952 cmds.append(self.makecommandline(b"work_time", newstate[b'hours']))
953 953 if b'fix' in newstate:
954 954 cmds.append(self.makecommandline(b"bug_status", self.fixstatus))
955 955 cmds.append(self.makecommandline(b"resolution", self.fixresolution))
956 956 self.send_bug_modify_email(bugid, cmds, text, committer)
957 957
958 958
959 959 class NotFound(LookupError):
960 960 pass
961 961
962 962
963 963 class bzrestapi(bzaccess):
964 964 """Read and write bugzilla data using the REST API available since
965 965 Bugzilla 5.0.
966 966 """
967 967
968 968 def __init__(self, ui):
969 969 bzaccess.__init__(self, ui)
970 970 bz = self.ui.config(b'bugzilla', b'bzurl')
971 971 self.bzroot = b'/'.join([bz, b'rest'])
972 972 self.apikey = self.ui.config(b'bugzilla', b'apikey')
973 973 self.user = self.ui.config(b'bugzilla', b'user')
974 974 self.passwd = self.ui.config(b'bugzilla', b'password')
975 975 self.fixstatus = self.ui.config(b'bugzilla', b'fixstatus')
976 976 self.fixresolution = self.ui.config(b'bugzilla', b'fixresolution')
977 977
978 978 def apiurl(self, targets, include_fields=None):
979 979 url = b'/'.join([self.bzroot] + [pycompat.bytestr(t) for t in targets])
980 980 qv = {}
981 981 if self.apikey:
982 982 qv[b'api_key'] = self.apikey
983 983 elif self.user and self.passwd:
984 984 qv[b'login'] = self.user
985 985 qv[b'password'] = self.passwd
986 986 if include_fields:
987 987 qv[b'include_fields'] = include_fields
988 988 if qv:
989 989 url = b'%s?%s' % (url, util.urlreq.urlencode(qv))
990 990 return url
991 991
992 992 def _fetch(self, burl):
993 993 try:
994 994 resp = url.open(self.ui, burl)
995 995 return pycompat.json_loads(resp.read())
996 996 except util.urlerr.httperror as inst:
997 997 if inst.code == 401:
998 998 raise error.Abort(_(b'authorization failed'))
999 999 if inst.code == 404:
1000 1000 raise NotFound()
1001 1001 else:
1002 1002 raise
1003 1003
1004 1004 def _submit(self, burl, data, method=b'POST'):
1005 1005 data = json.dumps(data)
1006 1006 if method == b'PUT':
1007 1007
1008 1008 class putrequest(util.urlreq.request):
1009 1009 def get_method(self):
1010 1010 return b'PUT'
1011 1011
1012 1012 request_type = putrequest
1013 1013 else:
1014 1014 request_type = util.urlreq.request
1015 1015 req = request_type(burl, data, {b'Content-Type': b'application/json'})
1016 1016 try:
1017 1017 resp = url.opener(self.ui).open(req)
1018 1018 return pycompat.json_loads(resp.read())
1019 1019 except util.urlerr.httperror as inst:
1020 1020 if inst.code == 401:
1021 1021 raise error.Abort(_(b'authorization failed'))
1022 1022 if inst.code == 404:
1023 1023 raise NotFound()
1024 1024 else:
1025 1025 raise
1026 1026
1027 1027 def filter_real_bug_ids(self, bugs):
1028 1028 '''remove bug IDs that do not exist in Bugzilla from bugs.'''
1029 1029 badbugs = set()
1030 1030 for bugid in bugs:
1031 1031 burl = self.apiurl((b'bug', bugid), include_fields=b'status')
1032 1032 try:
1033 1033 self._fetch(burl)
1034 1034 except NotFound:
1035 1035 badbugs.add(bugid)
1036 1036 for bugid in badbugs:
1037 1037 del bugs[bugid]
1038 1038
1039 1039 def filter_cset_known_bug_ids(self, node, bugs):
1040 1040 '''remove bug IDs where node occurs in comment text from bugs.'''
1041 1041 sn = short(node)
1042 1042 for bugid in bugs.keys():
1043 1043 burl = self.apiurl(
1044 1044 (b'bug', bugid, b'comment'), include_fields=b'text'
1045 1045 )
1046 1046 result = self._fetch(burl)
1047 1047 comments = result[b'bugs'][pycompat.bytestr(bugid)][b'comments']
1048 1048 if any(sn in c[b'text'] for c in comments):
1049 1049 self.ui.status(
1050 1050 _(b'bug %d already knows about changeset %s\n')
1051 1051 % (bugid, sn)
1052 1052 )
1053 1053 del bugs[bugid]
1054 1054
1055 1055 def updatebug(self, bugid, newstate, text, committer):
1056 1056 """update the specified bug. Add comment text and set new states.
1057 1057
1058 1058 If possible add the comment as being from the committer of
1059 1059 the changeset. Otherwise use the default Bugzilla user.
1060 1060 """
1061 1061 bugmod = {}
1062 1062 if b'hours' in newstate:
1063 1063 bugmod[b'work_time'] = newstate[b'hours']
1064 1064 if b'fix' in newstate:
1065 1065 bugmod[b'status'] = self.fixstatus
1066 1066 bugmod[b'resolution'] = self.fixresolution
1067 1067 if bugmod:
1068 1068 # if we have to change the bugs state do it here
1069 1069 bugmod[b'comment'] = {
1070 1070 b'comment': text,
1071 1071 b'is_private': False,
1072 1072 b'is_markdown': False,
1073 1073 }
1074 1074 burl = self.apiurl((b'bug', bugid))
1075 1075 self._submit(burl, bugmod, method=b'PUT')
1076 1076 self.ui.debug(b'updated bug %s\n' % bugid)
1077 1077 else:
1078 1078 burl = self.apiurl((b'bug', bugid, b'comment'))
1079 1079 self._submit(
1080 1080 burl,
1081 1081 {
1082 1082 b'comment': text,
1083 1083 b'is_private': False,
1084 1084 b'is_markdown': False,
1085 1085 },
1086 1086 )
1087 1087 self.ui.debug(b'added comment to bug %s\n' % bugid)
1088 1088
1089 1089 def notify(self, bugs, committer):
1090 1090 """Force sending of Bugzilla notification emails.
1091 1091
1092 1092 Only required if the access method does not trigger notification
1093 1093 emails automatically.
1094 1094 """
1095 1095 pass
1096 1096
1097 1097
1098 1098 class bugzilla:
1099 1099 # supported versions of bugzilla. different versions have
1100 1100 # different schemas.
1101 1101 _versions = {
1102 1102 b'2.16': bzmysql,
1103 1103 b'2.18': bzmysql_2_18,
1104 1104 b'3.0': bzmysql_3_0,
1105 1105 b'xmlrpc': bzxmlrpc,
1106 1106 b'xmlrpc+email': bzxmlrpcemail,
1107 1107 b'restapi': bzrestapi,
1108 1108 }
1109 1109
1110 1110 def __init__(self, ui, repo):
1111 1111 self.ui = ui
1112 1112 self.repo = repo
1113 1113
1114 1114 bzversion = self.ui.config(b'bugzilla', b'version')
1115 1115 try:
1116 1116 bzclass = bugzilla._versions[bzversion]
1117 1117 except KeyError:
1118 1118 raise error.Abort(
1119 1119 _(b'bugzilla version %s not supported') % bzversion
1120 1120 )
1121 1121 self.bzdriver = bzclass(self.ui)
1122 1122
1123 1123 self.bug_re = re.compile(
1124 1124 self.ui.config(b'bugzilla', b'regexp'), re.IGNORECASE
1125 1125 )
1126 1126 self.fix_re = re.compile(
1127 1127 self.ui.config(b'bugzilla', b'fixregexp'), re.IGNORECASE
1128 1128 )
1129 1129 self.split_re = re.compile(br'\D+')
1130 1130
1131 1131 def find_bugs(self, ctx):
1132 1132 """return bugs dictionary created from commit comment.
1133 1133
1134 1134 Extract bug info from changeset comments. Filter out any that are
1135 1135 not known to Bugzilla, and any that already have a reference to
1136 1136 the given changeset in their comments.
1137 1137 """
1138 1138 start = 0
1139 1139 bugs = {}
1140 1140 bugmatch = self.bug_re.search(ctx.description(), start)
1141 1141 fixmatch = self.fix_re.search(ctx.description(), start)
1142 1142 while True:
1143 1143 bugattribs = {}
1144 1144 if not bugmatch and not fixmatch:
1145 1145 break
1146 1146 if not bugmatch:
1147 1147 m = fixmatch
1148 1148 elif not fixmatch:
1149 1149 m = bugmatch
1150 1150 else:
1151 1151 if bugmatch.start() < fixmatch.start():
1152 1152 m = bugmatch
1153 1153 else:
1154 1154 m = fixmatch
1155 1155 start = m.end()
1156 1156 if m is bugmatch:
1157 1157 bugmatch = self.bug_re.search(ctx.description(), start)
1158 1158 if b'fix' in bugattribs:
1159 1159 del bugattribs[b'fix']
1160 1160 else:
1161 1161 fixmatch = self.fix_re.search(ctx.description(), start)
1162 1162 bugattribs[b'fix'] = None
1163 1163
1164 1164 try:
1165 1165 ids = m.group(b'ids')
1166 1166 except IndexError:
1167 1167 ids = m.group(1)
1168 1168 try:
1169 1169 hours = float(m.group(b'hours'))
1170 1170 bugattribs[b'hours'] = hours
1171 1171 except IndexError:
1172 1172 pass
1173 1173 except TypeError:
1174 1174 pass
1175 1175 except ValueError:
1176 1176 self.ui.status(_(b"%s: invalid hours\n") % m.group(b'hours'))
1177 1177
1178 1178 for id in self.split_re.split(ids):
1179 1179 if not id:
1180 1180 continue
1181 1181 bugs[int(id)] = bugattribs
1182 1182 if bugs:
1183 1183 self.bzdriver.filter_real_bug_ids(bugs)
1184 1184 if bugs:
1185 1185 self.bzdriver.filter_cset_known_bug_ids(ctx.node(), bugs)
1186 1186 return bugs
1187 1187
1188 1188 def update(self, bugid, newstate, ctx):
1189 1189 '''update bugzilla bug with reference to changeset.'''
1190 1190
1191 1191 def webroot(root):
1192 1192 """strip leading prefix of repo root and turn into
1193 1193 url-safe path."""
1194 1194 count = int(self.ui.config(b'bugzilla', b'strip'))
1195 1195 root = util.pconvert(root)
1196 1196 while count > 0:
1197 1197 c = root.find(b'/')
1198 1198 if c == -1:
1199 1199 break
1200 1200 root = root[c + 1 :]
1201 1201 count -= 1
1202 1202 return root
1203 1203
1204 1204 mapfile = None
1205 1205 tmpl = self.ui.config(b'bugzilla', b'template')
1206 1206 if not tmpl:
1207 1207 mapfile = self.ui.config(b'bugzilla', b'style')
1208 1208 if not mapfile and not tmpl:
1209 1209 tmpl = _(
1210 1210 b'changeset {node|short} in repo {root} refers '
1211 1211 b'to bug {bug}.\ndetails:\n\t{desc|tabindent}'
1212 1212 )
1213 1213 spec = logcmdutil.templatespec(tmpl, mapfile)
1214 1214 t = logcmdutil.changesettemplater(self.ui, self.repo, spec)
1215 1215 self.ui.pushbuffer()
1216 1216 t.show(
1217 1217 ctx,
1218 1218 changes=ctx.changeset(),
1219 1219 bug=pycompat.bytestr(bugid),
1220 1220 hgweb=self.ui.config(b'web', b'baseurl'),
1221 1221 root=self.repo.root,
1222 1222 webroot=webroot(self.repo.root),
1223 1223 )
1224 1224 data = self.ui.popbuffer()
1225 1225 self.bzdriver.updatebug(
1226 1226 bugid, newstate, data, stringutil.email(ctx.user())
1227 1227 )
1228 1228
1229 1229 def notify(self, bugs, committer):
1230 1230 '''ensure Bugzilla users are notified of bug change.'''
1231 1231 self.bzdriver.notify(bugs, committer)
1232 1232
1233 1233
1234 1234 def hook(ui, repo, hooktype, node=None, **kwargs):
1235 1235 """add comment to bugzilla for each changeset that refers to a
1236 1236 bugzilla bug id. only add a comment once per bug, so same change
1237 1237 seen multiple times does not fill bug with duplicate data."""
1238 1238 if node is None:
1239 1239 raise error.Abort(
1240 1240 _(b'hook type %s does not pass a changeset id') % hooktype
1241 1241 )
1242 1242 try:
1243 1243 bz = bugzilla(ui, repo)
1244 1244 ctx = repo[node]
1245 1245 bugs = bz.find_bugs(ctx)
1246 1246 if bugs:
1247 1247 for bug in bugs:
1248 1248 bz.update(bug, bugs[bug], ctx)
1249 1249 bz.notify(bugs, stringutil.email(ctx.user()))
1250 1250 except Exception as e:
1251 1251 raise error.Abort(_(b'Bugzilla error: %s') % stringutil.forcebytestr(e))
@@ -1,1090 +1,1090 b''
1 1 # This software may be used and distributed according to the terms of the
2 2 # GNU General Public License version 2 or any later version.
3 3
4 4 """advertise pre-generated bundles to seed clones
5 5
6 6 "clonebundles" is a server-side extension used to advertise the existence
7 7 of pre-generated, externally hosted bundle files to clients that are
8 8 cloning so that cloning can be faster, more reliable, and require less
9 9 resources on the server. "pullbundles" is a related feature for sending
10 10 pre-generated bundle files to clients as part of pull operations.
11 11
12 12 Cloning can be a CPU and I/O intensive operation on servers. Traditionally,
13 13 the server, in response to a client's request to clone, dynamically generates
14 14 a bundle containing the entire repository content and sends it to the client.
15 15 There is no caching on the server and the server will have to redundantly
16 16 generate the same outgoing bundle in response to each clone request. For
17 17 servers with large repositories or with high clone volume, the load from
18 18 clones can make scaling the server challenging and costly.
19 19
20 20 This extension provides server operators the ability to offload
21 21 potentially expensive clone load to an external service. Pre-generated
22 22 bundles also allow using more CPU intensive compression, reducing the
23 23 effective bandwidth requirements.
24 24
25 25 Here's how clone bundles work:
26 26
27 27 1. A server operator establishes a mechanism for making bundle files available
28 28 on a hosting service where Mercurial clients can fetch them.
29 29 2. A manifest file listing available bundle URLs and some optional metadata
30 30 is added to the Mercurial repository on the server.
31 31 3. A client initiates a clone against a clone bundles aware server.
32 32 4. The client sees the server is advertising clone bundles and fetches the
33 33 manifest listing available bundles.
34 34 5. The client filters and sorts the available bundles based on what it
35 35 supports and prefers.
36 36 6. The client downloads and applies an available bundle from the
37 37 server-specified URL.
38 38 7. The client reconnects to the original server and performs the equivalent
39 39 of :hg:`pull` to retrieve all repository data not in the bundle. (The
40 40 repository could have been updated between when the bundle was created
41 41 and when the client started the clone.) This may use "pullbundles".
42 42
43 43 Instead of the server generating full repository bundles for every clone
44 44 request, it generates full bundles once and they are subsequently reused to
45 45 bootstrap new clones. The server may still transfer data at clone time.
46 46 However, this is only data that has been added/changed since the bundle was
47 47 created. For large, established repositories, this can reduce server load for
48 48 clones to less than 1% of original.
49 49
50 50 Here's how pullbundles work:
51 51
52 52 1. A manifest file listing available bundles and describing the revisions
53 53 is added to the Mercurial repository on the server.
54 54 2. A new-enough client informs the server that it supports partial pulls
55 55 and initiates a pull.
56 56 3. If the server has pull bundles enabled and sees the client advertising
57 57 partial pulls, it checks for a matching pull bundle in the manifest.
58 58 A bundle matches if the format is supported by the client, the client
59 59 has the required revisions already and needs something from the bundle.
60 60 4. If there is at least one matching bundle, the server sends it to the client.
61 61 5. The client applies the bundle and notices that the server reply was
62 62 incomplete. It initiates another pull.
63 63
64 64 To work, this extension requires the following of server operators:
65 65
66 66 * Generating bundle files of repository content (typically periodically,
67 67 such as once per day).
68 68 * Clone bundles: A file server that clients have network access to and that
69 69 Python knows how to talk to through its normal URL handling facility
70 70 (typically an HTTP/HTTPS server).
71 71 * A process for keeping the bundles manifest in sync with available bundle
72 72 files.
73 73
74 74 Strictly speaking, using a static file hosting server isn't required: a server
75 75 operator could use a dynamic service for retrieving bundle data. However,
76 76 static file hosting services are simple and scalable and should be sufficient
77 77 for most needs.
78 78
79 79 Bundle files can be generated with the :hg:`bundle` command. Typically
80 80 :hg:`bundle --all` is used to produce a bundle of the entire repository.
81 81
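For instance (an illustrative sketch; the file name and bundle type are
arbitrary), a full bundle can be generated and its specification printed with::

    $ hg bundle --all --type zstd-v2 full-zstd-v2.hg
    $ hg debugbundle --spec full-zstd-v2.hg
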
82 82 The bundlespec option `stream` (see :hg:`help bundlespec`)
83 83 can be used to produce a special *streaming clonebundle*, typically using
84 84 :hg:`bundle --all --type="none-streamv2"`.
85 85 These are bundle files that are extremely efficient
86 86 to produce and consume (read: fast). However, they are larger than
87 87 traditional bundle formats and require that clients support the exact set
88 88 of repository data store formats in use by the repository that created them.
89 89 Typically, a newer server can serve data that is compatible with older clients.
90 90 However, *streaming clone bundles* don't have this guarantee. **Server
91 91 operators need to be aware that newer versions of Mercurial may produce
92 92 streaming clone bundles incompatible with older Mercurial versions.**
93 93
94 94 A server operator is responsible for creating a ``.hg/clonebundles.manifest``
95 95 file containing the list of available bundle files suitable for seeding
96 96 clones. If this file does not exist, the repository will not advertise the
97 97 existence of clone bundles when clients connect. For pull bundles,
98 98 ``.hg/pullbundles.manifest`` is used.
99 99
100 100 The manifest file contains a newline (\\n) delimited list of entries.
101 101
102 102 Each line in this file defines an available bundle. Lines have the format:
103 103
104 104 <URL> [<key>=<value>[ <key>=<value>]]
105 105
106 106 That is, a URL followed by an optional, space-delimited list of key=value
107 107 pairs describing additional properties of this bundle. Both keys and values
108 108 are URI encoded.
109 109
110 110 For pull bundles, the URL is a path under the ``.hg`` directory of the
111 111 repository.
112 112
113 113 Keys in UPPERCASE are reserved for use by Mercurial and are defined below.
114 114 All non-uppercase keys can be used by site installations. An example use
115 115 for custom properties is to use the *datacenter* attribute to define which
116 116 data center a file is hosted in. Clients could then prefer a server in the
117 117 data center closest to them.
118 118
119 119 The following reserved keys are currently defined:
120 120
121 121 BUNDLESPEC
122 122 A "bundle specification" string that describes the type of the bundle.
123 123
124 124 These are string values that are accepted by the "--type" argument of
125 125 :hg:`bundle`.
126 126
127 127 The values are parsed in strict mode, which means they must be of the
128 128 "<compression>-<type>" form. See
129 129 mercurial.exchange.parsebundlespec() for more details.
130 130
131 131 :hg:`debugbundle --spec` can be used to print the bundle specification
132 132 string for a bundle file. The output of this command can be used verbatim
133 133 for the value of ``BUNDLESPEC`` (it is already escaped).
134 134
135 135 Clients will automatically filter out specifications that are unknown or
136 136 unsupported so they won't attempt to download something that likely won't
137 137 apply.
138 138
139 139 The actual value doesn't impact client behavior beyond filtering:
140 140 clients will still sniff the bundle type from the header of downloaded
141 141 files.
142 142
143 143 **Use of this key is highly recommended**, as it allows clients to
144 144 easily skip unsupported bundles. If this key is not defined, an old
145 145 client may attempt to apply a bundle that it is incapable of reading.
146 146
147 147 REQUIRESNI
148 148 Whether Server Name Indication (SNI) is required to connect to the URL.
149 149 SNI allows servers to use multiple certificates on the same IP. It is
150 150 somewhat common in CDNs and other hosting providers. Older Python
151 151 versions do not support SNI. Defining this attribute enables clients
152 152 with older Python versions to filter this entry without experiencing
153 153 an opaque SSL failure at connection time.
154 154
155 155 If this is defined, it is important to advertise a non-SNI fallback
156 156 URL or clients running old Python releases may not be able to clone
157 157 with the clonebundles facility.
158 158
159 159 Value should be "true".
160 160
161 161 REQUIREDRAM
162 162 Value specifies expected memory requirements to decode the payload.
163 163 Values can have suffixes for common bytes sizes. e.g. "64MB".
164 164
165 165 This key is often used with zstd-compressed bundles using a high
166 166 compression level / window size, which can require 100+ MB of memory
167 167 to decode.
168 168
169 169 heads
170 170 Used for pull bundles. This contains the ``;`` separated changeset
171 171 hashes of the heads of the bundle content.
172 172
173 173 bases
174 174 Used for pull bundles. This contains the ``;`` separated changeset
175 175 hashes of the roots of the bundle content. This can be skipped if
176 176 the bundle was created without ``--base``.
177 177
178 178 Manifests can contain multiple entries. Assuming metadata is defined, clients
179 179 will filter entries from the manifest that they don't support. The remaining
180 180 entries are optionally sorted by client preferences
181 181 (``ui.clonebundleprefers`` config option). The client then attempts
182 182 to fetch the bundle at the first URL in the remaining list.
183 183
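As an illustration (the URLs, sizes and the ``datacenter`` attribute are
hypothetical), a ``.hg/clonebundles.manifest`` could contain::

    https://cdn.example.com/full-zstd-v2.hg BUNDLESPEC=zstd-v2 REQUIREDRAM=128MB datacenter=us-west
    https://mirror.example.org/full-gzip-v2.hg BUNDLESPEC=gzip-v2 datacenter=eu-central

A client preferring zstd bundles could then rank these entries through the
``ui.clonebundleprefers`` option mentioned above.
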
184 184 **Errors when downloading a bundle will fail the entire clone operation:
185 185 clients do not automatically fall back to a traditional clone.** The reason
186 186 for this is that if a server is using clone bundles, it is probably doing so
187 187 because the feature is necessary to help it scale. In other words, there
188 188 is an assumption that clone load will be offloaded to another service and
189 189 that the Mercurial server isn't responsible for serving this clone load.
190 190 If that other service experiences issues and clients start mass falling back to
191 191 the original Mercurial server, the added clone load could overwhelm the server
192 192 due to unexpected load and effectively take it offline. Not having clients
193 193 automatically fall back to cloning from the original server mitigates this
194 194 scenario.
195 195
196 196 Because there is no automatic Mercurial server fallback on failure of the
197 197 bundle hosting service, it is important for server operators to view the bundle
198 198 hosting service as an extension of the Mercurial server in terms of
199 199 availability and service level agreements: if the bundle hosting service goes
200 200 down, so does the ability for clients to clone. Note: clients will see a
201 201 message informing them how to bypass the clone bundles facility when a failure
202 202 occurs. So server operators should prepare for some people to follow these
203 203 instructions when a failure occurs, thus driving more load to the original
204 204 Mercurial server when the bundle hosting service fails.
205 205
206 206
207 207 inline clonebundles
208 208 -------------------
209 209
210 210 It is possible to transmit clonebundles inline in case repositories are
211 211 accessed over SSH. This avoids having to set up an external HTTPS server
212 212 and results in the same access control as already present for the SSH setup.
213 213
214 214 Inline clonebundles should be placed into the `.hg/bundle-cache` directory.
215 215 A clonebundle at `.hg/bundle-cache/mybundle.bundle` is referred to
216 216 in the `clonebundles.manifest` file as `peer-bundle-cache://mybundle.bundle`.
217 217
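For example (a sketch; the bundle name and spec are illustrative), the
corresponding manifest entry would then be::

    peer-bundle-cache://mybundle.bundle BUNDLESPEC=gzip-v2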
218 218
219 219 auto-generation of clone bundles
220 220 --------------------------------
221 221
222 222 It is possible to set Mercurial to automatically re-generate clone bundles when
223 223 enough new content is available.
224 224
225 225 Mercurial will take care of the process asynchronously. The defined list of
226 226 bundle types will be generated, uploaded, and advertised. Older bundles will get
227 227 decommissioned as newer ones replace them.
228 228
229 229 Bundles Generation:
230 230 ...................
231 231
232 232 The extension can generate multiple variants of the clone bundle. Each
233 233 different variant will be defined by the "bundle-spec" they use::
234 234
235 235 [clone-bundles]
236 236 auto-generate.formats= zstd-v2, gzip-v2
237 237
238 238 See `hg help bundlespec` for details about available options.
239 239
240 240 By default, new bundles are generated when 5% of the repository contents or at
241 241 least 1000 revisions are not contained in the cached bundles. This behavior can
242 242 be controlled by the `clone-bundles.trigger.below-bundled-ratio` option
243 243 (default 0.95) and the `clone-bundles.trigger.revs` option (default 1000)::
244 244
245 245 [clone-bundles]
246 246 trigger.below-bundled-ratio=0.95
247 247 trigger.revs=1000
248 248
249 249 This logic can be manually triggered using the `admin::clone-bundles-refresh`
250 250 command, or automatically on each repository change if
251 251 `clone-bundles.auto-generate.on-change` is set to `yes`::
252 252
253 253 [clone-bundles]
254 254 auto-generate.on-change=yes
255 255 auto-generate.formats= zstd-v2, gzip-v2
256 256
257 257 Automatic Inline serving
258 258 ........................
259 259
260 260 The simplest way to serve the generated bundles is through the Mercurial
261 261 protocol. However, it is not the most efficient approach, as requests will
262 262 still be served by the main server. It is useful in cases where authentication
263 263 is complex or when an efficient mirror system is already in use anyway. See
264 264 the `inline clonebundles` section above for details about inline clonebundles.
265 265
266 266 To automatically serve generated bundles through inline clonebundles, simply set
267 267 the following option::
268 268
269 269 auto-generate.serve-inline=yes
270 270
271 271 Enabling this option disables the managed upload and serving explained below.
272 272
273 273 Bundles Upload and Serving:
274 274 ...........................
275 275
276 276 This is the most efficient way to serve automatically generated clone bundles,
277 277 but requires some setup.
278 278
279 279 The generated bundles need to be made available to users through a "public" URL.
280 280 This should be done through the `clone-bundles.upload-command` configuration. The
281 281 value of this option should be a shell command. It will have access to the
282 282 bundle file path through the `$HGCB_BUNDLE_PATH` variable, and to the expected
283 283 basename in the "public" URL through the `$HGCB_BUNDLE_BASENAME` variable::
284 284
285 285 [clone-bundles]
286 286 upload-command=sftp put $HGCB_BUNDLE_PATH \
287 287 sftp://bundles.host/clone-bundles/$HGCB_BUNDLE_BASENAME
288 288
289 289 If the file was already uploaded, the command must still succeed.
290 290
291 291 After upload, the file should be available at a URL defined by
292 292 `clone-bundles.url-template`::
293 293
294 294 [clone-bundles]
295 295 url-template=https://bundles.host/cache/clone-bundles/{basename}
296 296
297 297 Old bundles cleanup:
298 298 ....................
299 299
300 300 When new bundles are generated, the older ones are no longer necessary and can
301 301 be removed from storage. This is done through the `clone-bundles.delete-command`
302 302 configuration. The command is given the URL of the artifact to delete through
303 303 the `$HGCB_BUNDLE_URL` environment variable::
304 304
305 305 [clone-bundles]
306 306 delete-command=sftp rm sftp://bundles.host/clone-bundles/$HGCB_BUNDLE_BASENAME
307 307
308 308 If the file was already deleted, the command must still succeed.
309 309 """
310 310
311 311
312 312 import os
313 313 import weakref
314 314
315 315 from mercurial.i18n import _
316 316
317 317 from mercurial import (
318 318 bundlecaches,
319 319 commands,
320 320 error,
321 321 extensions,
322 322 localrepo,
323 323 lock,
324 324 node,
325 325 registrar,
326 326 util,
327 327 wireprotov1server,
328 328 )
329 329
330 330
331 331 from mercurial.utils import (
332 332 procutil,
333 333 )
334 334
335 335 testedwith = b'ships-with-hg-core'
336 336
337 337
338 338 def capabilities(orig, repo, proto):
339 339 caps = orig(repo, proto)
340 340
341 341 # Only advertise if a manifest exists. This does add some I/O to requests.
342 342 # But this should be cheaper than a wasted network round trip due to
343 343 # missing file.
344 344 if repo.vfs.exists(bundlecaches.CB_MANIFEST_FILE):
345 345 caps.append(b'clonebundles')
346 346 caps.append(b'clonebundles_manifest')
347 347
348 348 return caps
349 349
350 350
351 351 def extsetup(ui):
352 352 extensions.wrapfunction(wireprotov1server, '_capabilities', capabilities)
353 353
354 354
355 355 # logic for bundle auto-generation
356 356
357 357
358 358 configtable = {}
359 359 configitem = registrar.configitem(configtable)
360 360
361 361 cmdtable = {}
362 362 command = registrar.command(cmdtable)
363 363
364 364 configitem(b'clone-bundles', b'auto-generate.on-change', default=False)
365 365 configitem(b'clone-bundles', b'auto-generate.formats', default=list)
366 366 configitem(b'clone-bundles', b'auto-generate.serve-inline', default=False)
367 367 configitem(b'clone-bundles', b'trigger.below-bundled-ratio', default=0.95)
368 368 configitem(b'clone-bundles', b'trigger.revs', default=1000)
369 369
370 370 configitem(b'clone-bundles', b'upload-command', default=None)
371 371
372 372 configitem(b'clone-bundles', b'delete-command', default=None)
373 373
374 374 configitem(b'clone-bundles', b'url-template', default=None)
375 375
376 376 configitem(b'devel', b'debug.clonebundles', default=False)
377 377
378 378
379 379 # category for the post-close transaction hooks
380 380 CAT_POSTCLOSE = b"clonebundles-autobundles"
381 381
382 382 # template for bundle file names
383 383 BUNDLE_MASK = (
384 384 b"full-%(bundle_type)s-%(revs)d_revs-%(tip_short)s_tip-%(op_id)s.hg"
385 385 )
386 386
387 387
388 388 # file in .hg/ use to track clonebundles being auto-generated
389 389 AUTO_GEN_FILE = b'clonebundles.auto-gen'
390 390
391 391
392 392 class BundleBase(object):
393 393 """represents the core of properties that matters for us in a bundle
394 394
395 395 :bundle_type: the bundlespec (see hg help bundlespec)
396 396 :revs: the number of revisions in the repo at bundle creation time
397 397 :tip_rev: the rev-num of the tip revision
398 398 :tip_node: the node id of the tip-most revision in the bundle
399 399
400 400 :ready: True if the bundle is ready to be served
401 401 """
402 402
403 403 ready = False
404 404
405 405 def __init__(self, bundle_type, revs, tip_rev, tip_node):
406 406 self.bundle_type = bundle_type
407 407 self.revs = revs
408 408 self.tip_rev = tip_rev
409 409 self.tip_node = tip_node
410 410
411 411 def valid_for(self, repo):
412 412 """is this bundle applicable to the current repository
413 413
414 414 This is useful for detecting bundles made irrelevant by stripping.
415 415 """
416 416 tip_node = node.bin(self.tip_node)
417 417 return repo.changelog.index.get_rev(tip_node) == self.tip_rev
418 418
419 419 def __eq__(self, other):
420 420 left = (self.ready, self.bundle_type, self.tip_rev, self.tip_node)
421 421 right = (other.ready, other.bundle_type, other.tip_rev, other.tip_node)
422 422 return left == right
423 423
424 424 def __neq__(self, other):
425 425 return not self == other
426 426
427 427 def __cmp__(self, other):
428 428 if self == other:
429 429 return 0
430 430 return -1
431 431
432 432
433 433 class RequestedBundle(BundleBase):
434 434 """A bundle that should be generated.
435 435
436 436 Additional attributes compared to BundleBase
437 437 :heads: list of head revisions (as rev-num)
438 438 :op_id: a "unique" identifier for the operation triggering the change
439 439 """
440 440
441 441 def __init__(self, bundle_type, revs, tip_rev, tip_node, head_revs, op_id):
442 442 self.head_revs = head_revs
443 443 self.op_id = op_id
444 444 super(RequestedBundle, self).__init__(
445 445 bundle_type,
446 446 revs,
447 447 tip_rev,
448 448 tip_node,
449 449 )
450 450
451 451 @property
452 452 def suggested_filename(self):
453 453 """A filename that can be used for the generated bundle"""
454 454 data = {
455 455 b'bundle_type': self.bundle_type,
456 456 b'revs': self.revs,
457 457 b'heads': self.head_revs,
458 458 b'tip_rev': self.tip_rev,
459 459 b'tip_node': self.tip_node,
460 460 b'tip_short': self.tip_node[:12],
461 461 b'op_id': self.op_id,
462 462 }
463 463 return BUNDLE_MASK % data
464 464
465 465 def generate_bundle(self, repo, file_path):
466 466 """generate the bundle at `filepath`"""
467 467 commands.bundle(
468 468 repo.ui,
469 469 repo,
470 470 file_path,
471 471 base=[b"null"],
472 472 rev=self.head_revs,
473 473 type=self.bundle_type,
474 474 quiet=True,
475 475 )
476 476
477 477 def generating(self, file_path, hostname=None, pid=None):
478 478 """return a GeneratingBundle object from this object"""
479 479 if pid is None:
480 480 pid = os.getpid()
481 481 if hostname is None:
482 482 hostname = lock._getlockprefix()
483 483 return GeneratingBundle(
484 484 self.bundle_type,
485 485 self.revs,
486 486 self.tip_rev,
487 487 self.tip_node,
488 488 hostname,
489 489 pid,
490 490 file_path,
491 491 )
492 492
493 493
494 494 class GeneratingBundle(BundleBase):
495 495 """A bundle being generated
496 496
497 497 extra attributes compared to BundleBase:
498 498
499 499 :hostname: the hostname of the machine generating the bundle
500 500 :pid: the pid of the process generating the bundle
501 501 :filepath: the target filename of the bundle
502 502
503 503 These attributes exist to help detect stalled generation processes.
504 504 """
505 505
506 506 ready = False
507 507
508 508 def __init__(
509 509 self, bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath
510 510 ):
511 511 self.hostname = hostname
512 512 self.pid = pid
513 513 self.filepath = filepath
514 514 super(GeneratingBundle, self).__init__(
515 515 bundle_type, revs, tip_rev, tip_node
516 516 )
517 517
518 518 @classmethod
519 519 def from_line(cls, line):
520 520 """create an object by deserializing a line from AUTO_GEN_FILE"""
521 521 assert line.startswith(b'PENDING-v1 ')
522 522 (
523 523 __,
524 524 bundle_type,
525 525 revs,
526 526 tip_rev,
527 527 tip_node,
528 528 hostname,
529 529 pid,
530 530 filepath,
531 531 ) = line.split()
532 532 hostname = util.urlreq.unquote(hostname)
533 533 filepath = util.urlreq.unquote(filepath)
534 534 revs = int(revs)
535 535 tip_rev = int(tip_rev)
536 536 pid = int(pid)
537 537 return cls(
538 538 bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath
539 539 )
540 540
541 541 def to_line(self):
542 542 """serialize the object to include as a line in AUTO_GEN_FILE"""
543 543 templ = b"PENDING-v1 %s %d %d %s %s %d %s"
544 544 data = (
545 545 self.bundle_type,
546 546 self.revs,
547 547 self.tip_rev,
548 548 self.tip_node,
549 549 util.urlreq.quote(self.hostname),
550 550 self.pid,
551 551 util.urlreq.quote(self.filepath),
552 552 )
553 553 return templ % data
554 554
555 555 def __eq__(self, other):
556 556 if not super(GeneratingBundle, self).__eq__(other):
557 557 return False
558 558 left = (self.hostname, self.pid, self.filepath)
559 559 right = (other.hostname, other.pid, other.filepath)
560 560 return left == right
561 561
562 562 def uploaded(self, url, basename):
563 563 """return a GeneratedBundle from this object"""
564 564 return GeneratedBundle(
565 565 self.bundle_type,
566 566 self.revs,
567 567 self.tip_rev,
568 568 self.tip_node,
569 569 url,
570 570 basename,
571 571 )
572 572
573 573
574 574 class GeneratedBundle(BundleBase):
575 575 """A bundle that is done being generated and can be served
576 576
577 577 extra attributes compared to BundleBase:
578 578
579 579 :file_url: the url where the bundle is available.
580 580 :basename: the "basename" used to upload (useful for deletion)
581 581
582 582 These attributes exist to generate a bundle manifest
583 583 (.hg/pullbundles.manifest)
584 584 """
585 585
586 586 ready = True
587 587
588 588 def __init__(
589 589 self, bundle_type, revs, tip_rev, tip_node, file_url, basename
590 590 ):
591 591 self.file_url = file_url
592 592 self.basename = basename
593 593 super(GeneratedBundle, self).__init__(
594 594 bundle_type, revs, tip_rev, tip_node
595 595 )
596 596
597 597 @classmethod
598 598 def from_line(cls, line):
599 599 """create an object by deserializing a line from AUTO_GEN_FILE"""
600 600 assert line.startswith(b'DONE-v1 ')
601 601 (
602 602 __,
603 603 bundle_type,
604 604 revs,
605 605 tip_rev,
606 606 tip_node,
607 607 file_url,
608 608 basename,
609 609 ) = line.split()
610 610 revs = int(revs)
611 611 tip_rev = int(tip_rev)
612 612 file_url = util.urlreq.unquote(file_url)
613 613 return cls(bundle_type, revs, tip_rev, tip_node, file_url, basename)
614 614
615 615 def to_line(self):
616 616 """serialize the object to include as a line in AUTO_GEN_FILE"""
617 617 templ = b"DONE-v1 %s %d %d %s %s %s"
618 618 data = (
619 619 self.bundle_type,
620 620 self.revs,
621 621 self.tip_rev,
622 622 self.tip_node,
623 623 util.urlreq.quote(self.file_url),
624 624 self.basename,
625 625 )
626 626 return templ % data
627 627
628 628 def manifest_line(self):
629 629 """serialize the object to include as a line in pullbundles.manifest"""
630 630 templ = b"%s BUNDLESPEC=%s"
631 631 if self.file_url.startswith(b'http'):
632 632 templ += b" REQUIRESNI=true"
633 633 return templ % (self.file_url, self.bundle_type)
634 634
635 635 def __eq__(self, other):
636 636 if not super(GeneratedBundle, self).__eq__(other):
637 637 return False
638 638 return self.file_url == other.file_url
639 639
640 640
641 641 def parse_auto_gen(content):
642 642 """parse the AUTO_GEN_FILE to return a list of Bundle object"""
643 643 bundles = []
644 644 for line in content.splitlines():
645 645 if line.startswith(b'PENDING-v1 '):
646 646 bundles.append(GeneratingBundle.from_line(line))
647 647 elif line.startswith(b'DONE-v1 '):
648 648 bundles.append(GeneratedBundle.from_line(line))
649 649 return bundles
650 650
651 651
652 652 def dumps_auto_gen(bundles):
653 653 """serialize a list of Bundle as a AUTO_GEN_FILE content"""
654 654 lines = []
655 655 for b in bundles:
656 656 lines.append(b"%s\n" % b.to_line())
657 657 lines.sort()
658 658 return b"".join(lines)
659 659
660 660
661 661 def read_auto_gen(repo):
662 662 """read the AUTO_GEN_FILE for the <repo> a list of Bundle object"""
663 663 data = repo.vfs.tryread(AUTO_GEN_FILE)
664 664 if not data:
665 665 return []
666 666 return parse_auto_gen(data)
667 667
668 668
669 669 def write_auto_gen(repo, bundles):
670 670 """write a list of Bundle objects into the repo's AUTO_GEN_FILE"""
671 671 assert repo._cb_lock_ref is not None
672 672 data = dumps_auto_gen(bundles)
673 673 with repo.vfs(AUTO_GEN_FILE, mode=b'wb', atomictemp=True) as f:
674 674 f.write(data)
675 675
676 676
677 677 def generate_manifest(bundles):
678 678 """write a list of Bundle objects into the repo's AUTO_GEN_FILE"""
679 679 bundles = list(bundles)
680 680 bundles.sort(key=lambda b: b.bundle_type)
681 681 lines = []
682 682 for b in bundles:
683 683 lines.append(b"%s\n" % b.manifest_line())
684 684 return b"".join(lines)
685 685
686 686
687 687 def update_ondisk_manifest(repo):
688 688 """update the clonebundle manifest with latest url"""
689 689 with repo.clonebundles_lock():
690 690 bundles = read_auto_gen(repo)
691 691
692 692 per_types = {}
693 693 for b in bundles:
694 694 if not (b.ready and b.valid_for(repo)):
695 695 continue
696 696 current = per_types.get(b.bundle_type)
697 697 if current is not None and current.revs >= b.revs:
698 698 continue
699 699 per_types[b.bundle_type] = b
700 700 manifest = generate_manifest(per_types.values())
701 701 with repo.vfs(
702 702 bundlecaches.CB_MANIFEST_FILE, mode=b"wb", atomictemp=True
703 703 ) as f:
704 704 f.write(manifest)
705 705
706 706
707 707 def update_bundle_list(repo, new_bundles=(), del_bundles=()):
708 708 """modify the repo's AUTO_GEN_FILE
709 709
710 710 This method also regenerates the clone bundle manifest when needed"""
711 711 with repo.clonebundles_lock():
712 712 bundles = read_auto_gen(repo)
713 713 if del_bundles:
714 714 bundles = [b for b in bundles if b not in del_bundles]
715 715 new_bundles = [b for b in new_bundles if b not in bundles]
716 716 bundles.extend(new_bundles)
717 717 write_auto_gen(repo, bundles)
718 718 all_changed = []
719 719 all_changed.extend(new_bundles)
720 720 all_changed.extend(del_bundles)
721 721 if any(b.ready for b in all_changed):
722 722 update_ondisk_manifest(repo)
723 723
724 724
725 725 def cleanup_tmp_bundle(repo, target):
726 726 """remove a GeneratingBundle file and entry"""
727 727 assert not target.ready
728 728 with repo.clonebundles_lock():
729 729 repo.vfs.tryunlink(target.filepath)
730 730 update_bundle_list(repo, del_bundles=[target])
731 731
732 732
733 733 def finalize_one_bundle(repo, target):
734 734 """upload a generated bundle and advertise it in the clonebundles.manifest"""
735 735 with repo.clonebundles_lock():
736 736 bundles = read_auto_gen(repo)
737 737 if target in bundles and target.valid_for(repo):
738 738 result = upload_bundle(repo, target)
739 739 update_bundle_list(repo, new_bundles=[result])
740 740 cleanup_tmp_bundle(repo, target)
741 741
742 742
743 743 def find_outdated_bundles(repo, bundles):
744 744 """finds outdated bundles"""
745 745 olds = []
746 746 per_types = {}
747 747 for b in bundles:
748 748 if not b.valid_for(repo):
749 749 olds.append(b)
750 750 continue
751 751 l = per_types.setdefault(b.bundle_type, [])
752 752 l.append(b)
753 753 for key in sorted(per_types):
754 754 all = per_types[key]
755 755 if len(all) > 1:
756 756 all.sort(key=lambda b: b.revs, reverse=True)
757 757 olds.extend(all[1:])
758 758 return olds
759 759
760 760
761 761 def collect_garbage(repo):
762 762 """finds outdated bundles and get them deleted"""
763 763 with repo.clonebundles_lock():
764 764 bundles = read_auto_gen(repo)
765 765 olds = find_outdated_bundles(repo, bundles)
766 766 for o in olds:
767 767 delete_bundle(repo, o)
768 768 update_bundle_list(repo, del_bundles=olds)
769 769
770 770
771 771 def upload_bundle(repo, bundle):
772 772 """upload the result of a GeneratingBundle and return a GeneratedBundle
773 773
774 774 The upload is done using the `clone-bundles.upload-command`
775 775 """
776 776 inline = repo.ui.config(b'clone-bundles', b'auto-generate.serve-inline')
777 777 basename = repo.vfs.basename(bundle.filepath)
778 778 if inline:
779 779 dest_dir = repo.vfs.join(bundlecaches.BUNDLE_CACHE_DIR)
780 780 repo.vfs.makedirs(dest_dir)
781 781 dest = repo.vfs.join(dest_dir, basename)
782 782 util.copyfiles(bundle.filepath, dest, hardlink=True)
783 783 url = bundlecaches.CLONEBUNDLESCHEME + basename
784 784 return bundle.uploaded(url, basename)
785 785 else:
786 786 cmd = repo.ui.config(b'clone-bundles', b'upload-command')
787 787 url = repo.ui.config(b'clone-bundles', b'url-template')
788 788 filepath = procutil.shellquote(bundle.filepath)
789 789 variables = {
790 790 b'HGCB_BUNDLE_PATH': filepath,
791 791 b'HGCB_BUNDLE_BASENAME': basename,
792 792 }
793 793 env = procutil.shellenviron(environ=variables)
794 794 ret = repo.ui.system(cmd, environ=env)
795 795 if ret:
796 796 raise error.Abort(b"command returned status %d: %s" % (ret, cmd))
797 797 url = (
798 798 url.decode('utf8')
799 799 .format(basename=basename.decode('utf8'))
800 800 .encode('utf8')
801 801 )
802 802 return bundle.uploaded(url, basename)
803 803
804 804
805 805 def delete_bundle(repo, bundle):
806 806 """delete a bundle from storage"""
807 807 assert bundle.ready
808 808
809 809 inline = bundle.file_url.startswith(bundlecaches.CLONEBUNDLESCHEME)
810 810
811 811 if inline:
812 812 msg = b'clone-bundles: deleting inline bundle %s\n'
813 813 else:
814 814 msg = b'clone-bundles: deleting bundle %s\n'
815 815 msg %= bundle.basename
816 816 if repo.ui.configbool(b'devel', b'debug.clonebundles'):
817 817 repo.ui.write(msg)
818 818 else:
819 819 repo.ui.debug(msg)
820 820
821 821 if inline:
822 822 inline_path = repo.vfs.join(
823 823 bundlecaches.BUNDLE_CACHE_DIR,
824 824 bundle.basename,
825 825 )
826 826 util.tryunlink(inline_path)
827 827 else:
828 828 cmd = repo.ui.config(b'clone-bundles', b'delete-command')
829 829 variables = {
830 830 b'HGCB_BUNDLE_URL': bundle.file_url,
831 831 b'HGCB_BASENAME': bundle.basename,
832 832 }
833 833 env = procutil.shellenviron(environ=variables)
834 834 ret = repo.ui.system(cmd, environ=env)
835 835 if ret:
836 836 raise error.Abort(b"command returned status %d: %s" % (ret, cmd))
837 837
838 838
839 839 def auto_bundle_needed_actions(repo, bundles, op_id):
840 840 """find the list of bundles that need action
841 841
842 842 returns a tuple of (RequestedBundle objects to generate and upload,
843 843 outdated bundles that should be deleted)."""
844 844 create_bundles = []
845 845 delete_bundles = []
846 846 repo = repo.filtered(b"immutable")
847 847 targets = repo.ui.configlist(b'clone-bundles', b'auto-generate.formats')
848 848 ratio = float(
849 849 repo.ui.config(b'clone-bundles', b'trigger.below-bundled-ratio')
850 850 )
851 851 abs_revs = repo.ui.configint(b'clone-bundles', b'trigger.revs')
852 852 revs = len(repo.changelog)
853 853 generic_data = {
854 854 'revs': revs,
855 855 'head_revs': repo.changelog.headrevs(),
856 856 'tip_rev': repo.changelog.tiprev(),
857 857 'tip_node': node.hex(repo.changelog.tip()),
858 858 'op_id': op_id,
859 859 }
860 860 for t in targets:
861 861 t = bundlecaches.parsebundlespec(repo, t, strict=False).as_spec()
862 862 if new_bundle_needed(repo, bundles, ratio, abs_revs, t, revs):
863 863 data = generic_data.copy()
864 864 data['bundle_type'] = t
865 865 b = RequestedBundle(**data)
866 866 create_bundles.append(b)
867 867 delete_bundles.extend(find_outdated_bundles(repo, bundles))
868 868 return create_bundles, delete_bundles
869 869
870 870
871 871 def new_bundle_needed(repo, bundles, ratio, abs_revs, bundle_type, revs):
872 872 """consider the current cached content and trigger new bundles if needed"""
873 873 threshold = max((revs * ratio), (revs - abs_revs))
874 874 for b in bundles:
875 875 if not b.valid_for(repo) or b.bundle_type != bundle_type:
876 876 continue
877 877 if b.revs > threshold:
878 878 return False
879 879 return True
880 880
881 881
882 882 def start_one_bundle(repo, bundle):
883 883 """start the generation of a single bundle file
884 884
885 885 the `bundle` argument should be a RequestedBundle object.
886 886
887 887 This data is passed to the `debugmakeclonebundles` "as is".
888 888 """
889 889 data = util.pickle.dumps(bundle)
890 890 cmd = [procutil.hgexecutable(), b'--cwd', repo.path, INTERNAL_CMD]
891 891 env = procutil.shellenviron()
892 892 msg = b'clone-bundles: starting bundle generation: %s\n'
893 893 stdout = None
894 894 stderr = None
895 895 waits = []
896 896 record_wait = None
897 897 if repo.ui.configbool(b'devel', b'debug.clonebundles'):
898 898 stdout = procutil.stdout
899 899 stderr = procutil.stderr
900 900 repo.ui.write(msg % bundle.bundle_type)
901 901 record_wait = waits.append
902 902 else:
903 903 repo.ui.debug(msg % bundle.bundle_type)
904 904 bg = procutil.runbgcommand
905 905 bg(
906 906 cmd,
907 907 env,
908 908 stdin_bytes=data,
909 909 stdout=stdout,
910 910 stderr=stderr,
911 911 record_wait=record_wait,
912 912 )
913 913 for f in waits:
914 914 f()
915 915
916 916
917 917 INTERNAL_CMD = b'debug::internal-make-clone-bundles'
918 918
919 919
920 920 @command(INTERNAL_CMD, [], b'')
921 921 def debugmakeclonebundles(ui, repo):
922 922 """Internal command to auto-generate debug bundles"""
923 923 requested_bundle = util.pickle.load(procutil.stdin)
924 924 procutil.stdin.close()
925 925
926 926 collect_garbage(repo)
927 927
928 928 fname = requested_bundle.suggested_filename
929 929 fpath = repo.vfs.makedirs(b'tmp-bundles')
930 930 fpath = repo.vfs.join(b'tmp-bundles', fname)
931 931 bundle = requested_bundle.generating(fpath)
932 932 update_bundle_list(repo, new_bundles=[bundle])
933 933
934 934 requested_bundle.generate_bundle(repo, fpath)
935 935
936 936 repo.invalidate()
937 937 finalize_one_bundle(repo, bundle)
938 938
939 939
940 940 def make_auto_bundler(source_repo):
941 941 reporef = weakref.ref(source_repo)
942 942
943 943 def autobundle(tr):
944 944 repo = reporef()
945 945 assert repo is not None
946 946 bundles = read_auto_gen(repo)
947 947 new, __ = auto_bundle_needed_actions(repo, bundles, b"%d_txn" % id(tr))
948 948 for data in new:
949 949 start_one_bundle(repo, data)
950 950 return None
951 951
952 952 return autobundle
953 953
954 954
955 955 def reposetup(ui, repo):
956 956 """install the two pieces needed for automatic clonebundle generation
957 957
958 958 - add a "post-close" hook that fires bundling when needed
959 959 - introduce a clone-bundle lock so that multiple processes can safely
960 960 update the state files.
961 961 """
962 962 if not repo.local():
963 963 return
964 964
965 965 class autobundlesrepo(repo.__class__):
966 966 def transaction(self, *args, **kwargs):
967 967 tr = super(autobundlesrepo, self).transaction(*args, **kwargs)
968 968 enabled = repo.ui.configbool(
969 969 b'clone-bundles',
970 970 b'auto-generate.on-change',
971 971 )
972 972 targets = repo.ui.configlist(
973 973 b'clone-bundles', b'auto-generate.formats'
974 974 )
975 975 if enabled:
976 976 if not targets:
977 977 repo.ui.warn(
978 978 _(
979 979 b'clone-bundle auto-generate enabled, '
980 980 b'but no formats specified: disabling generation\n'
981 981 )
982 982 )
983 983 else:
984 984 tr.addpostclose(CAT_POSTCLOSE, make_auto_bundler(self))
985 985 return tr
986 986
987 987 @localrepo.unfilteredmethod
988 988 def clonebundles_lock(self, wait=True):
989 989 '''Lock the repository file related to clone bundles'''
990 if not util.safehasattr(self, '_cb_lock_ref'):
990 if not hasattr(self, '_cb_lock_ref'):
991 991 self._cb_lock_ref = None
992 992 l = self._currentlock(self._cb_lock_ref)
993 993 if l is not None:
994 994 l.lock()
995 995 return l
996 996
997 997 l = self._lock(
998 998 vfs=self.vfs,
999 999 lockname=b"clonebundleslock",
1000 1000 wait=wait,
1001 1001 releasefn=None,
1002 1002 acquirefn=None,
1003 1003 desc=_(b'repository %s') % self.origroot,
1004 1004 )
1005 1005 self._cb_lock_ref = weakref.ref(l)
1006 1006 return l
1007 1007
1008 1008 repo._wlockfreeprefix.add(AUTO_GEN_FILE)
1009 1009 repo._wlockfreeprefix.add(bundlecaches.CB_MANIFEST_FILE)
1010 1010 repo.__class__ = autobundlesrepo
1011 1011
1012 1012
1013 1013 @command(
1014 1014 b'admin::clone-bundles-refresh',
1015 1015 [
1016 1016 (
1017 1017 b'',
1018 1018 b'background',
1019 1019 False,
1020 1020 _(b'start bundle generation in the background'),
1021 1021 ),
1022 1022 ],
1023 1023 b'',
1024 1024 )
1025 1025 def cmd_admin_clone_bundles_refresh(
1026 1026 ui,
1027 1027 repo: localrepo.localrepository,
1028 1028 background=False,
1029 1029 ):
1030 1030 """generate clone bundles according to the configuration
1031 1031
1032 1032 This runs the logic for automatic generation, removing outdated bundles and
1033 1033 generating new ones if necessary. See :hg:`help -e clone-bundles` for
1034 1034 details about how to configure this feature.
1035 1035 """
1036 1036 debug = repo.ui.configbool(b'devel', b'debug.clonebundles')
1037 1037 bundles = read_auto_gen(repo)
1038 1038 op_id = b"%d_acbr" % os.getpid()
1039 1039 create, delete = auto_bundle_needed_actions(repo, bundles, op_id)
1040 1040
1041 1041 # if some bundles are scheduled for creation in the background, they will
1042 1042 # deal with garbage collection too, so no need to synchronously do it.
1043 1043 #
1044 1044 # However if no bundles are scheduled for creation, we need to explicitly do
1045 1045 # it here.
1046 1046 if not (background and create):
1047 1047 # we clean up outdated bundles before generating new ones to keep the
1048 1048 # last two versions of the bundle around for a while and avoid having to
1049 1049 # deal with clients that just got served a manifest.
1050 1050 for o in delete:
1051 1051 delete_bundle(repo, o)
1052 1052 update_bundle_list(repo, del_bundles=delete)
1053 1053
1054 1054 if create:
1055 1055 fpath = repo.vfs.makedirs(b'tmp-bundles')
1056 1056
1057 1057 if background:
1058 1058 for requested_bundle in create:
1059 1059 start_one_bundle(repo, requested_bundle)
1060 1060 else:
1061 1061 for requested_bundle in create:
1062 1062 if debug:
1063 1063 msg = b'clone-bundles: starting bundle generation: %s\n'
1064 1064 repo.ui.write(msg % requested_bundle.bundle_type)
1065 1065 fname = requested_bundle.suggested_filename
1066 1066 fpath = repo.vfs.join(b'tmp-bundles', fname)
1067 1067 generating_bundle = requested_bundle.generating(fpath)
1068 1068 update_bundle_list(repo, new_bundles=[generating_bundle])
1069 1069 requested_bundle.generate_bundle(repo, fpath)
1070 1070 result = upload_bundle(repo, generating_bundle)
1071 1071 update_bundle_list(repo, new_bundles=[result])
1072 1072 update_ondisk_manifest(repo)
1073 1073 cleanup_tmp_bundle(repo, generating_bundle)
1074 1074
1075 1075
1076 1076 @command(b'admin::clone-bundles-clear', [], b'')
1077 1077 def cmd_admin_clone_bundles_clear(ui, repo: localrepo.localrepository):
1078 1078 """remove existing clone bundle caches
1079 1079
1080 1080 See `hg help admin::clone-bundles-refresh` for details on how to regenerate
1081 1081 them.
1082 1082
1083 1083 This command only affects bundles that are currently available; it does
1084 1084 not affect bundles being generated asynchronously.
1085 1085 """
1086 1086 bundles = read_auto_gen(repo)
1087 1087 delete = [b for b in bundles if b.ready]
1088 1088 for o in delete:
1089 1089 delete_bundle(repo, o)
1090 1090 update_bundle_list(repo, del_bundles=delete)
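The PENDING-v1 / DONE-v1 records written to AUTO_GEN_FILE above are plain space-separated lines, with the hostname, file path and URL fields percent-quoted so they never contain spaces. The following standalone sketch (not part of the extension; every value in it is invented) illustrates what such records look like and how the two kinds are told apart, mirroring GeneratingBundle.from_line and GeneratedBundle.from_line:

# Standalone sketch of the AUTO_GEN_FILE record formats (illustrative only).
from urllib.parse import quote, unquote

pending = b"PENDING-v1 %s %d %d %s %s %d %s" % (
    b"zstd-v2", 1500, 1499, b"ab" * 20,            # bundle type, revs, tip rev, tip node
    quote("build-host").encode(), 4242,            # generating host and pid
    quote("/repo/.hg/tmp-bundles/full-zstd-v2.hg").encode(),
)
done = (
    b"DONE-v1 zstd-v2 1500 1499 " + b"ab" * 20 +
    b" https%3A//cdn.example.org/full-zstd-v2.hg full-zstd-v2.hg"
)

for line in (pending, done):
    kind, rest = line.split(b' ', 1)
    if kind == b'PENDING-v1':
        # a bundle that some process (hostname/pid) is still generating
        btype, revs, tip_rev, tip_node, host, pid, path = rest.split()
        print('pending', btype, int(revs), unquote(path.decode()))
    elif kind == b'DONE-v1':
        # a bundle that has been uploaded and can be advertised in the manifest
        btype, revs, tip_rev, tip_node, url, basename = rest.split()
        print('done', btype, unquote(url.decode()), basename.decode())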
@@ -1,88 +1,87 b''
1 1 # commitextras.py
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''adds a new --extra flag to commit (ADVANCED)'''
9 9
10 10
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial import (
15 15 commands,
16 16 error,
17 17 extensions,
18 18 registrar,
19 util,
20 19 )
21 20
22 21 cmdtable = {}
23 22 command = registrar.command(cmdtable)
24 23 testedwith = b'ships-with-hg-core'
25 24
26 25 usedinternally = {
27 26 b'amend_source',
28 27 b'branch',
29 28 b'close',
30 29 b'histedit_source',
31 30 b'topic',
32 31 b'rebase_source',
33 32 b'intermediate-source',
34 33 b'__touch-noise__',
35 34 b'source',
36 35 b'transplant_source',
37 36 }
38 37
39 38
40 39 def extsetup(ui):
41 40 entry = extensions.wrapcommand(commands.table, b'commit', _commit)
42 41 options = entry[1]
43 42 options.append(
44 43 (
45 44 b'',
46 45 b'extra',
47 46 [],
48 47 _(b'set a changeset\'s extra values'),
49 48 _(b"KEY=VALUE"),
50 49 )
51 50 )
52 51
53 52
54 53 def _commit(orig, ui, repo, *pats, **opts):
55 if util.safehasattr(repo, 'unfiltered'):
54 if hasattr(repo, 'unfiltered'):
56 55 repo = repo.unfiltered()
57 56
58 57 class repoextra(repo.__class__):
59 58 def commit(self, *innerpats, **inneropts):
60 59 extras = opts.get('extra')
61 60 for raw in extras:
62 61 if b'=' not in raw:
63 62 msg = _(
64 63 b"unable to parse '%s', should follow "
65 64 b"KEY=VALUE format"
66 65 )
67 66 raise error.InputError(msg % raw)
68 67 k, v = raw.split(b'=', 1)
69 68 if not k:
70 69 msg = _(b"unable to parse '%s', keys can't be empty")
71 70 raise error.InputError(msg % raw)
72 71 if re.search(br'[^\w-]', k):
73 72 msg = _(
74 73 b"keys can only contain ascii letters, digits,"
75 74 b" '_' and '-'"
76 75 )
77 76 raise error.InputError(msg)
78 77 if k in usedinternally:
79 78 msg = _(
80 79 b"key '%s' is used internally, can't be set "
81 80 b"manually"
82 81 )
83 82 raise error.InputError(msg % k)
84 83 inneropts['extra'][k] = v
85 84 return super(repoextra, self).commit(*innerpats, **inneropts)
86 85
87 86 repo.__class__ = repoextra
88 87 return orig(ui, repo, *pats, **opts)
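The wrapped commit above only accepts well-formed KEY=VALUE pairs whose keys are not reserved for internal use. As a quick illustration, here is a standalone sketch of that validation (the extension itself raises error.InputError with translated byte-string messages; this plain-Python version is only an approximation):

# Standalone sketch of the --extra validation rules shown above.
import re

USED_INTERNALLY = {
    'amend_source', 'branch', 'close', 'histedit_source', 'topic',
    'rebase_source', 'intermediate-source', '__touch-noise__',
    'source', 'transplant_source',
}

def parse_extra(raw):
    """Split one KEY=VALUE argument, enforcing the same rules as _commit()."""
    if '=' not in raw:
        raise ValueError("unable to parse %r, should follow KEY=VALUE format" % raw)
    key, value = raw.split('=', 1)
    if not key:
        raise ValueError("unable to parse %r, keys can't be empty" % raw)
    if re.search(r'[^\w-]', key):
        raise ValueError("keys can only contain ascii letters, digits, '_' and '-'")
    if key in USED_INTERNALLY:
        raise ValueError("key %r is used internally, can't be set manually" % key)
    return key, value

# e.g. parse_extra('reviewer=alice') -> ('reviewer', 'alice')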
@@ -1,1071 +1,1071 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import functools
9 9 import os
10 10 import pickle
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.pycompat import open
15 15 from mercurial import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from mercurial.utils import (
23 23 dateutil,
24 24 procutil,
25 25 stringutil,
26 26 )
27 27
28 28
29 29 class logentry:
30 30 """Class logentry has the following attributes:
31 31 .author - author name as CVS knows it
32 32 .branch - name of branch this revision is on
33 33 .branches - revision tuple of branches starting at this revision
34 34 .comment - commit message
35 35 .commitid - CVS commitid or None
36 36 .date - the commit date as a (time, tz) tuple
37 37 .dead - true if file revision is dead
38 38 .file - Name of file
39 39 .lines - a tuple (+lines, -lines) or None
40 40 .parent - Previous revision of this entry
41 41 .rcs - name of file as returned from CVS
42 42 .revision - revision number as tuple
43 43 .tags - list of tags on the file
44 44 .synthetic - is this a synthetic "file ... added on ..." revision?
45 45 .mergepoint - the branch that has been merged from (if present in
46 46 rlog output) or None
47 47 .branchpoints - the branches that start at the current entry or empty
48 48 """
49 49
50 50 def __init__(self, **entries):
51 51 self.synthetic = False
52 52 self.__dict__.update(entries)
53 53
54 54 def __repr__(self):
55 55 items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__))
56 56 return "%s(%s)" % (type(self).__name__, ", ".join(items))
57 57
58 58
59 59 class logerror(Exception):
60 60 pass
61 61
62 62
63 63 def getrepopath(cvspath):
64 64 """Return the repository path from a CVS path.
65 65
66 66 >>> getrepopath(b'/foo/bar')
67 67 '/foo/bar'
68 68 >>> getrepopath(b'c:/foo/bar')
69 69 '/foo/bar'
70 70 >>> getrepopath(b':pserver:10/foo/bar')
71 71 '/foo/bar'
72 72 >>> getrepopath(b':pserver:10c:/foo/bar')
73 73 '/foo/bar'
74 74 >>> getrepopath(b':pserver:/foo/bar')
75 75 '/foo/bar'
76 76 >>> getrepopath(b':pserver:c:/foo/bar')
77 77 '/foo/bar'
78 78 >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
79 79 '/foo/bar'
80 80 >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
81 81 '/foo/bar'
82 82 >>> getrepopath(b'user@server/path/to/repository')
83 83 '/path/to/repository'
84 84 """
85 85 # According to CVS manual, CVS paths are expressed like:
86 86 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
87 87 #
88 88 # The CVS path is split into parts and then the position of the first
89 89 # occurrence of the '/' char after the '@' is located. The result is the
90 90 # rest of the string after that '/' sign, including it.
91 91
92 92 parts = cvspath.split(b':')
93 93 atposition = parts[-1].find(b'@')
94 94 start = 0
95 95
96 96 if atposition != -1:
97 97 start = atposition
98 98
99 99 repopath = parts[-1][parts[-1].find(b'/', start) :]
100 100 return repopath
101 101
102 102
103 103 def createlog(ui, directory=None, root=b"", rlog=True, cache=None):
104 104 '''Collect the CVS rlog'''
105 105
106 106 # Because we store many duplicate commit log messages, reusing strings
107 107 # saves a lot of memory and pickle storage space.
108 108 _scache = {}
109 109
110 110 def scache(s):
111 111 """return a shared version of a string"""
112 112 return _scache.setdefault(s, s)
113 113
114 114 ui.status(_(b'collecting CVS rlog\n'))
115 115
116 116 log = [] # list of logentry objects containing the CVS state
117 117
118 118 # patterns to match in CVS (r)log output, by state of use
119 119 re_00 = re.compile(b'RCS file: (.+)$')
120 120 re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
121 121 re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
122 122 re_03 = re.compile(
123 123 b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$"
124 124 )
125 125 re_10 = re.compile(b'Working file: (.+)$')
126 126 re_20 = re.compile(b'symbolic names:')
127 127 re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
128 128 re_31 = re.compile(b'----------------------------$')
129 129 re_32 = re.compile(
130 130 b'======================================='
131 131 b'======================================$'
132 132 )
133 133 re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
134 134 re_60 = re.compile(
135 135 br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
136 136 br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
137 137 br'(\s+commitid:\s+([^;]+);)?'
138 138 br'(.*mergepoint:\s+([^;]+);)?'
139 139 )
140 140 re_70 = re.compile(b'branches: (.+);$')
141 141
142 142 file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')
143 143
144 144 prefix = b'' # leading path to strip off what we get from CVS
145 145
146 146 if directory is None:
147 147 # Current working directory
148 148
149 149 # Get the real directory in the repository
150 150 try:
151 151 with open(os.path.join(b'CVS', b'Repository'), b'rb') as f:
152 152 prefix = f.read().strip()
153 153 directory = prefix
154 154 if prefix == b".":
155 155 prefix = b""
156 156 except IOError:
157 157 raise logerror(_(b'not a CVS sandbox'))
158 158
159 159 if prefix and not prefix.endswith(pycompat.ossep):
160 160 prefix += pycompat.ossep
161 161
162 162 # Use the Root file in the sandbox, if it exists
163 163 try:
164 164 root = open(os.path.join(b'CVS', b'Root'), b'rb').read().strip()
165 165 except IOError:
166 166 pass
167 167
168 168 if not root:
169 169 root = encoding.environ.get(b'CVSROOT', b'')
170 170
171 171 # read log cache if one exists
172 172 oldlog = []
173 173 date = None
174 174
175 175 if cache:
176 176 cachedir = os.path.expanduser(b'~/.hg.cvsps')
177 177 if not os.path.exists(cachedir):
178 178 os.mkdir(cachedir)
179 179
180 180 # The cvsps cache pickle needs a uniquified name, based on the
181 181 # repository location. The address may have all sort of nasties
182 182 # in it, slashes, colons and such. So here we take just the
183 183 # alphanumeric characters, concatenated in a way that does not
184 184 # mix up the various components, so that
185 185 # :pserver:user@server:/path
186 186 # and
187 187 # /pserver/user/server/path
188 188 # are mapped to different cache file names.
189 189 cachefile = root.split(b":") + [directory, b"cache"]
190 190 cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
191 191 cachefile = os.path.join(
192 192 cachedir, b'.'.join([s for s in cachefile if s])
193 193 )
194 194
195 195 if cache == b'update':
196 196 try:
197 197 ui.note(_(b'reading cvs log cache %s\n') % cachefile)
198 198 oldlog = pickle.load(open(cachefile, b'rb'))
199 199 for e in oldlog:
200 200 if not (
201 util.safehasattr(e, b'branchpoints')
202 and util.safehasattr(e, b'commitid')
203 and util.safehasattr(e, b'mergepoint')
201 hasattr(e, b'branchpoints')
202 and hasattr(e, b'commitid')
203 and hasattr(e, b'mergepoint')
204 204 ):
205 205 ui.status(_(b'ignoring old cache\n'))
206 206 oldlog = []
207 207 break
208 208
209 209 ui.note(_(b'cache has %d log entries\n') % len(oldlog))
210 210 except Exception as e:
211 211 ui.note(_(b'error reading cache: %r\n') % e)
212 212
213 213 if oldlog:
214 214 date = oldlog[-1].date # last commit date as a (time,tz) tuple
215 215 date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2')
216 216
217 217 # build the CVS commandline
218 218 cmd = [b'cvs', b'-q']
219 219 if root:
220 220 cmd.append(b'-d%s' % root)
221 221 p = util.normpath(getrepopath(root))
222 222 if not p.endswith(b'/'):
223 223 p += b'/'
224 224 if prefix:
225 225 # looks like normpath replaces "" by "."
226 226 prefix = p + util.normpath(prefix)
227 227 else:
228 228 prefix = p
229 229 cmd.append([b'log', b'rlog'][rlog])
230 230 if date:
231 231 # no space between option and date string
232 232 cmd.append(b'-d>%s' % date)
233 233 cmd.append(directory)
234 234
235 235 # state machine begins here
236 236 tags = {} # dictionary of revisions on current file with their tags
237 237 branchmap = {} # mapping between branch names and revision numbers
238 238 rcsmap = {}
239 239 state = 0
240 240 store = False # set when a new record can be appended
241 241
242 242 cmd = [procutil.shellquote(arg) for arg in cmd]
243 243 ui.note(_(b"running %s\n") % (b' '.join(cmd)))
244 244 ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
245 245
246 246 pfp = procutil.popen(b' '.join(cmd), b'rb')
247 247 peek = util.fromnativeeol(pfp.readline())
248 248 while True:
249 249 line = peek
250 250 if line == b'':
251 251 break
252 252 peek = util.fromnativeeol(pfp.readline())
253 253 if line.endswith(b'\n'):
254 254 line = line[:-1]
255 255 # ui.debug('state=%d line=%r\n' % (state, line))
256 256
257 257 if state == 0:
258 258 # initial state, consume input until we see 'RCS file'
259 259 match = re_00.match(line)
260 260 if match:
261 261 rcs = match.group(1)
262 262 tags = {}
263 263 if rlog:
264 264 filename = util.normpath(rcs[:-2])
265 265 if filename.startswith(prefix):
266 266 filename = filename[len(prefix) :]
267 267 if filename.startswith(b'/'):
268 268 filename = filename[1:]
269 269 if filename.startswith(b'Attic/'):
270 270 filename = filename[6:]
271 271 else:
272 272 filename = filename.replace(b'/Attic/', b'/')
273 273 state = 2
274 274 continue
275 275 state = 1
276 276 continue
277 277 match = re_01.match(line)
278 278 if match:
279 279 raise logerror(match.group(1))
280 280 match = re_02.match(line)
281 281 if match:
282 282 raise logerror(match.group(2))
283 283 if re_03.match(line):
284 284 raise logerror(line)
285 285
286 286 elif state == 1:
287 287 # expect 'Working file' (only when using log instead of rlog)
288 288 match = re_10.match(line)
289 289 assert match, _(b'RCS file must be followed by working file')
290 290 filename = util.normpath(match.group(1))
291 291 state = 2
292 292
293 293 elif state == 2:
294 294 # expect 'symbolic names'
295 295 if re_20.match(line):
296 296 branchmap = {}
297 297 state = 3
298 298
299 299 elif state == 3:
300 300 # read the symbolic names and store as tags
301 301 match = re_30.match(line)
302 302 if match:
303 303 rev = [int(x) for x in match.group(2).split(b'.')]
304 304
305 305 # Convert magic branch number to an odd-numbered one
306 306 revn = len(rev)
307 307 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
308 308 rev = rev[:-2] + rev[-1:]
309 309 rev = tuple(rev)
310 310
311 311 if rev not in tags:
312 312 tags[rev] = []
313 313 tags[rev].append(match.group(1))
314 314 branchmap[match.group(1)] = match.group(2)
315 315
316 316 elif re_31.match(line):
317 317 state = 5
318 318 elif re_32.match(line):
319 319 state = 0
320 320
321 321 elif state == 4:
322 322 # expecting '------' separator before first revision
323 323 if re_31.match(line):
324 324 state = 5
325 325 else:
326 326 assert not re_32.match(line), _(
327 327 b'must have at least some revisions'
328 328 )
329 329
330 330 elif state == 5:
331 331 # expecting revision number and possibly (ignored) lock indication
332 332 # we create the logentry here from values stored in states 0 to 4,
333 333 # as this state is re-entered for subsequent revisions of a file.
334 334 match = re_50.match(line)
335 335 assert match, _(b'expected revision number')
336 336 e = logentry(
337 337 rcs=scache(rcs),
338 338 file=scache(filename),
339 339 revision=tuple([int(x) for x in match.group(1).split(b'.')]),
340 340 branches=[],
341 341 parent=None,
342 342 commitid=None,
343 343 mergepoint=None,
344 344 branchpoints=set(),
345 345 )
346 346
347 347 state = 6
348 348
349 349 elif state == 6:
350 350 # expecting date, author, state, lines changed
351 351 match = re_60.match(line)
352 352 assert match, _(b'revision must be followed by date line')
353 353 d = match.group(1)
354 354 if d[2] == b'/':
355 355 # Y2K
356 356 d = b'19' + d
357 357
358 358 if len(d.split()) != 3:
359 359 # cvs log dates always in GMT
360 360 d = d + b' UTC'
361 361 e.date = dateutil.parsedate(
362 362 d,
363 363 [
364 364 b'%y/%m/%d %H:%M:%S',
365 365 b'%Y/%m/%d %H:%M:%S',
366 366 b'%Y-%m-%d %H:%M:%S',
367 367 ],
368 368 )
369 369 e.author = scache(match.group(2))
370 370 e.dead = match.group(3).lower() == b'dead'
371 371
372 372 if match.group(5):
373 373 if match.group(6):
374 374 e.lines = (int(match.group(5)), int(match.group(6)))
375 375 else:
376 376 e.lines = (int(match.group(5)), 0)
377 377 elif match.group(6):
378 378 e.lines = (0, int(match.group(6)))
379 379 else:
380 380 e.lines = None
381 381
382 382 if match.group(7): # cvs 1.12 commitid
383 383 e.commitid = match.group(8)
384 384
385 385 if match.group(9): # cvsnt mergepoint
386 386 myrev = match.group(10).split(b'.')
387 387 if len(myrev) == 2: # head
388 388 e.mergepoint = b'HEAD'
389 389 else:
390 390 myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]])
391 391 branches = [b for b in branchmap if branchmap[b] == myrev]
392 392 assert len(branches) == 1, (
393 393 b'unknown branch: %s' % e.mergepoint
394 394 )
395 395 e.mergepoint = branches[0]
396 396
397 397 e.comment = []
398 398 state = 7
399 399
400 400 elif state == 7:
401 401 # read the revision numbers of branches that start at this revision
402 402 # or store the commit log message otherwise
403 403 m = re_70.match(line)
404 404 if m:
405 405 e.branches = [
406 406 tuple([int(y) for y in x.strip().split(b'.')])
407 407 for x in m.group(1).split(b';')
408 408 ]
409 409 state = 8
410 410 elif re_31.match(line) and re_50.match(peek):
411 411 state = 5
412 412 store = True
413 413 elif re_32.match(line):
414 414 state = 0
415 415 store = True
416 416 else:
417 417 e.comment.append(line)
418 418
419 419 elif state == 8:
420 420 # store commit log message
421 421 if re_31.match(line):
422 422 cpeek = peek
423 423 if cpeek.endswith(b'\n'):
424 424 cpeek = cpeek[:-1]
425 425 if re_50.match(cpeek):
426 426 state = 5
427 427 store = True
428 428 else:
429 429 e.comment.append(line)
430 430 elif re_32.match(line):
431 431 state = 0
432 432 store = True
433 433 else:
434 434 e.comment.append(line)
435 435
436 436 # When a file is added on a branch B1, CVS creates a synthetic
437 437 # dead trunk revision 1.1 so that the branch has a root.
438 438 # Likewise, if you merge such a file to a later branch B2 (one
439 439 # that already existed when the file was added on B1), CVS
440 440 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
441 441 # these revisions now, but mark them synthetic so
442 442 # createchangeset() can take care of them.
443 443 if (
444 444 store
445 445 and e.dead
446 446 and e.revision[-1] == 1
447 447 and len(e.comment) == 1 # 1.1 or 1.1.x.1
448 448 and file_added_re.match(e.comment[0])
449 449 ):
450 450 ui.debug(
451 451 b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0])
452 452 )
453 453 e.synthetic = True
454 454
455 455 if store:
456 456 # clean up the results and save in the log.
457 457 store = False
458 458 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
459 459 e.comment = scache(b'\n'.join(e.comment))
460 460
461 461 revn = len(e.revision)
462 462 if revn > 3 and (revn % 2) == 0:
463 463 e.branch = tags.get(e.revision[:-1], [None])[0]
464 464 else:
465 465 e.branch = None
466 466
467 467 # find the branches starting from this revision
468 468 branchpoints = set()
469 469 for branch, revision in branchmap.items():
470 470 revparts = tuple([int(i) for i in revision.split(b'.')])
471 471 if len(revparts) < 2: # bad tags
472 472 continue
473 473 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
474 474 # normal branch
475 475 if revparts[:-2] == e.revision:
476 476 branchpoints.add(branch)
477 477 elif revparts == (1, 1, 1): # vendor branch
478 478 if revparts in e.branches:
479 479 branchpoints.add(branch)
480 480 e.branchpoints = branchpoints
481 481
482 482 log.append(e)
483 483
484 484 rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs
485 485
486 486 if len(log) % 100 == 0:
487 487 ui.status(
488 488 stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80)
489 489 + b'\n'
490 490 )
491 491
492 492 log.sort(key=lambda x: (x.rcs, x.revision))
493 493
494 494 # find parent revisions of individual files
495 495 versions = {}
496 496 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
497 497 rcs = e.rcs.replace(b'/Attic/', b'/')
498 498 if rcs in rcsmap:
499 499 e.rcs = rcsmap[rcs]
500 500 branch = e.revision[:-1]
501 501 versions[(e.rcs, branch)] = e.revision
502 502
503 503 for e in log:
504 504 branch = e.revision[:-1]
505 505 p = versions.get((e.rcs, branch), None)
506 506 if p is None:
507 507 p = e.revision[:-2]
508 508 e.parent = p
509 509 versions[(e.rcs, branch)] = e.revision
510 510
511 511 # update the log cache
512 512 if cache:
513 513 if log:
514 514 # join up the old and new logs
515 515 log.sort(key=lambda x: x.date)
516 516
517 517 if oldlog and oldlog[-1].date >= log[0].date:
518 518 raise logerror(
519 519 _(
520 520 b'log cache overlaps with new log entries,'
521 521 b' re-run without cache.'
522 522 )
523 523 )
524 524
525 525 log = oldlog + log
526 526
527 527 # write the new cachefile
528 528 ui.note(_(b'writing cvs log cache %s\n') % cachefile)
529 529 pickle.dump(log, open(cachefile, b'wb'))
530 530 else:
531 531 log = oldlog
532 532
533 533 ui.status(_(b'%d log entries\n') % len(log))
534 534
535 535 encodings = ui.configlist(b'convert', b'cvsps.logencoding')
536 536 if encodings:
537 537
538 538 def revstr(r):
539 539 # this is needed, because logentry.revision is a tuple of "int"
540 540 # (e.g. (1, 2) for "1.2")
541 541 return b'.'.join(pycompat.maplist(pycompat.bytestr, r))
542 542
543 543 for entry in log:
544 544 comment = entry.comment
545 545 for e in encodings:
546 546 try:
547 547 entry.comment = comment.decode(pycompat.sysstr(e)).encode(
548 548 'utf-8'
549 549 )
550 550 if ui.debugflag:
551 551 ui.debug(
552 552 b"transcoding by %s: %s of %s\n"
553 553 % (e, revstr(entry.revision), entry.file)
554 554 )
555 555 break
556 556 except UnicodeDecodeError:
557 557 pass # try next encoding
558 558 except LookupError as inst: # unknown encoding, maybe
559 559 raise error.Abort(
560 560 pycompat.bytestr(inst),
561 561 hint=_(
562 562 b'check convert.cvsps.logencoding configuration'
563 563 ),
564 564 )
565 565 else:
566 566 raise error.Abort(
567 567 _(
568 568 b"no encoding can transcode"
569 569 b" CVS log message for %s of %s"
570 570 )
571 571 % (revstr(entry.revision), entry.file),
572 572 hint=_(b'check convert.cvsps.logencoding configuration'),
573 573 )
574 574
575 575 hook.hook(ui, None, b"cvslog", True, log=log)
576 576
577 577 return log
578 578
579 579
580 580 class changeset:
581 581 """Class changeset has the following attributes:
582 582 .id - integer identifying this changeset (list index)
583 583 .author - author name as CVS knows it
584 584 .branch - name of branch this changeset is on, or None
585 585 .comment - commit message
586 586 .commitid - CVS commitid or None
587 587 .date - the commit date as a (time,tz) tuple
588 588 .entries - list of logentry objects in this changeset
589 589 .parents - list of one or two parent changesets
590 590 .tags - list of tags on this changeset
591 591 .synthetic - from synthetic revision "file ... added on branch ..."
592 592 .mergepoint - the branch that has been merged from or None
593 593 .branchpoints - the branches that start at the current entry or empty
594 594 """
595 595
596 596 def __init__(self, **entries):
597 597 self.id = None
598 598 self.synthetic = False
599 599 self.__dict__.update(entries)
600 600
601 601 def __repr__(self):
602 602 items = (
603 603 b"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)
604 604 )
605 605 return b"%s(%s)" % (type(self).__name__, b", ".join(items))
606 606
607 607
608 608 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
609 609 '''Convert log into changesets.'''
610 610
611 611 ui.status(_(b'creating changesets\n'))
612 612
613 613 # try to order commitids by date
614 614 mindate = {}
615 615 for e in log:
616 616 if e.commitid:
617 617 if e.commitid not in mindate:
618 618 mindate[e.commitid] = e.date
619 619 else:
620 620 mindate[e.commitid] = min(e.date, mindate[e.commitid])
621 621
622 622 # Merge changesets
623 623 log.sort(
624 624 key=lambda x: (
625 625 mindate.get(x.commitid, (-1, 0)),
626 626 x.commitid or b'',
627 627 x.comment,
628 628 x.author,
629 629 x.branch or b'',
630 630 x.date,
631 631 x.branchpoints,
632 632 )
633 633 )
634 634
635 635 changesets = []
636 636 files = set()
637 637 c = None
638 638 for i, e in enumerate(log):
639 639
640 640 # Check if log entry belongs to the current changeset or not.
641 641
642 642 # Since CVS is file-centric, two different file revisions with
643 643 # different branchpoints should be treated as belonging to two
644 644 # different changesets (and the ordering is important and not
645 645 # honoured by cvsps at this point).
646 646 #
647 647 # Consider the following case:
648 648 # foo 1.1 branchpoints: [MYBRANCH]
649 649 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
650 650 #
651 651 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
652 652 # later version of foo may be in MYBRANCH2, so foo should be the
653 653 # first changeset and bar the next and MYBRANCH and MYBRANCH2
654 654 # should both start off of the bar changeset. No provisions are
655 655 # made to ensure that this is, in fact, what happens.
656 656 if not (
657 657 c
658 658 and e.branchpoints == c.branchpoints
659 659 and ( # cvs commitids
660 660 (e.commitid is not None and e.commitid == c.commitid)
661 661 or ( # no commitids, use fuzzy commit detection
662 662 (e.commitid is None or c.commitid is None)
663 663 and e.comment == c.comment
664 664 and e.author == c.author
665 665 and e.branch == c.branch
666 666 and (
667 667 (c.date[0] + c.date[1])
668 668 <= (e.date[0] + e.date[1])
669 669 <= (c.date[0] + c.date[1]) + fuzz
670 670 )
671 671 and e.file not in files
672 672 )
673 673 )
674 674 ):
675 675 c = changeset(
676 676 comment=e.comment,
677 677 author=e.author,
678 678 branch=e.branch,
679 679 date=e.date,
680 680 entries=[],
681 681 mergepoint=e.mergepoint,
682 682 branchpoints=e.branchpoints,
683 683 commitid=e.commitid,
684 684 )
685 685 changesets.append(c)
686 686
687 687 files = set()
688 688 if len(changesets) % 100 == 0:
689 689 t = b'%d %s' % (
690 690 len(changesets),
691 691 pycompat.byterepr(e.comment)[2:-1],
692 692 )
693 693 ui.status(stringutil.ellipsis(t, 80) + b'\n')
694 694
695 695 c.entries.append(e)
696 696 files.add(e.file)
697 697 c.date = e.date # changeset date is date of latest commit in it
698 698
699 699 # Mark synthetic changesets
700 700
701 701 for c in changesets:
702 702 # Synthetic revisions always get their own changeset, because
703 703 # the log message includes the filename. E.g. if you add file3
704 704 # and file4 on a branch, you get four log entries and three
705 705 # changesets:
706 706 # "File file3 was added on branch ..." (synthetic, 1 entry)
707 707 # "File file4 was added on branch ..." (synthetic, 1 entry)
708 708 # "Add file3 and file4 to fix ..." (real, 2 entries)
709 709 # Hence the check for 1 entry here.
710 710 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
711 711
712 712 # Sort files in each changeset
713 713
714 714 def entitycompare(l, r):
715 715 """Mimic cvsps sorting order"""
716 716 l = l.file.split(b'/')
717 717 r = r.file.split(b'/')
718 718 nl = len(l)
719 719 nr = len(r)
720 720 n = min(nl, nr)
721 721 for i in range(n):
722 722 if i + 1 == nl and nl < nr:
723 723 return -1
724 724 elif i + 1 == nr and nl > nr:
725 725 return +1
726 726 elif l[i] < r[i]:
727 727 return -1
728 728 elif l[i] > r[i]:
729 729 return +1
730 730 return 0
731 731
732 732 for c in changesets:
733 733 c.entries.sort(key=functools.cmp_to_key(entitycompare))
734 734
735 735 # Sort changesets by date
736 736
737 737 odd = set()
738 738
739 739 def cscmp(l, r):
740 740 d = sum(l.date) - sum(r.date)
741 741 if d:
742 742 return d
743 743
744 744 # detect vendor branches and initial commits on a branch
745 745 le = {}
746 746 for e in l.entries:
747 747 le[e.rcs] = e.revision
748 748 re = {}
749 749 for e in r.entries:
750 750 re[e.rcs] = e.revision
751 751
752 752 d = 0
753 753 for e in l.entries:
754 754 if re.get(e.rcs, None) == e.parent:
755 755 assert not d
756 756 d = 1
757 757 break
758 758
759 759 for e in r.entries:
760 760 if le.get(e.rcs, None) == e.parent:
761 761 if d:
762 762 odd.add((l, r))
763 763 d = -1
764 764 break
765 765 # By this point, the changesets are sufficiently compared that
766 766 # we don't really care about ordering. However, this leaves
767 767 # some race conditions in the tests, so we compare on the
768 768 # number of files modified, the files contained in each
769 769 # changeset, and the branchpoints in the change to ensure test
770 770 # output remains stable.
771 771
772 772 # recommended replacement for cmp from
773 773 # https://docs.python.org/3.0/whatsnew/3.0.html
774 774 c = lambda x, y: (x > y) - (x < y)
775 775 # Sort bigger changes first.
776 776 if not d:
777 777 d = c(len(l.entries), len(r.entries))
778 778 # Try sorting by filename in the change.
779 779 if not d:
780 780 d = c([e.file for e in l.entries], [e.file for e in r.entries])
781 781 # Try and put changes without a branch point before ones with
782 782 # a branch point.
783 783 if not d:
784 784 d = c(len(l.branchpoints), len(r.branchpoints))
785 785 return d
786 786
787 787 changesets.sort(key=functools.cmp_to_key(cscmp))
788 788
789 789 # Collect tags
790 790
791 791 globaltags = {}
792 792 for c in changesets:
793 793 for e in c.entries:
794 794 for tag in e.tags:
795 795 # remember which is the latest changeset to have this tag
796 796 globaltags[tag] = c
797 797
798 798 for c in changesets:
799 799 tags = set()
800 800 for e in c.entries:
801 801 tags.update(e.tags)
802 802 # remember tags only if this is the latest changeset to have it
803 803 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
804 804
805 805 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
806 806 # by inserting dummy changesets with two parents, and handle
807 807 # {{mergefrombranch BRANCHNAME}} by setting two parents.
808 808
809 809 if mergeto is None:
810 810 mergeto = br'{{mergetobranch ([-\w]+)}}'
811 811 if mergeto:
812 812 mergeto = re.compile(mergeto)
813 813
814 814 if mergefrom is None:
815 815 mergefrom = br'{{mergefrombranch ([-\w]+)}}'
816 816 if mergefrom:
817 817 mergefrom = re.compile(mergefrom)
818 818
819 819 versions = {} # changeset index where we saw any particular file version
820 820 branches = {} # changeset index where we saw a branch
821 821 n = len(changesets)
822 822 i = 0
823 823 while i < n:
824 824 c = changesets[i]
825 825
826 826 for f in c.entries:
827 827 versions[(f.rcs, f.revision)] = i
828 828
829 829 p = None
830 830 if c.branch in branches:
831 831 p = branches[c.branch]
832 832 else:
833 833 # first changeset on a new branch
834 834 # the parent is a changeset with the branch in its
835 835 # branchpoints such that it is the latest possible
836 836 # commit without any intervening, unrelated commits.
837 837
838 838 for candidate in range(i):
839 839 if c.branch not in changesets[candidate].branchpoints:
840 840 if p is not None:
841 841 break
842 842 continue
843 843 p = candidate
844 844
845 845 c.parents = []
846 846 if p is not None:
847 847 p = changesets[p]
848 848
849 849 # Ensure no changeset has a synthetic changeset as a parent.
850 850 while p.synthetic:
851 851 assert len(p.parents) <= 1, _(
852 852 b'synthetic changeset cannot have multiple parents'
853 853 )
854 854 if p.parents:
855 855 p = p.parents[0]
856 856 else:
857 857 p = None
858 858 break
859 859
860 860 if p is not None:
861 861 c.parents.append(p)
862 862
863 863 if c.mergepoint:
864 864 if c.mergepoint == b'HEAD':
865 865 c.mergepoint = None
866 866 c.parents.append(changesets[branches[c.mergepoint]])
867 867
868 868 if mergefrom:
869 869 m = mergefrom.search(c.comment)
870 870 if m:
871 871 m = m.group(1)
872 872 if m == b'HEAD':
873 873 m = None
874 874 try:
875 875 candidate = changesets[branches[m]]
876 876 except KeyError:
877 877 ui.warn(
878 878 _(
879 879 b"warning: CVS commit message references "
880 880 b"non-existent branch %r:\n%s\n"
881 881 )
882 882 % (pycompat.bytestr(m), c.comment)
883 883 )
884 884 if m in branches and c.branch != m and not candidate.synthetic:
885 885 c.parents.append(candidate)
886 886
887 887 if mergeto:
888 888 m = mergeto.search(c.comment)
889 889 if m:
890 890 if m.groups():
891 891 m = m.group(1)
892 892 if m == b'HEAD':
893 893 m = None
894 894 else:
895 895 m = None # if no group found then merge to HEAD
896 896 if m in branches and c.branch != m:
897 897 # insert empty changeset for merge
898 898 cc = changeset(
899 899 author=c.author,
900 900 branch=m,
901 901 date=c.date,
902 902 comment=b'convert-repo: CVS merge from branch %s'
903 903 % c.branch,
904 904 entries=[],
905 905 tags=[],
906 906 parents=[changesets[branches[m]], c],
907 907 )
908 908 changesets.insert(i + 1, cc)
909 909 branches[m] = i + 1
910 910
911 911 # adjust our loop counters now we have inserted a new entry
912 912 n += 1
913 913 i += 2
914 914 continue
915 915
916 916 branches[c.branch] = i
917 917 i += 1
918 918
919 919 # Drop synthetic changesets (safe now that we have ensured no other
920 920 # changesets can have them as parents).
921 921 i = 0
922 922 while i < len(changesets):
923 923 if changesets[i].synthetic:
924 924 del changesets[i]
925 925 else:
926 926 i += 1
927 927
928 928 # Number changesets
929 929
930 930 for i, c in enumerate(changesets):
931 931 c.id = i + 1
932 932
933 933 if odd:
934 934 for l, r in odd:
935 935 if l.id is not None and r.id is not None:
936 936 ui.warn(
937 937 _(b'changeset %d is both before and after %d\n')
938 938 % (l.id, r.id)
939 939 )
940 940
941 941 ui.status(_(b'%d changeset entries\n') % len(changesets))
942 942
943 943 hook.hook(ui, None, b"cvschangesets", True, changesets=changesets)
944 944
945 945 return changesets
946 946
947 947
948 948 def debugcvsps(ui, *args, **opts):
949 949 """Read CVS rlog for current directory or named path in
950 950 repository, and convert the log to changesets based on matching
951 951 commit log entries and dates.
952 952 """
953 953 opts = pycompat.byteskwargs(opts)
954 954 if opts[b"new_cache"]:
955 955 cache = b"write"
956 956 elif opts[b"update_cache"]:
957 957 cache = b"update"
958 958 else:
959 959 cache = None
960 960
961 961 revisions = opts[b"revisions"]
962 962
963 963 try:
964 964 if args:
965 965 log = []
966 966 for d in args:
967 967 log += createlog(ui, d, root=opts[b"root"], cache=cache)
968 968 else:
969 969 log = createlog(ui, root=opts[b"root"], cache=cache)
970 970 except logerror as e:
971 971 ui.write(b"%r\n" % e)
972 972 return
973 973
974 974 changesets = createchangeset(ui, log, opts[b"fuzz"])
975 975 del log
976 976
977 977 # Print changesets (optionally filtered)
978 978
979 979 off = len(revisions)
980 980 branches = {} # latest version number in each branch
981 981 ancestors = {} # parent branch
982 982 for cs in changesets:
983 983
984 984 if opts[b"ancestors"]:
985 985 if cs.branch not in branches and cs.parents and cs.parents[0].id:
986 986 ancestors[cs.branch] = (
987 987 changesets[cs.parents[0].id - 1].branch,
988 988 cs.parents[0].id,
989 989 )
990 990 branches[cs.branch] = cs.id
991 991
992 992 # limit by branches
993 993 if (
994 994 opts[b"branches"]
995 995 and (cs.branch or b'HEAD') not in opts[b"branches"]
996 996 ):
997 997 continue
998 998
999 999 if not off:
1000 1000 # Note: trailing spaces on several lines here are needed to have
1001 1001 # bug-for-bug compatibility with cvsps.
1002 1002 ui.write(b'---------------------\n')
1003 1003 ui.write((b'PatchSet %d \n' % cs.id))
1004 1004 ui.write(
1005 1005 (
1006 1006 b'Date: %s\n'
1007 1007 % dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2')
1008 1008 )
1009 1009 )
1010 1010 ui.write((b'Author: %s\n' % cs.author))
1011 1011 ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD')))
1012 1012 ui.write(
1013 1013 (
1014 1014 b'Tag%s: %s \n'
1015 1015 % (
1016 1016 [b'', b's'][len(cs.tags) > 1],
1017 1017 b','.join(cs.tags) or b'(none)',
1018 1018 )
1019 1019 )
1020 1020 )
1021 1021 if cs.branchpoints:
1022 1022 ui.writenoi18n(
1023 1023 b'Branchpoints: %s \n' % b', '.join(sorted(cs.branchpoints))
1024 1024 )
1025 1025 if opts[b"parents"] and cs.parents:
1026 1026 if len(cs.parents) > 1:
1027 1027 ui.write(
1028 1028 (
1029 1029 b'Parents: %s\n'
1030 1030 % (b','.join([(b"%d" % p.id) for p in cs.parents]))
1031 1031 )
1032 1032 )
1033 1033 else:
1034 1034 ui.write((b'Parent: %d\n' % cs.parents[0].id))
1035 1035
1036 1036 if opts[b"ancestors"]:
1037 1037 b = cs.branch
1038 1038 r = []
1039 1039 while b:
1040 1040 b, c = ancestors[b]
1041 1041 r.append(b'%s:%d:%d' % (b or b"HEAD", c, branches[b]))
1042 1042 if r:
1043 1043 ui.write((b'Ancestors: %s\n' % (b','.join(r))))
1044 1044
1045 1045 ui.writenoi18n(b'Log:\n')
1046 1046 ui.write(b'%s\n\n' % cs.comment)
1047 1047 ui.writenoi18n(b'Members: \n')
1048 1048 for f in cs.entries:
1049 1049 fn = f.file
1050 1050 if fn.startswith(opts[b"prefix"]):
1051 1051 fn = fn[len(opts[b"prefix"]) :]
1052 1052 ui.write(
1053 1053 b'\t%s:%s->%s%s \n'
1054 1054 % (
1055 1055 fn,
1056 1056 b'.'.join([b"%d" % x for x in f.parent]) or b'INITIAL',
1057 1057 b'.'.join([(b"%d" % x) for x in f.revision]),
1058 1058 [b'', b'(DEAD)'][f.dead],
1059 1059 )
1060 1060 )
1061 1061 ui.write(b'\n')
1062 1062
1063 1063 # have we seen the start tag?
1064 1064 if revisions and off:
1065 1065 if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags:
1066 1066 off = False
1067 1067
1068 1068 # see if we reached the end tag
1069 1069 if len(revisions) > 1 and not off:
1070 1070 if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags:
1071 1071 break
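createlog() above derives a per-repository cache file name by mangling the CVS root and module directory into dot-separated, word-only components. A small standalone sketch of that derivation (server name and paths are invented):

# Standalone sketch of the cvsps cache-file naming scheme used above.
import os
import re

root = b':pserver:anon@cvs.example.org:/cvsroot'
directory = b'mymodule'

parts = root.split(b':') + [directory, b'cache']
parts = [b'-'.join(re.findall(br'\w+', s)) for s in parts if s]
cachefile = os.path.join(b'~/.hg.cvsps', b'.'.join(s for s in parts if s))
# cachefile == b'~/.hg.cvsps/pserver.anon-cvs-example-org.cvsroot.mymodule.cache'
# (the real code expands '~' with os.path.expanduser first)
print(cachefile)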
@@ -1,160 +1,159 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2007 Daniel Holth <dholth@fastmail.fm>
4 4 # This is a stripped-down version of the original bzr-svn transport.py,
5 5 # Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
6 6
7 7 # This program is free software; you can redistribute it and/or modify
8 8 # it under the terms of the GNU General Public License as published by
9 9 # the Free Software Foundation; either version 2 of the License, or
10 10 # (at your option) any later version.
11 11
12 12 # This program is distributed in the hope that it will be useful,
13 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 15 # GNU General Public License for more details.
16 16
17 17 # You should have received a copy of the GNU General Public License
18 18 # along with this program; if not, see <http://www.gnu.org/licenses/>.
19 19
20 20 # pytype: disable=import-error
21 21 import svn.client
22 22 import svn.core
23 23 import svn.ra
24 24
25 25 # pytype: enable=import-error
26 26
27 27 Pool = svn.core.Pool
28 28 SubversionException = svn.core.SubversionException
29 29
30 30 from mercurial.pycompat import getattr
31 from mercurial import util
32 31
33 32 # Some older versions of the Python bindings need to be
34 33 # explicitly initialized. But what we want to do probably
35 34 # won't work worth a darn against those libraries anyway!
36 35 svn.ra.initialize()
37 36
38 37 svn_config = None
39 38
40 39
41 40 def _create_auth_baton(pool):
42 41 """Create a Subversion authentication baton."""
43 42 import svn.client # pytype: disable=import-error
44 43
45 44 # Give the client context baton a suite of authentication
46 45 # providers.
47 46 providers = [
48 47 svn.client.get_simple_provider(pool),
49 48 svn.client.get_username_provider(pool),
50 49 svn.client.get_ssl_client_cert_file_provider(pool),
51 50 svn.client.get_ssl_client_cert_pw_file_provider(pool),
52 51 svn.client.get_ssl_server_trust_file_provider(pool),
53 52 ]
54 53 # Platform-dependent authentication methods
55 54 getprovider = getattr(
56 55 svn.core, 'svn_auth_get_platform_specific_provider', None
57 56 )
58 57 if getprovider:
59 58 # Available in svn >= 1.6
60 59 for name in (b'gnome_keyring', b'keychain', b'kwallet', b'windows'):
61 60 for type in (b'simple', b'ssl_client_cert_pw', b'ssl_server_trust'):
62 61 p = getprovider(name, type, pool)
63 62 if p:
64 63 providers.append(p)
65 64 else:
66 if util.safehasattr(svn.client, 'get_windows_simple_provider'):
65 if hasattr(svn.client, 'get_windows_simple_provider'):
67 66 providers.append(svn.client.get_windows_simple_provider(pool))
68 67
69 68 return svn.core.svn_auth_open(providers, pool)
70 69
71 70
72 71 class NotBranchError(SubversionException):
73 72 pass
74 73
75 74
76 75 class SvnRaTransport:
77 76 """
78 77 Open an ra connection to a Subversion repository.
79 78 """
80 79
81 80 def __init__(self, url=b"", ra=None):
82 81 self.pool = Pool()
83 82 self.svn_url = url
84 83 self.username = b''
85 84 self.password = b''
86 85
87 86 # Only Subversion 1.4 has reparent()
88 if ra is None or not util.safehasattr(svn.ra, 'reparent'):
87 if ra is None or not hasattr(svn.ra, 'reparent'):
89 88 self.client = svn.client.create_context(self.pool)
90 89 ab = _create_auth_baton(self.pool)
91 90 self.client.auth_baton = ab
92 91 global svn_config
93 92 if svn_config is None:
94 93 svn_config = svn.core.svn_config_get_config(None)
95 94 self.client.config = svn_config
96 95 try:
97 96 self.ra = svn.client.open_ra_session(
98 97 self.svn_url, self.client, self.pool
99 98 )
100 99 except SubversionException as xxx_todo_changeme:
101 100 (inst, num) = xxx_todo_changeme.args
102 101 if num in (
103 102 svn.core.SVN_ERR_RA_ILLEGAL_URL,
104 103 svn.core.SVN_ERR_RA_LOCAL_REPOS_OPEN_FAILED,
105 104 svn.core.SVN_ERR_BAD_URL,
106 105 ):
107 106 raise NotBranchError(url)
108 107 raise
109 108 else:
110 109 self.ra = ra
111 110 svn.ra.reparent(self.ra, self.svn_url.encode('utf8'))
112 111
113 112 class Reporter:
114 113 def __init__(self, reporter_data):
115 114 self._reporter, self._baton = reporter_data
116 115
117 116 def set_path(self, path, revnum, start_empty, lock_token, pool=None):
118 117 svn.ra.reporter2_invoke_set_path(
119 118 self._reporter,
120 119 self._baton,
121 120 path,
122 121 revnum,
123 122 start_empty,
124 123 lock_token,
125 124 pool,
126 125 )
127 126
128 127 def delete_path(self, path, pool=None):
129 128 svn.ra.reporter2_invoke_delete_path(
130 129 self._reporter, self._baton, path, pool
131 130 )
132 131
133 132 def link_path(
134 133 self, path, url, revision, start_empty, lock_token, pool=None
135 134 ):
136 135 svn.ra.reporter2_invoke_link_path(
137 136 self._reporter,
138 137 self._baton,
139 138 path,
140 139 url,
141 140 revision,
142 141 start_empty,
143 142 lock_token,
144 143 pool,
145 144 )
146 145
147 146 def finish_report(self, pool=None):
148 147 svn.ra.reporter2_invoke_finish_report(
149 148 self._reporter, self._baton, pool
150 149 )
151 150
152 151 def abort_report(self, pool=None):
153 152 svn.ra.reporter2_invoke_abort_report(
154 153 self._reporter, self._baton, pool
155 154 )
156 155
157 156 def do_update(self, revnum, path, *args, **kwargs):
158 157 return self.Reporter(
159 158 svn.ra.do_update(self.ra, revnum, path, *args, **kwargs)
160 159 )
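
The hunks above probe the Subversion bindings for optional features (``reparent()``, platform-specific credential providers) before using them; the change simply swaps ``util.safehasattr()`` for the builtin ``hasattr()`` in those probes. A minimal sketch of the same idiom, using hypothetical stand-in classes rather than the real ``svn`` modules:

    # Feature-detection sketch; OldBindings/NewBindings stand in for svn.ra.
    class OldBindings:
        pass

    class NewBindings:
        @staticmethod
        def reparent(session, url):
            return 'reparented %s to %s' % (session, url)

    def reuse_session(bindings, session, url):
        # Only newer bindings expose reparent(); fall back otherwise,
        # mirroring the check in SvnRaTransport.__init__ above.
        if hasattr(bindings, 'reparent'):
            return bindings.reparent(session, url)
        return 'opened a fresh session for %s' % url

    print(reuse_session(OldBindings, 'ra0', 'https://example.org/svn'))
    print(reuse_session(NewBindings, 'ra0', 'https://example.org/svn'))
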
@@ -1,357 +1,356 b''
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # commands: fastannotate commands
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial import (
13 13 commands,
14 14 encoding,
15 15 error,
16 16 extensions,
17 17 logcmdutil,
18 18 patch,
19 19 pycompat,
20 20 registrar,
21 21 scmutil,
22 util,
23 22 )
24 23
25 24 from . import (
26 25 context as facontext,
27 26 error as faerror,
28 27 formatter as faformatter,
29 28 )
30 29
31 30 cmdtable = {}
32 31 command = registrar.command(cmdtable)
33 32
34 33
35 34 def _matchpaths(repo, rev, pats, opts, aopts=facontext.defaultopts):
36 35 """generate paths matching given patterns"""
37 36 perfhack = repo.ui.configbool(b'fastannotate', b'perfhack')
38 37
39 38 # disable perfhack if:
40 39 # a) any walkopt is used
41 40 # b) if we treat pats as plain file names, some of them do not have
42 41 # corresponding linelog files
43 42 if perfhack:
45 44 # cwd relative to reporoot
45 44 reporoot = os.path.dirname(repo.path)
46 45 reldir = os.path.relpath(encoding.getcwd(), reporoot)
47 46 if reldir == b'.':
48 47 reldir = b''
49 48 if any(opts.get(o[1]) for o in commands.walkopts): # a)
50 49 perfhack = False
51 50 else: # b)
52 51 relpats = [
53 52 os.path.relpath(p, reporoot) if os.path.isabs(p) else p
54 53 for p in pats
55 54 ]
56 55 # disable perfhack on '..' since it allows escaping from the repo
57 56 if any(
58 57 (
59 58 b'..' in f
60 59 or not os.path.isfile(
61 60 facontext.pathhelper(repo, f, aopts).linelogpath
62 61 )
63 62 )
64 63 for f in relpats
65 64 ):
66 65 perfhack = False
67 66
68 67 # perfhack: emit paths directly without checking with the manifest
69 68 # this can be incorrect if the rev does not have the file.
70 69 if perfhack:
71 70 for p in relpats:
72 71 yield os.path.join(reldir, p)
73 72 else:
74 73
75 74 def bad(x, y):
76 75 raise error.Abort(b"%s: %s" % (x, y))
77 76
78 77 ctx = logcmdutil.revsingle(repo, rev)
79 78 m = scmutil.match(ctx, pats, opts, badfn=bad)
80 79 for p in ctx.walk(m):
81 80 yield p
82 81
83 82
84 83 fastannotatecommandargs = {
85 84 'options': [
86 85 (b'r', b'rev', b'.', _(b'annotate the specified revision'), _(b'REV')),
87 86 (b'u', b'user', None, _(b'list the author (long with -v)')),
88 87 (b'f', b'file', None, _(b'list the filename')),
89 88 (b'd', b'date', None, _(b'list the date (short with -q)')),
90 89 (b'n', b'number', None, _(b'list the revision number (default)')),
91 90 (b'c', b'changeset', None, _(b'list the changeset')),
92 91 (
93 92 b'l',
94 93 b'line-number',
95 94 None,
96 95 _(b'show line number at the first appearance'),
97 96 ),
98 97 (
99 98 b'e',
100 99 b'deleted',
101 100 None,
102 101 _(b'show deleted lines (slow) (EXPERIMENTAL)'),
103 102 ),
104 103 (
105 104 b'',
106 105 b'no-content',
107 106 None,
108 107 _(b'do not show file content (EXPERIMENTAL)'),
109 108 ),
110 109 (b'', b'no-follow', None, _(b"don't follow copies and renames")),
111 110 (
112 111 b'',
113 112 b'linear',
114 113 None,
115 114 _(
116 115 b'enforce linear history, ignore second parent '
117 116 b'of merges (EXPERIMENTAL)'
118 117 ),
119 118 ),
120 119 (
121 120 b'',
122 121 b'long-hash',
123 122 None,
124 123 _(b'show long changeset hash (EXPERIMENTAL)'),
125 124 ),
126 125 (
127 126 b'',
128 127 b'rebuild',
129 128 None,
130 129 _(b'rebuild cache even if it exists (EXPERIMENTAL)'),
131 130 ),
132 131 ]
133 132 + commands.diffwsopts
134 133 + commands.walkopts
135 134 + commands.formatteropts,
136 135 'synopsis': _(b'[-r REV] [-f] [-a] [-u] [-d] [-n] [-c] [-l] FILE...'),
137 136 'inferrepo': True,
138 137 }
139 138
140 139
141 140 def fastannotate(ui, repo, *pats, **opts):
142 141 """show changeset information by line for each file
143 142
144 143 List changes in files, showing the revision id responsible for each line.
145 144
146 145 This command is useful for discovering when a change was made and by whom.
147 146
148 147 By default this command prints revision numbers. If you include --file,
149 148 --user, or --date, the revision number is suppressed unless you also
150 149 include --number. The default format can also be customized by setting
151 150 fastannotate.defaultformat.
152 151
153 152 Returns 0 on success.
154 153
155 154 .. container:: verbose
156 155
157 156 This command uses an implementation different from the vanilla annotate
158 157 command, which may produce slightly different (while still reasonable)
159 158 outputs for some cases.
160 159
161 160 Unlike the vanilla annotate, fastannotate follows renames regardless of
162 161 the existence of --file.
163 162
164 163 For the best performance when running on a full repo, use -c, -l,
165 164 avoid -u, -d, -n. Use --linear and --no-content to make it even faster.
166 165
167 166 For the best performance when running on a shallow (remotefilelog)
168 167 repo, avoid --linear, --no-follow, or any diff options, as the server
169 168 won't be able to populate the annotate cache when non-default options
170 169 affecting results are used.
171 170 """
172 171 if not pats:
173 172 raise error.Abort(_(b'at least one filename or pattern is required'))
174 173
175 174 # performance hack: filtered repo can be slow. unfilter by default.
176 175 if ui.configbool(b'fastannotate', b'unfilteredrepo'):
177 176 repo = repo.unfiltered()
178 177
179 178 opts = pycompat.byteskwargs(opts)
180 179
181 180 rev = opts.get(b'rev', b'.')
182 181 rebuild = opts.get(b'rebuild', False)
183 182
184 183 diffopts = patch.difffeatureopts(
185 184 ui, opts, section=b'annotate', whitespace=True
186 185 )
187 186 aopts = facontext.annotateopts(
188 187 diffopts=diffopts,
189 188 followmerge=not opts.get(b'linear', False),
190 189 followrename=not opts.get(b'no_follow', False),
191 190 )
192 191
193 192 if not any(
194 193 opts.get(s)
195 194 for s in [b'user', b'date', b'file', b'number', b'changeset']
196 195 ):
197 196 # default 'number' for compatibility. but fastannotate is more
198 197 # efficient with "changeset", "line-number" and "no-content".
199 198 for name in ui.configlist(
200 199 b'fastannotate', b'defaultformat', [b'number']
201 200 ):
202 201 opts[name] = True
203 202
204 203 ui.pager(b'fastannotate')
205 204 template = opts.get(b'template')
206 205 if template == b'json':
207 206 formatter = faformatter.jsonformatter(ui, repo, opts)
208 207 else:
209 208 formatter = faformatter.defaultformatter(ui, repo, opts)
210 209 showdeleted = opts.get(b'deleted', False)
211 210 showlines = not bool(opts.get(b'no_content'))
212 211 showpath = opts.get(b'file', False)
213 212
214 213 # find the head of the main (master) branch
215 214 master = ui.config(b'fastannotate', b'mainbranch') or rev
216 215
217 216 # paths will be used for prefetching and the real annotating
218 217 paths = list(_matchpaths(repo, rev, pats, opts, aopts))
219 218
220 219 # for client, prefetch from the server
221 if util.safehasattr(repo, 'prefetchfastannotate'):
220 if hasattr(repo, 'prefetchfastannotate'):
222 221 repo.prefetchfastannotate(paths)
223 222
224 223 for path in paths:
225 224 result = lines = existinglines = None
226 225 while True:
227 226 try:
228 227 with facontext.annotatecontext(repo, path, aopts, rebuild) as a:
229 228 result = a.annotate(
230 229 rev,
231 230 master=master,
232 231 showpath=showpath,
233 232 showlines=(showlines and not showdeleted),
234 233 )
235 234 if showdeleted:
236 235 existinglines = {(l[0], l[1]) for l in result}
237 236 result = a.annotatealllines(
238 237 rev, showpath=showpath, showlines=showlines
239 238 )
240 239 break
241 240 except (faerror.CannotReuseError, faerror.CorruptedFileError):
242 241 # happens if master moves backwards, or the file was deleted
243 242 # and readded, or renamed to an existing name, or corrupted.
244 243 if rebuild: # give up since we have tried rebuild already
245 244 raise
246 245 else: # try a second time rebuilding the cache (slow)
247 246 rebuild = True
248 247 continue
249 248
250 249 if showlines:
251 250 result, lines = result
252 251
253 252 formatter.write(result, lines, existinglines=existinglines)
254 253 formatter.end()
255 254
256 255
257 256 _newopts = set()
258 257 _knownopts = {
259 258 opt[1].replace(b'-', b'_')
260 259 for opt in (fastannotatecommandargs['options'] + commands.globalopts)
261 260 }
262 261
263 262
264 263 def _annotatewrapper(orig, ui, repo, *pats, **opts):
265 264 """used by wrapdefault"""
266 265 # we need this hack until the obsstore has 0.0 seconds perf impact
267 266 if ui.configbool(b'fastannotate', b'unfilteredrepo'):
268 267 repo = repo.unfiltered()
269 268
270 269 # treat the file as text (skip the isbinary check)
271 270 if ui.configbool(b'fastannotate', b'forcetext'):
272 271 opts['text'] = True
273 272
274 273 # check if we need to do prefetch (client-side)
275 274 rev = opts.get('rev')
276 if util.safehasattr(repo, 'prefetchfastannotate') and rev is not None:
275 if hasattr(repo, 'prefetchfastannotate') and rev is not None:
277 276 paths = list(_matchpaths(repo, rev, pats, pycompat.byteskwargs(opts)))
278 277 repo.prefetchfastannotate(paths)
279 278
280 279 return orig(ui, repo, *pats, **opts)
281 280
282 281
283 282 def registercommand():
284 283 """register the fastannotate command"""
285 284 name = b'fastannotate|fastblame|fa'
286 285 command(name, helpbasic=True, **fastannotatecommandargs)(fastannotate)
287 286
288 287
289 288 def wrapdefault():
290 289 """wrap the default annotate command, to be aware of the protocol"""
291 290 extensions.wrapcommand(commands.table, b'annotate', _annotatewrapper)
292 291
293 292
294 293 @command(
295 294 b'debugbuildannotatecache',
296 295 [(b'r', b'rev', b'', _(b'build up to the specific revision'), _(b'REV'))]
297 296 + commands.walkopts,
298 297 _(b'[-r REV] FILE...'),
299 298 )
300 299 def debugbuildannotatecache(ui, repo, *pats, **opts):
301 300 """incrementally build fastannotate cache up to REV for specified files
302 301
303 302 If REV is not specified, use the config 'fastannotate.mainbranch'.
304 303
305 304 If fastannotate.client is True, download the annotate cache from the
306 305 server. Otherwise, build the annotate cache locally.
307 306
308 307 The annotate cache will be built using the default diff and follow
309 308 options and lives in '.hg/fastannotate/default'.
310 309 """
311 310 opts = pycompat.byteskwargs(opts)
312 311 rev = opts.get(b'REV') or ui.config(b'fastannotate', b'mainbranch')
313 312 if not rev:
314 313 raise error.Abort(
315 314 _(b'you need to provide a revision'),
316 315 hint=_(b'set fastannotate.mainbranch or use --rev'),
317 316 )
318 317 if ui.configbool(b'fastannotate', b'unfilteredrepo'):
319 318 repo = repo.unfiltered()
320 319 ctx = logcmdutil.revsingle(repo, rev)
321 320 m = scmutil.match(ctx, pats, opts)
322 321 paths = list(ctx.walk(m))
323 if util.safehasattr(repo, 'prefetchfastannotate'):
322 if hasattr(repo, 'prefetchfastannotate'):
324 323 # client
325 324 if opts.get(b'REV'):
326 325 raise error.Abort(_(b'--rev cannot be used for client'))
327 326 repo.prefetchfastannotate(paths)
328 327 else:
329 328 # server, or full repo
330 329 progress = ui.makeprogress(_(b'building'), total=len(paths))
331 330 for i, path in enumerate(paths):
332 331 progress.update(i)
333 332 with facontext.annotatecontext(repo, path) as actx:
334 333 try:
335 334 if actx.isuptodate(rev):
336 335 continue
337 336 actx.annotate(rev, rev)
338 337 except (faerror.CannotReuseError, faerror.CorruptedFileError):
339 338 # the cache is broken (could happen with renaming so the
340 339 # file history gets invalidated). rebuild and try again.
341 340 ui.debug(
342 341 b'fastannotate: %s: rebuilding broken cache\n' % path
343 342 )
344 343 actx.rebuild()
345 344 try:
346 345 actx.annotate(rev, rev)
347 346 except Exception as ex:
348 347 # possibly a bug, but should not stop us from building
349 348 # cache for other files.
350 349 ui.warn(
351 350 _(
352 351 b'fastannotate: %s: failed to '
353 352 b'build cache: %r\n'
354 353 )
355 354 % (path, ex)
356 355 )
357 356 progress.complete()
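
The per-path loop above retries exactly once: when the cache turns out to be unusable (CannotReuseError or CorruptedFileError) it flips ``rebuild`` to True and goes around again, and only re-raises if the rebuild attempt also fails. A small sketch of that retry shape, with an invented exception and annotate stand-in:

    # Retry-once-with-rebuild sketch; CacheError and run_annotate are invented.
    class CacheError(Exception):
        pass

    def run_annotate(path, rebuild):
        if not rebuild:
            raise CacheError('stale cache for %s' % path)   # simulate a bad cache
        return 'annotated %s from a rebuilt cache' % path

    def annotate_with_retry(path, rebuild=False):
        while True:
            try:
                return run_annotate(path, rebuild)
            except CacheError:
                if rebuild:          # already rebuilt once: give up
                    raise
                rebuild = True       # rebuild the cache (slow) and retry

    print(annotate_with_retry('a.py'))
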
@@ -1,860 +1,860 b''
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # context: context needed to annotate a file
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import collections
10 10 import contextlib
11 11 import os
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.pycompat import (
15 15 getattr,
16 16 open,
17 17 setattr,
18 18 )
19 19 from mercurial.node import (
20 20 bin,
21 21 hex,
22 22 short,
23 23 )
24 24 from mercurial import (
25 25 error,
26 26 linelog as linelogmod,
27 27 lock as lockmod,
28 28 mdiff,
29 29 pycompat,
30 30 scmutil,
31 31 util,
32 32 )
33 33 from mercurial.utils import (
34 34 hashutil,
35 35 stringutil,
36 36 )
37 37
38 38 from . import (
39 39 error as faerror,
40 40 revmap as revmapmod,
41 41 )
42 42
43 43 # given path, get filelog, cached
44 44 @util.lrucachefunc
45 45 def _getflog(repo, path):
46 46 return repo.file(path)
47 47
48 48
49 49 # extracted from mercurial.context.basefilectx.annotate
50 50 def _parents(f, follow=True):
51 51 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
52 52 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
53 53 # from the topmost introrev (= srcrev) down to p.linkrev() if it
54 54 # isn't an ancestor of the srcrev.
55 55 f._changeid
56 56 pl = f.parents()
57 57
58 58 # Don't return renamed parents if we aren't following.
59 59 if not follow:
60 60 pl = [p for p in pl if p.path() == f.path()]
61 61
62 62 # renamed filectx won't have a filelog yet, so set it
63 63 # from the cache to save time
64 64 for p in pl:
65 65 if not '_filelog' in p.__dict__:
66 66 p._filelog = _getflog(f._repo, p.path())
67 67
68 68 return pl
69 69
70 70
71 71 # extracted from mercurial.context.basefilectx.annotate. slightly modified
72 72 # so it takes a fctx instead of a pair of text and fctx.
73 73 def _decorate(fctx):
74 74 text = fctx.data()
75 75 linecount = text.count(b'\n')
76 76 if text and not text.endswith(b'\n'):
77 77 linecount += 1
78 78 return ([(fctx, i) for i in range(linecount)], text)
79 79
80 80
81 81 # extracted from mercurial.context.basefilectx.annotate. slightly modified
82 82 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
83 83 # calculating diff here.
84 84 def _pair(parent, child, blocks):
85 85 for (a1, a2, b1, b2), t in blocks:
86 86 # Changed blocks ('!') or blocks made only of blank lines ('~')
87 87 # belong to the child.
88 88 if t == b'=':
89 89 child[0][b1:b2] = parent[0][a1:a2]
90 90 return child
91 91
92 92
93 93 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
94 94 # could be reused
95 95 _revsingle = util.lrucachefunc(scmutil.revsingle)
96 96
97 97
98 98 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
99 99 """(repo, str, str) -> fctx
100 100
101 101 get the filectx object from repo, rev, path, in an efficient way.
102 102
103 103 if resolverev is True, "rev" is a revision specified by the revset
104 104 language, otherwise "rev" is a nodeid, or a revision number that can
105 105 be consumed by repo.__getitem__.
106 106
107 107 if adjustctx is not None, the returned fctx will point to a changeset
108 108 that introduces the change (last modified the file). if adjustctx
109 109 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
110 110 faster for big repos but is incorrect for some cases.
111 111 """
112 112 if resolverev and not isinstance(rev, int) and rev is not None:
113 113 ctx = _revsingle(repo, rev)
114 114 else:
115 115 ctx = repo[rev]
116 116
117 117 # If we don't need to adjust the linkrev, create the filectx using the
118 118 # changectx instead of using ctx[path]. This means it already has the
119 119 # changectx information, so blame -u will be able to look directly at the
120 120 # commitctx object instead of having to resolve it by going through the
121 121 # manifest. In a lazy-manifest world this can prevent us from downloading a
122 122 # lot of data.
123 123 if adjustctx is None:
124 124 # ctx.rev() is None means it's the working copy, which is a special
125 125 # case.
126 126 if ctx.rev() is None:
127 127 fctx = ctx[path]
128 128 else:
129 129 fctx = repo.filectx(path, changeid=ctx.rev())
130 130 else:
131 131 fctx = ctx[path]
132 132 if adjustctx == b'linkrev':
133 133 introrev = fctx.linkrev()
134 134 else:
135 135 introrev = fctx.introrev()
136 136 if introrev != ctx.rev():
137 137 fctx._changeid = introrev
138 138 fctx._changectx = repo[introrev]
139 139 return fctx
140 140
141 141
142 142 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
143 143 def encodedir(path):
144 144 return (
145 145 path.replace(b'.hg/', b'.hg.hg/')
146 146 .replace(b'.l/', b'.l.hg/')
147 147 .replace(b'.m/', b'.m.hg/')
148 148 .replace(b'.lock/', b'.lock.hg/')
149 149 )
150 150
151 151
152 152 def hashdiffopts(diffopts):
153 153 diffoptstr = stringutil.pprint(
154 154 sorted(
155 155 (k, getattr(diffopts, pycompat.sysstr(k)))
156 156 for k in mdiff.diffopts.defaults
157 157 )
158 158 )
159 159 return hex(hashutil.sha1(diffoptstr).digest())[:6]
160 160
161 161
162 162 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
163 163
164 164
165 165 class annotateopts:
166 166 """like mercurial.mdiff.diffopts, but is for annotate
167 167
168 168 followrename: follow renames, like "hg annotate -f"
169 169 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
170 170 """
171 171
172 172 defaults = {
173 173 'diffopts': None,
174 174 'followrename': True,
175 175 'followmerge': True,
176 176 }
177 177
178 178 def __init__(self, **opts):
179 179 for k, v in self.defaults.items():
180 180 setattr(self, k, opts.get(k, v))
181 181
182 182 @util.propertycache
183 183 def shortstr(self):
184 184 """represent opts in a short string, suitable for a directory name"""
185 185 result = b''
186 186 if not self.followrename:
187 187 result += b'r0'
188 188 if not self.followmerge:
189 189 result += b'm0'
190 190 if self.diffopts is not None:
191 191 assert isinstance(self.diffopts, mdiff.diffopts)
192 192 diffopthash = hashdiffopts(self.diffopts)
193 193 if diffopthash != _defaultdiffopthash:
194 194 result += b'i' + diffopthash
195 195 return result or b'default'
196 196
197 197
198 198 defaultopts = annotateopts()
199 199
200 200
201 201 class _annotatecontext:
202 202 """do not use this class directly as it does not use lock to protect
203 203 writes. use "with annotatecontext(...)" instead.
204 204 """
205 205
206 206 def __init__(self, repo, path, linelogpath, revmappath, opts):
207 207 self.repo = repo
208 208 self.ui = repo.ui
209 209 self.path = path
210 210 self.opts = opts
211 211 self.linelogpath = linelogpath
212 212 self.revmappath = revmappath
213 213 self._linelog = None
214 214 self._revmap = None
215 215 self._node2path = {} # {str: str}
216 216
217 217 @property
218 218 def linelog(self):
219 219 if self._linelog is None:
220 220 if os.path.exists(self.linelogpath):
221 221 with open(self.linelogpath, b'rb') as f:
222 222 try:
223 223 self._linelog = linelogmod.linelog.fromdata(f.read())
224 224 except linelogmod.LineLogError:
225 225 self._linelog = linelogmod.linelog()
226 226 else:
227 227 self._linelog = linelogmod.linelog()
228 228 return self._linelog
229 229
230 230 @property
231 231 def revmap(self):
232 232 if self._revmap is None:
233 233 self._revmap = revmapmod.revmap(self.revmappath)
234 234 return self._revmap
235 235
236 236 def close(self):
237 237 if self._revmap is not None:
238 238 self._revmap.flush()
239 239 self._revmap = None
240 240 if self._linelog is not None:
241 241 with open(self.linelogpath, b'wb') as f:
242 242 f.write(self._linelog.encode())
243 243 self._linelog = None
244 244
245 245 __del__ = close
246 246
247 247 def rebuild(self):
248 248 """delete linelog and revmap, useful for rebuilding"""
249 249 self.close()
250 250 self._node2path.clear()
251 251 _unlinkpaths([self.revmappath, self.linelogpath])
252 252
253 253 @property
254 254 def lastnode(self):
255 255 """return last node in revmap, or None if revmap is empty"""
256 256 if self._revmap is None:
257 257 # fast path, read revmap without loading its full content
258 258 return revmapmod.getlastnode(self.revmappath)
259 259 else:
260 260 return self._revmap.rev2hsh(self._revmap.maxrev)
261 261
262 262 def isuptodate(self, master, strict=True):
263 263 """return True if the revmap / linelog is up-to-date, or the file
264 264 does not exist in the master revision. False otherwise.
265 265
266 266 it tries to be fast and could return false negatives, because of the
267 267 use of linkrev instead of introrev.
268 268
269 269 useful for both server and client to decide whether to update
270 270 fastannotate cache or not.
271 271
272 272 if strict is True, even if fctx exists in the revmap, but is not the
273 273 last node, isuptodate will return False. it's good for performance - no
274 274 expensive check was done.
275 275
276 276 if strict is False, if fctx exists in the revmap, this function may
277 277 return True. this is useful for the client to skip downloading the
278 278 cache if the client's master is behind the server's.
279 279 """
280 280 lastnode = self.lastnode
281 281 try:
282 282 f = self._resolvefctx(master, resolverev=True)
283 283 # choose linkrev instead of introrev as the check is meant to be
284 284 # *fast*.
285 285 linknode = self.repo.changelog.node(f.linkrev())
286 286 if not strict and lastnode and linknode != lastnode:
287 287 # check if f.node() is in the revmap. note: this loads the
288 288 # revmap and can be slow.
289 289 return self.revmap.hsh2rev(linknode) is not None
290 290 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
291 291 # false negatives are acceptable in this case.
292 292 return linknode == lastnode
293 293 except LookupError:
294 294 # master does not have the file, or the revmap is ahead
295 295 return True
296 296
297 297 def annotate(self, rev, master=None, showpath=False, showlines=False):
298 298 """incrementally update the cache so it includes revisions in the main
299 299 branch till 'master'. and run annotate on 'rev', which may or may not be
300 300 included in the main branch.
301 301
302 302 if master is None, do not update linelog.
303 303
304 304 the first value returned is the annotate result, it is [(node, linenum)]
305 305 by default. [(node, linenum, path)] if showpath is True.
306 306
307 307 if showlines is True, a second value will be returned, it is a list of
308 308 corresponding line contents.
309 309 """
310 310
311 311 # the fast path test requires commit hash, convert rev number to hash,
312 312 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
313 313 # command could give us a revision number even if the user passes a
314 314 # commit hash.
315 315 if isinstance(rev, int):
316 316 rev = hex(self.repo.changelog.node(rev))
317 317
318 318 # fast path: if rev is in the main branch already
319 319 directly, revfctx = self.canannotatedirectly(rev)
320 320 if directly:
321 321 if self.ui.debugflag:
322 322 self.ui.debug(
323 323 b'fastannotate: %s: using fast path '
324 324 b'(resolved fctx: %s)\n'
325 325 % (
326 326 self.path,
327 stringutil.pprint(util.safehasattr(revfctx, 'node')),
327 stringutil.pprint(hasattr(revfctx, 'node')),
328 328 )
329 329 )
330 330 return self.annotatedirectly(revfctx, showpath, showlines)
331 331
332 332 # resolve master
333 333 masterfctx = None
334 334 if master:
335 335 try:
336 336 masterfctx = self._resolvefctx(
337 337 master, resolverev=True, adjustctx=True
338 338 )
339 339 except LookupError: # master does not have the file
340 340 pass
341 341 else:
342 342 if masterfctx in self.revmap: # no need to update linelog
343 343 masterfctx = None
344 344
345 345 # ... - @ <- rev (can be an arbitrary changeset,
346 346 # / not necessarily a descendant
347 347 # master -> o of master)
348 348 # |
349 349 # a merge -> o 'o': new changesets in the main branch
350 350 # |\ '#': revisions in the main branch that
351 351 # o * exist in linelog / revmap
352 352 # | . '*': changesets in side branches, or
353 353 # last master -> # . descendants of master
354 354 # | .
355 355 # # * joint: '#', and is a parent of a '*'
356 356 # |/
357 357 # a joint -> # ^^^^ --- side branches
358 358 # |
359 359 # ^ --- main branch (in linelog)
360 360
361 361 # these DFSes are similar to the traditional annotate algorithm.
362 362 # we cannot really reuse the code for perf reason.
363 363
364 364 # 1st DFS calculates merges, joint points, and needed.
365 365 # "needed" is a simple reference counting dict to free items in
366 366 # "hist", reducing its memory usage otherwise could be huge.
367 367 initvisit = [revfctx]
368 368 if masterfctx:
369 369 if masterfctx.rev() is None:
370 370 raise error.Abort(
371 371 _(b'cannot update linelog to wdir()'),
372 372 hint=_(b'set fastannotate.mainbranch'),
373 373 )
374 374 initvisit.append(masterfctx)
375 375 visit = initvisit[:]
376 376 pcache = {}
377 377 needed = {revfctx: 1}
378 378 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
379 379 while visit:
380 380 f = visit.pop()
381 381 if f in pcache or f in hist:
382 382 continue
383 383 if f in self.revmap: # in the old main branch, it's a joint
384 384 llrev = self.revmap.hsh2rev(f.node())
385 385 self.linelog.annotate(llrev)
386 386 result = self.linelog.annotateresult
387 387 hist[f] = (result, f.data())
388 388 continue
389 389 pl = self._parentfunc(f)
390 390 pcache[f] = pl
391 391 for p in pl:
392 392 needed[p] = needed.get(p, 0) + 1
393 393 if p not in pcache:
394 394 visit.append(p)
395 395
396 396 # 2nd (simple) DFS calculates new changesets in the main branch
397 397 # ('o' nodes in the above graph), so we know when to update linelog.
398 398 newmainbranch = set()
399 399 f = masterfctx
400 400 while f and f not in self.revmap:
401 401 newmainbranch.add(f)
402 402 pl = pcache[f]
403 403 if pl:
404 404 f = pl[0]
405 405 else:
406 406 f = None
407 407 break
408 408
409 409 # f, if present, is the position where the last build stopped at, and
410 410 # should be the "master" last time. check to see if we can continue
411 411 # building the linelog incrementally. (we cannot if diverged)
412 412 if masterfctx is not None:
413 413 self._checklastmasterhead(f)
414 414
415 415 if self.ui.debugflag:
416 416 if newmainbranch:
417 417 self.ui.debug(
418 418 b'fastannotate: %s: %d new changesets in the main'
419 419 b' branch\n' % (self.path, len(newmainbranch))
420 420 )
421 421 elif not hist: # no joints, no updates
422 422 self.ui.debug(
423 423 b'fastannotate: %s: linelog cannot help in '
424 424 b'annotating this revision\n' % self.path
425 425 )
426 426
427 427 # prepare annotateresult so we can update linelog incrementally
428 428 self.linelog.annotate(self.linelog.maxrev)
429 429
430 430 # 3rd DFS does the actual annotate
431 431 visit = initvisit[:]
432 432 progress = self.ui.makeprogress(
433 433 b'building cache', total=len(newmainbranch)
434 434 )
435 435 while visit:
436 436 f = visit[-1]
437 437 if f in hist:
438 438 visit.pop()
439 439 continue
440 440
441 441 ready = True
442 442 pl = pcache[f]
443 443 for p in pl:
444 444 if p not in hist:
445 445 ready = False
446 446 visit.append(p)
447 447 if not ready:
448 448 continue
449 449
450 450 visit.pop()
451 451 blocks = None # mdiff blocks, used for appending linelog
452 452 ismainbranch = f in newmainbranch
453 453 # curr is the same as the traditional annotate algorithm,
454 454 # if we only care about linear history (do not follow merge),
455 455 # then curr is not actually used.
456 456 assert f not in hist
457 457 curr = _decorate(f)
458 458 for i, p in enumerate(pl):
459 459 bs = list(self._diffblocks(hist[p][1], curr[1]))
460 460 if i == 0 and ismainbranch:
461 461 blocks = bs
462 462 curr = _pair(hist[p], curr, bs)
463 463 if needed[p] == 1:
464 464 del hist[p]
465 465 del needed[p]
466 466 else:
467 467 needed[p] -= 1
468 468
469 469 hist[f] = curr
470 470 del pcache[f]
471 471
472 472 if ismainbranch: # need to write to linelog
473 473 progress.increment()
474 474 bannotated = None
475 475 if len(pl) == 2 and self.opts.followmerge: # merge
476 476 bannotated = curr[0]
477 477 if blocks is None: # no parents, add an empty one
478 478 blocks = list(self._diffblocks(b'', curr[1]))
479 479 self._appendrev(f, blocks, bannotated)
480 480 elif showpath: # not append linelog, but we need to record path
481 481 self._node2path[f.node()] = f.path()
482 482
483 483 progress.complete()
484 484
485 485 result = [
486 486 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
487 487 for fr, l in hist[revfctx][0]
488 488 ] # [(node, linenumber)]
489 489 return self._refineannotateresult(result, revfctx, showpath, showlines)
490 490
491 491 def canannotatedirectly(self, rev):
492 492 """(str) -> bool, fctx or node.
493 493 return (True, f) if we can annotate without updating the linelog, pass
494 494 f to annotatedirectly.
495 495 return (False, f) if we need extra calculation. f is the fctx resolved
496 496 from rev.
497 497 """
498 498 result = True
499 499 f = None
500 500 if not isinstance(rev, int) and rev is not None:
501 501 hsh = {20: bytes, 40: bin}.get(len(rev), lambda x: None)(rev)
502 502 if hsh is not None and (hsh, self.path) in self.revmap:
503 503 f = hsh
504 504 if f is None:
505 505 adjustctx = b'linkrev' if self._perfhack else True
506 506 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
507 507 result = f in self.revmap
508 508 if not result and self._perfhack:
509 509 # redo the resolution without perfhack - as we are going to
510 510 # do write operations, we need a correct fctx.
511 511 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
512 512 return result, f
513 513
514 514 def annotatealllines(self, rev, showpath=False, showlines=False):
515 515 """(rev : str) -> [(node : str, linenum : int, path : str)]
516 516
517 517 the result has the same format with annotate, but include all (including
518 518 deleted) lines up to rev. call this after calling annotate(rev, ...) for
519 519 better performance and accuracy.
520 520 """
521 521 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
522 522
523 523 # find a chain from rev to anything in the mainbranch
524 524 if revfctx not in self.revmap:
525 525 chain = [revfctx]
526 526 a = b''
527 527 while True:
528 528 f = chain[-1]
529 529 pl = self._parentfunc(f)
530 530 if not pl:
531 531 break
532 532 if pl[0] in self.revmap:
533 533 a = pl[0].data()
534 534 break
535 535 chain.append(pl[0])
536 536
537 537 # both self.linelog and self.revmap are backed by the filesystem. now
538 538 # we want to modify them but do not want to write changes back to
539 539 # files. so we create in-memory objects and copy them. it's like
540 540 # a "fork".
541 541 linelog = linelogmod.linelog()
542 542 linelog.copyfrom(self.linelog)
543 543 linelog.annotate(linelog.maxrev)
544 544 revmap = revmapmod.revmap()
545 545 revmap.copyfrom(self.revmap)
546 546
547 547 for f in reversed(chain):
548 548 b = f.data()
549 549 blocks = list(self._diffblocks(a, b))
550 550 self._doappendrev(linelog, revmap, f, blocks)
551 551 a = b
552 552 else:
553 553 # fastpath: use existing linelog, revmap as we don't write to them
554 554 linelog = self.linelog
555 555 revmap = self.revmap
556 556
557 557 lines = linelog.getalllines()
558 558 hsh = revfctx.node()
559 559 llrev = revmap.hsh2rev(hsh)
560 560 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
561 561 # cannot use _refineannotateresult since we need custom logic for
562 562 # resolving line contents
563 563 if showpath:
564 564 result = self._addpathtoresult(result, revmap)
565 565 if showlines:
566 566 linecontents = self._resolvelines(result, revmap, linelog)
567 567 result = (result, linecontents)
568 568 return result
569 569
570 570 def _resolvelines(self, annotateresult, revmap, linelog):
571 571 """(annotateresult) -> [line]. designed for annotatealllines.
572 572 this is probably the most inefficient code in the whole fastannotate
573 573 directory. but we have made a decision that the linelog does not
574 574 store line contents. so getting them requires random accesses to
575 575 the revlog data; since there can be many, this can be very slow.
576 576 """
577 577 # [llrev]
578 578 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
579 579 result = [None] * len(annotateresult)
580 580 # {(rev, linenum): [lineindex]}
581 581 key2idxs = collections.defaultdict(list)
582 582 for i in range(len(result)):
583 583 key2idxs[(revs[i], annotateresult[i][1])].append(i)
584 584 while key2idxs:
585 585 # find an unresolved line and its linelog rev to annotate
586 586 hsh = None
587 587 try:
588 588 for (rev, _linenum), idxs in key2idxs.items():
589 589 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
590 590 continue
591 591 hsh = annotateresult[idxs[0]][0]
592 592 break
593 593 except StopIteration: # no more unresolved lines
594 594 return result
595 595 if hsh is None:
596 596 # the remaining key2idxs are not in main branch, resolving them
597 597 # using the hard way...
598 598 revlines = {}
599 599 for (rev, linenum), idxs in key2idxs.items():
600 600 if rev not in revlines:
601 601 hsh = annotateresult[idxs[0]][0]
602 602 if self.ui.debugflag:
603 603 self.ui.debug(
604 604 b'fastannotate: reading %s line #%d '
605 605 b'to resolve lines %r\n'
606 606 % (short(hsh), linenum, idxs)
607 607 )
608 608 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
609 609 lines = mdiff.splitnewlines(fctx.data())
610 610 revlines[rev] = lines
611 611 for idx in idxs:
612 612 result[idx] = revlines[rev][linenum]
613 613 assert all(x is not None for x in result)
614 614 return result
615 615
616 616 # run the annotate and the lines should match to the file content
617 617 self.ui.debug(
618 618 b'fastannotate: annotate %s to resolve lines\n' % short(hsh)
619 619 )
620 620 linelog.annotate(rev)
621 621 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
622 622 annotated = linelog.annotateresult
623 623 lines = mdiff.splitnewlines(fctx.data())
624 624 if len(lines) != len(annotated):
625 625 raise faerror.CorruptedFileError(b'unexpected annotated lines')
626 626 # resolve lines from the annotate result
627 627 for i, line in enumerate(lines):
628 628 k = annotated[i]
629 629 if k in key2idxs:
630 630 for idx in key2idxs[k]:
631 631 result[idx] = line
632 632 del key2idxs[k]
633 633 return result
634 634
635 635 def annotatedirectly(self, f, showpath, showlines):
636 636 """like annotate, but when we know that f is in linelog.
637 637 f can be either a 20-char str (node) or a fctx. this is for perf - in
638 638 the best case, the user provides a node and we don't need to read the
639 639 filelog or construct any filecontext.
640 640 """
641 641 if isinstance(f, bytes):
642 642 hsh = f
643 643 else:
644 644 hsh = f.node()
645 645 llrev = self.revmap.hsh2rev(hsh)
646 646 if not llrev:
647 647 raise faerror.CorruptedFileError(b'%s is not in revmap' % hex(hsh))
648 648 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
649 649 raise faerror.CorruptedFileError(
650 650 b'%s is not in revmap mainbranch' % hex(hsh)
651 651 )
652 652 self.linelog.annotate(llrev)
653 653 result = [
654 654 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
655 655 ]
656 656 return self._refineannotateresult(result, f, showpath, showlines)
657 657
658 658 def _refineannotateresult(self, result, f, showpath, showlines):
659 659 """add the missing path or line contents, they can be expensive.
660 660 f could be either node or fctx.
661 661 """
662 662 if showpath:
663 663 result = self._addpathtoresult(result)
664 664 if showlines:
665 665 if isinstance(f, bytes): # f: node or fctx
666 666 llrev = self.revmap.hsh2rev(f)
667 667 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
668 668 else:
669 669 fctx = f
670 670 lines = mdiff.splitnewlines(fctx.data())
671 671 if len(lines) != len(result): # linelog is probably corrupted
672 672 raise faerror.CorruptedFileError()
673 673 result = (result, lines)
674 674 return result
675 675
676 676 def _appendrev(self, fctx, blocks, bannotated=None):
677 677 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
678 678
679 679 def _diffblocks(self, a, b):
680 680 return mdiff.allblocks(a, b, self.opts.diffopts)
681 681
682 682 @staticmethod
683 683 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
684 684 """append a revision to linelog and revmap"""
685 685
686 686 def getllrev(f):
687 687 """(fctx) -> int"""
688 688 # f should not be a linelog revision
689 689 if isinstance(f, int):
690 690 raise error.ProgrammingError(b'f should not be an int')
691 691 # f is a fctx, allocate linelog rev on demand
692 692 hsh = f.node()
693 693 rev = revmap.hsh2rev(hsh)
694 694 if rev is None:
695 695 rev = revmap.append(hsh, sidebranch=True, path=f.path())
696 696 return rev
697 697
698 698 # append sidebranch revisions to revmap
699 699 siderevs = []
700 700 siderevmap = {} # node: int
701 701 if bannotated is not None:
702 702 for (a1, a2, b1, b2), op in blocks:
703 703 if op != b'=':
704 704 # f could be either a linelog rev, or a fctx.
705 705 siderevs += [
706 706 f
707 707 for f, l in bannotated[b1:b2]
708 708 if not isinstance(f, int)
709 709 ]
710 710 siderevs = set(siderevs)
711 711 if fctx in siderevs: # mainnode must be appended separately
712 712 siderevs.remove(fctx)
713 713 for f in siderevs:
714 714 siderevmap[f] = getllrev(f)
715 715
716 716 # the changeset in the main branch, could be a merge
717 717 llrev = revmap.append(fctx.node(), path=fctx.path())
718 718 siderevmap[fctx] = llrev
719 719
720 720 for (a1, a2, b1, b2), op in reversed(blocks):
721 721 if op == b'=':
722 722 continue
723 723 if bannotated is None:
724 724 linelog.replacelines(llrev, a1, a2, b1, b2)
725 725 else:
726 726 blines = [
727 727 ((r if isinstance(r, int) else siderevmap[r]), l)
728 728 for r, l in bannotated[b1:b2]
729 729 ]
730 730 linelog.replacelines_vec(llrev, a1, a2, blines)
731 731
732 732 def _addpathtoresult(self, annotateresult, revmap=None):
733 733 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
734 734 if revmap is None:
735 735 revmap = self.revmap
736 736
737 737 def _getpath(nodeid):
738 738 path = self._node2path.get(nodeid)
739 739 if path is None:
740 740 path = revmap.rev2path(revmap.hsh2rev(nodeid))
741 741 self._node2path[nodeid] = path
742 742 return path
743 743
744 744 return [(n, l, _getpath(n)) for n, l in annotateresult]
745 745
746 746 def _checklastmasterhead(self, fctx):
747 747 """check if fctx is the master's head last time, raise if not"""
748 748 if fctx is None:
749 749 llrev = 0
750 750 else:
751 751 llrev = self.revmap.hsh2rev(fctx.node())
752 752 if not llrev:
753 753 raise faerror.CannotReuseError()
754 754 if self.linelog.maxrev != llrev:
755 755 raise faerror.CannotReuseError()
756 756
757 757 @util.propertycache
758 758 def _parentfunc(self):
759 759 """-> (fctx) -> [fctx]"""
760 760 followrename = self.opts.followrename
761 761 followmerge = self.opts.followmerge
762 762
763 763 def parents(f):
764 764 pl = _parents(f, follow=followrename)
765 765 if not followmerge:
766 766 pl = pl[:1]
767 767 return pl
768 768
769 769 return parents
770 770
771 771 @util.propertycache
772 772 def _perfhack(self):
773 773 return self.ui.configbool(b'fastannotate', b'perfhack')
774 774
775 775 def _resolvefctx(self, rev, path=None, **kwds):
776 776 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
777 777
778 778
779 779 def _unlinkpaths(paths):
780 780 """silent, best-effort unlink"""
781 781 for path in paths:
782 782 try:
783 783 util.unlink(path)
784 784 except OSError:
785 785 pass
786 786
787 787
788 788 class pathhelper:
789 789 """helper for getting paths for lockfile, linelog and revmap"""
790 790
791 791 def __init__(self, repo, path, opts=defaultopts):
792 792 # different options use different directories
793 793 self._vfspath = os.path.join(
794 794 b'fastannotate', opts.shortstr, encodedir(path)
795 795 )
796 796 self._repo = repo
797 797
798 798 @property
799 799 def dirname(self):
800 800 return os.path.dirname(self._repo.vfs.join(self._vfspath))
801 801
802 802 @property
803 803 def linelogpath(self):
804 804 return self._repo.vfs.join(self._vfspath + b'.l')
805 805
806 806 def lock(self):
807 807 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
808 808
809 809 @property
810 810 def revmappath(self):
811 811 return self._repo.vfs.join(self._vfspath + b'.m')
812 812
813 813
814 814 @contextlib.contextmanager
815 815 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
816 816 """context needed to perform (fast) annotate on a file
817 817
818 818 an annotatecontext of a single file consists of two structures: the
819 819 linelog and the revmap. this function takes care of locking. only 1
820 820 process is allowed to write that file's linelog and revmap at a time.
821 821
822 822 when something goes wrong, this function will assume the linelog and the
823 823 revmap are in a bad state, and remove them from disk.
824 824
825 825 use this function in the following way:
826 826
827 827 with annotatecontext(...) as actx:
828 828 actx. ....
829 829 """
830 830 helper = pathhelper(repo, path, opts)
831 831 util.makedirs(helper.dirname)
832 832 revmappath = helper.revmappath
833 833 linelogpath = helper.linelogpath
834 834 actx = None
835 835 try:
836 836 with helper.lock():
837 837 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
838 838 if rebuild:
839 839 actx.rebuild()
840 840 yield actx
841 841 except Exception:
842 842 if actx is not None:
843 843 actx.rebuild()
844 844 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
845 845 raise
846 846 finally:
847 847 if actx is not None:
848 848 actx.close()
849 849
850 850
851 851 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
852 852 """like annotatecontext but get the context from a fctx. convenient when
853 853 used in fctx.annotate
854 854 """
855 855 repo = fctx._repo
856 856 path = fctx._path
857 857 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
858 858 follow = True
859 859 aopts = annotateopts(diffopts=diffopts, followrename=follow)
860 860 return annotatecontext(repo, path, aopts, rebuild)
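
``annotateopts.shortstr`` and ``pathhelper`` above derive an options-dependent cache location: a short option string ('default', 'r0', 'm0', or 'i' plus a diffopts hash) joined with the encoded file path, with '.l' (linelog), '.m' (revmap) and '.lock' suffixes. A simplified sketch of that naming scheme, assuming default diffopts and skipping ``encodedir()`` and the vfs layer:

    # Simplified cache-path sketch; the real code uses bytes, vfs and encodedir().
    import os

    def shortstr(followrename=True, followmerge=True):
        result = ''
        if not followrename:
            result += 'r0'
        if not followmerge:
            result += 'm0'
        return result or 'default'

    def cache_paths(repo_root, path, **opts):
        base = os.path.join(repo_root, '.hg', 'fastannotate', shortstr(**opts), path)
        return {'linelog': base + '.l', 'revmap': base + '.m', 'lock': base + '.lock'}

    print(cache_paths('/repo', 'src/module.py'))                      # default options
    print(cache_paths('/repo', 'src/module.py', followmerge=False))   # 'm0' directory
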
@@ -1,1016 +1,1016 b''
1 1 # __init__.py - fsmonitor initialization and overrides
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9 9
10 10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 11 status results.
12 12
13 13 On a particular Linux system, for a real-world repository with over 400,000
14 14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 15 system, with fsmonitor it takes about 0.3 seconds.
16 16
17 17 fsmonitor requires no configuration -- it will tell Watchman about your
18 18 repository as necessary. You'll need to install Watchman from
19 19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20 20
21 21 fsmonitor is incompatible with the largefiles and eol extensions, and
22 22 will disable itself if any of those are active.
23 23
24 24 The following configuration options exist:
25 25
26 26 ::
27 27
28 28 [fsmonitor]
29 29 mode = {off, on, paranoid}
30 30
31 31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
32 32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
33 33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
34 34 and ensure that the results are consistent.
35 35
36 36 ::
37 37
38 38 [fsmonitor]
39 39 timeout = (float)
40 40
41 41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
42 42 to return results. Defaults to `2.0`.
43 43
44 44 ::
45 45
46 46 [fsmonitor]
47 47 blacklistusers = (list of userids)
48 48
49 49 A list of usernames for which fsmonitor will disable itself altogether.
50 50
51 51 ::
52 52
53 53 [fsmonitor]
54 54 walk_on_invalidate = (boolean)
55 55
56 56 Whether or not to walk the whole repo ourselves when our cached state has been
57 57 invalidated, for example when Watchman has been restarted or .hgignore rules
58 58 have been changed. Walking the repo in that case can result in competing for
59 59 I/O with Watchman. For large repos it is recommended to set this value to
60 60 false. You may wish to set this to true if you have a very fast filesystem
61 61 that can outpace the IPC overhead of getting the result data for the full repo
62 62 from Watchman. Defaults to false.
63 63
64 64 ::
65 65
66 66 [fsmonitor]
67 67 warn_when_unused = (boolean)
68 68
69 69 Whether to print a warning during certain operations when fsmonitor would be
70 70 beneficial to performance but isn't enabled.
71 71
72 72 ::
73 73
74 74 [fsmonitor]
75 75 warn_update_file_count = (integer)
76 76 # or when mercurial is built with rust support
77 77 warn_update_file_count_rust = (integer)
78 78
79 79 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
80 80 be printed during working directory updates if this many files will be
81 81 created.
82 82 '''
83 83
84 84 # Platforms Supported
85 85 # ===================
86 86 #
87 87 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
88 88 # even under severe loads.
89 89 #
90 90 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
91 91 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
92 92 # user testing under normal loads.
93 93 #
94 94 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
95 95 # very little testing has been done.
96 96 #
97 97 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
98 98 #
99 99 # Known Issues
100 100 # ============
101 101 #
102 102 # * fsmonitor will disable itself if any of the following extensions are
103 103 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
104 104 # * fsmonitor will produce incorrect results if nested repos that are not
105 105 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
106 106 #
107 107 # The issues related to nested repos and subrepos are probably not fundamental
108 108 # ones. Patches to fix them are welcome.
109 109
110 110
111 111 import codecs
112 112 import os
113 113 import stat
114 114 import sys
115 115 import tempfile
116 116 import weakref
117 117
118 118 from mercurial.i18n import _
119 119 from mercurial.node import hex
120 120
121 121 from mercurial.pycompat import open
122 122 from mercurial import (
123 123 context,
124 124 encoding,
125 125 error,
126 126 extensions,
127 127 localrepo,
128 128 merge,
129 129 pathutil,
130 130 pycompat,
131 131 registrar,
132 132 scmutil,
133 133 util,
134 134 )
135 135
136 136 # no-check-code because we're accessing private information only public in pure
137 137 from mercurial.pure import parsers
138 138 from mercurial import match as matchmod
139 139 from mercurial.utils import (
140 140 hashutil,
141 141 stringutil,
142 142 )
143 143
144 144 from . import (
145 145 pywatchman,
146 146 state,
147 147 watchmanclient,
148 148 )
149 149
150 150 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
151 151 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
152 152 # be specifying the version(s) of Mercurial they are tested with, or
153 153 # leave the attribute unspecified.
154 154 testedwith = b'ships-with-hg-core'
155 155
156 156 configtable = {}
157 157 configitem = registrar.configitem(configtable)
158 158
159 159 configitem(
160 160 b'fsmonitor',
161 161 b'mode',
162 162 default=b'on',
163 163 )
164 164 configitem(
165 165 b'fsmonitor',
166 166 b'walk_on_invalidate',
167 167 default=False,
168 168 )
169 169 configitem(
170 170 b'fsmonitor',
171 171 b'timeout',
172 172 default=b'2',
173 173 )
174 174 configitem(
175 175 b'fsmonitor',
176 176 b'blacklistusers',
177 177 default=list,
178 178 )
179 179 configitem(
180 180 b'fsmonitor',
181 181 b'watchman_exe',
182 182 default=b'watchman',
183 183 )
184 184 configitem(
185 185 b'fsmonitor',
186 186 b'verbose',
187 187 default=True,
188 188 experimental=True,
189 189 )
190 190 configitem(
191 191 b'experimental',
192 192 b'fsmonitor.transaction_notify',
193 193 default=False,
194 194 )
195 195
196 196 # This extension is incompatible with the following blacklisted extensions
197 197 # and will disable itself when encountering one of these:
198 198 _blacklist = [b'largefiles', b'eol']
199 199
200 200
201 201 def debuginstall(ui, fm):
202 202 fm.write(
203 203 b"fsmonitor-watchman",
204 204 _(b"fsmonitor checking for watchman binary... (%s)\n"),
205 205 ui.configpath(b"fsmonitor", b"watchman_exe"),
206 206 )
207 207 root = tempfile.mkdtemp()
208 208 c = watchmanclient.client(ui, root)
209 209 err = None
210 210 try:
211 211 v = c.command(b"version")
212 212 fm.write(
213 213 b"fsmonitor-watchman-version",
214 214 _(b" watchman binary version %s\n"),
215 215 pycompat.bytestr(v["version"]),
216 216 )
217 217 except watchmanclient.Unavailable as e:
218 218 err = stringutil.forcebytestr(e)
219 219 fm.condwrite(
220 220 err,
221 221 b"fsmonitor-watchman-error",
222 222 _(b" watchman binary missing or broken: %s\n"),
223 223 err,
224 224 )
225 225 return 1 if err else 0
226 226
227 227
228 228 def _handleunavailable(ui, state, ex):
229 229 """Exception handler for Watchman interaction exceptions"""
230 230 if isinstance(ex, watchmanclient.Unavailable):
231 231 # experimental config: fsmonitor.verbose
232 232 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
233 233 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
234 234 ui.warn(stringutil.forcebytestr(ex) + b'\n')
235 235 if ex.invalidate:
236 236 state.invalidate()
237 237 # experimental config: fsmonitor.verbose
238 238 if ui.configbool(b'fsmonitor', b'verbose'):
239 239 ui.log(
240 240 b'fsmonitor',
241 241 b'Watchman unavailable: %s\n',
242 242 stringutil.forcebytestr(ex.msg),
243 243 )
244 244 else:
245 245 ui.log(
246 246 b'fsmonitor',
247 247 b'Watchman exception: %s\n',
248 248 stringutil.forcebytestr(ex),
249 249 )
250 250
251 251
252 252 def _hashignore(ignore):
253 253 """Calculate hash for ignore patterns and filenames
254 254
255 255 If this information changes between Mercurial invocations, we can't
256 256 rely on Watchman information anymore and have to re-scan the working
257 257 copy.
258 258
259 259 """
260 260 sha1 = hashutil.sha1()
261 261 sha1.update(pycompat.byterepr(ignore))
262 262 return pycompat.sysbytes(sha1.hexdigest())
263 263
264 264
265 265 _watchmanencoding = pywatchman.encoding.get_local_encoding()
266 266 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
267 267 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
268 268
269 269
270 270 def _watchmantofsencoding(path):
271 271 """Fix path to match watchman and local filesystem encoding
272 272
273 273 watchman's paths encoding can differ from filesystem encoding. For example,
274 274 on Windows, it's always utf-8.
275 275 """
276 276 try:
277 277 decoded = path.decode(_watchmanencoding)
278 278 except UnicodeDecodeError as e:
279 279 raise error.Abort(
280 280 stringutil.forcebytestr(e), hint=b'watchman encoding error'
281 281 )
282 282
283 283 try:
284 284 encoded = decoded.encode(_fsencoding, 'strict')
285 285 except UnicodeEncodeError as e:
286 286 raise error.Abort(stringutil.forcebytestr(e))
287 287
288 288 return encoded
289 289
290 290
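As a rough illustration of the re-encoding above (made-up byte values, assuming a utf-8 Watchman encoding and a latin-1 filesystem encoding):

# what watchman hands back vs. what the rest of hg compares against
raw = 'café'.encode('utf-8')                   # b'caf\xc3\xa9' from watchman
fixed = raw.decode('utf-8').encode('latin-1')  # b'caf\xe9' in the fs encoding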
291 291 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
292 292 """Replacement for dirstate.walk, hooking into Watchman.
293 293
294 294 Whenever full is False, ignored is False, and the Watchman client is
295 295 available, use Watchman combined with saved state to possibly return only a
296 296 subset of files."""
297 297
298 298 def bail(reason):
299 299 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
300 300 return orig(match, subrepos, unknown, ignored, full=True)
301 301
302 302 if full:
303 303 return bail(b'full rewalk requested')
304 304 if ignored:
305 305 return bail(b'listing ignored files')
306 306 if not self._watchmanclient.available():
307 307 return bail(b'client unavailable')
308 308 state = self._fsmonitorstate
309 309 clock, ignorehash, notefiles = state.get()
310 310 if not clock:
311 311 if state.walk_on_invalidate:
312 312 return bail(b'no clock')
313 313 # Initial NULL clock value, see
314 314 # https://facebook.github.io/watchman/docs/clockspec.html
315 315 clock = b'c:0:0'
316 316 notefiles = []
317 317
318 318 ignore = self._ignore
319 319 dirignore = self._dirignore
320 320 if unknown:
321 321 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
322 322 # ignore list changed -- can't rely on Watchman state any more
323 323 if state.walk_on_invalidate:
324 324 return bail(b'ignore rules changed')
325 325 notefiles = []
326 326 clock = b'c:0:0'
327 327 else:
328 328 # always ignore
329 329 ignore = util.always
330 330 dirignore = util.always
331 331
332 332 matchfn = match.matchfn
333 333 matchalways = match.always()
334 334 dmap = self._map
335 if util.safehasattr(dmap, b'_map'):
335 if hasattr(dmap, '_map'):
336 336 # for better performance, directly access the inner dirstate map if the
337 337 # standard dirstate implementation is in use.
338 338 dmap = dmap._map
339 339
340 340 has_mtime = parsers.DIRSTATE_V2_HAS_MTIME
341 341 mtime_is_ambiguous = parsers.DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS
342 342 mask = has_mtime | mtime_is_ambiguous
343 343
344 344 # All entries that may not be clean
345 345 nonnormalset = {
346 346 f
347 347 for f, e in self._map.items()
348 348 if not e.maybe_clean
349 349 # same as "not has_mtime or mtime_is_ambiguous", but factored to only
350 350 # need a single access to flags for performance.
351 351 # `mask` removes all irrelevant bits, then we flip the `mtime` bit so
352 352 # its `true` value is NOT having an mtime, then check if either bit
353 353 # is set.
354 354 or bool((e.v2_data()[0] & mask) ^ has_mtime)
355 355 }
356 356
357 357 copymap = self._map.copymap
358 358 getkind = stat.S_IFMT
359 359 dirkind = stat.S_IFDIR
360 360 regkind = stat.S_IFREG
361 361 lnkkind = stat.S_IFLNK
362 362 join = self._join
363 363 normcase = util.normcase
364 364 fresh_instance = False
365 365
366 366 exact = skipstep3 = False
367 367 if match.isexact(): # match.exact
368 368 exact = True
369 369 dirignore = util.always # skip step 2
370 370 elif match.prefix(): # match.match, no patterns
371 371 skipstep3 = True
372 372
373 373 if not exact and self._checkcase:
374 374 # note that even though we could receive directory entries, we're only
375 375 # interested in checking if a file with the same name exists. So only
376 376 # normalize files if possible.
377 377 normalize = self._normalizefile
378 378 skipstep3 = False
379 379 else:
380 380 normalize = None
381 381
382 382 # step 1: find all explicit files
383 383 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
384 384
385 385 skipstep3 = skipstep3 and not (work or dirsnotfound)
386 386 work = [d for d in work if not dirignore(d[0])]
387 387
388 388 if not work and (exact or skipstep3):
389 389 for s in subrepos:
390 390 del results[s]
391 391 del results[b'.hg']
392 392 return results
393 393
394 394 # step 2: query Watchman
395 395 try:
396 396 # Use the user-configured timeout for the query.
397 397 # Add a little slack over the top of the user query to allow for
398 398 # overheads while transferring the data
399 399 self._watchmanclient.settimeout(state.timeout + 0.1)
400 400 result = self._watchmanclient.command(
401 401 b'query',
402 402 {
403 403 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
404 404 b'since': clock,
405 405 b'expression': [
406 406 b'not',
407 407 [
408 408 b'anyof',
409 409 [b'dirname', b'.hg'],
410 410 [b'name', b'.hg', b'wholename'],
411 411 ],
412 412 ],
413 413 b'sync_timeout': int(state.timeout * 1000),
414 414 b'empty_on_fresh_instance': state.walk_on_invalidate,
415 415 },
416 416 )
417 417 except Exception as ex:
418 418 _handleunavailable(self._ui, state, ex)
419 419 self._watchmanclient.clearconnection()
420 420 return bail(b'exception during run')
421 421 else:
422 422 # We need to propagate the last observed clock up so that we
423 423 # can use it for our next query
424 424 state.setlastclock(pycompat.sysbytes(result[b'clock']))
425 425 if result[b'is_fresh_instance']:
426 426 if state.walk_on_invalidate:
427 427 state.invalidate()
428 428 return bail(b'fresh instance')
429 429 fresh_instance = True
430 430 # Ignore any prior notable files from the state info
431 431 notefiles = []
432 432
433 433 # for file paths which require normalization, and where we may encounter a
434 434 # case collision, we store our own foldmap
435 435 if normalize:
436 436 foldmap = {normcase(k): k for k in results}
437 437
438 438 switch_slashes = pycompat.ossep == b'\\'
439 439 # The order of the results is, strictly speaking, undefined.
440 440 # For case changes on a case insensitive filesystem we may receive
441 441 # two entries, one with exists=True and another with exists=False.
442 442 # The exists=True entries in the same response should be interpreted
443 443 # as being happens-after the exists=False entries due to the way that
444 444 # Watchman tracks files. We use this property to reconcile deletes
445 445 # for name case changes.
446 446 for entry in result[b'files']:
447 447 fname = entry[b'name']
448 448
449 449 # Watchman always gives us a str. Normalize to bytes on Python 3
450 450 # using Watchman's encoding, if needed.
451 451 if not isinstance(fname, bytes):
452 452 fname = fname.encode(_watchmanencoding)
453 453
454 454 if _fixencoding:
455 455 fname = _watchmantofsencoding(fname)
456 456
457 457 if switch_slashes:
458 458 fname = fname.replace(b'\\', b'/')
459 459 if normalize:
460 460 normed = normcase(fname)
461 461 fname = normalize(fname, True, True)
462 462 foldmap[normed] = fname
463 463 fmode = entry[b'mode']
464 464 fexists = entry[b'exists']
465 465 kind = getkind(fmode)
466 466
467 467 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
468 468 return bail(b'nested-repo-detected')
469 469
470 470 if not fexists:
471 471 # if marked as deleted and we don't already have a change
472 472 # record, mark it as deleted. If we already have an entry
473 473 # for fname then it was either part of walkexplicit or was
474 474 # an earlier result that was a case change
475 475 if (
476 476 fname not in results
477 477 and fname in dmap
478 478 and (matchalways or matchfn(fname))
479 479 ):
480 480 results[fname] = None
481 481 elif kind == dirkind:
482 482 if fname in dmap and (matchalways or matchfn(fname)):
483 483 results[fname] = None
484 484 elif kind == regkind or kind == lnkkind:
485 485 if fname in dmap:
486 486 if matchalways or matchfn(fname):
487 487 results[fname] = entry
488 488 elif (matchalways or matchfn(fname)) and not ignore(fname):
489 489 results[fname] = entry
490 490 elif fname in dmap and (matchalways or matchfn(fname)):
491 491 results[fname] = None
492 492
493 493 # step 3: query notable files we don't already know about
494 494 # XXX try not to iterate over the entire dmap
495 495 if normalize:
496 496 # any notable files that have changed case will already be handled
497 497 # above, so just check membership in the foldmap
498 498 notefiles = {
499 499 normalize(f, True, True)
500 500 for f in notefiles
501 501 if normcase(f) not in foldmap
502 502 }
503 503 visit = {
504 504 f
505 505 for f in notefiles
506 506 if (f not in results and matchfn(f) and (f in dmap or not ignore(f)))
507 507 }
508 508
509 509 if not fresh_instance:
510 510 if matchalways:
511 511 visit.update(f for f in nonnormalset if f not in results)
512 512 visit.update(f for f in copymap if f not in results)
513 513 else:
514 514 visit.update(
515 515 f for f in nonnormalset if f not in results and matchfn(f)
516 516 )
517 517 visit.update(f for f in copymap if f not in results and matchfn(f))
518 518 else:
519 519 if matchalways:
520 520 visit.update(f for f, st in dmap.items() if f not in results)
521 521 visit.update(f for f in copymap if f not in results)
522 522 else:
523 523 visit.update(
524 524 f for f, st in dmap.items() if f not in results and matchfn(f)
525 525 )
526 526 visit.update(f for f in copymap if f not in results and matchfn(f))
527 527
528 528 audit = pathutil.pathauditor(self._root, cached=True).check
529 529 auditpass = [f for f in visit if audit(f)]
530 530 auditpass.sort()
531 531 auditfail = visit.difference(auditpass)
532 532 for f in auditfail:
533 533 results[f] = None
534 534
535 535 nf = iter(auditpass)
536 536 for st in util.statfiles([join(f) for f in auditpass]):
537 537 f = next(nf)
538 538 if st or f in dmap:
539 539 results[f] = st
540 540
541 541 for s in subrepos:
542 542 del results[s]
543 543 del results[b'.hg']
544 544 return results
545 545
546 546
547 547 def overridestatus(
548 548 orig,
549 549 self,
550 550 node1=b'.',
551 551 node2=None,
552 552 match=None,
553 553 ignored=False,
554 554 clean=False,
555 555 unknown=False,
556 556 listsubrepos=False,
557 557 ):
558 558 listignored = ignored
559 559 listclean = clean
560 560 listunknown = unknown
561 561
562 562 def _cmpsets(l1, l2):
563 563 try:
564 564 if b'FSMONITOR_LOG_FILE' in encoding.environ:
565 565 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
566 566 f = open(fn, b'wb')
567 567 else:
568 568 fn = b'fsmonitorfail.log'
569 569 f = self.vfs.open(fn, b'wb')
570 570 except (IOError, OSError):
571 571 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
572 572 return
573 573
574 574 try:
575 575 for i, (s1, s2) in enumerate(zip(l1, l2)):
576 576 if set(s1) != set(s2):
577 577 f.write(b'sets at position %d are unequal\n' % i)
578 578 f.write(b'watchman returned: %r\n' % s1)
579 579 f.write(b'stat returned: %r\n' % s2)
580 580 finally:
581 581 f.close()
582 582
583 583 if isinstance(node1, context.changectx):
584 584 ctx1 = node1
585 585 else:
586 586 ctx1 = self[node1]
587 587 if isinstance(node2, context.changectx):
588 588 ctx2 = node2
589 589 else:
590 590 ctx2 = self[node2]
591 591
592 592 working = ctx2.rev() is None
593 593 parentworking = working and ctx1 == self[b'.']
594 594 match = match or matchmod.always()
595 595
596 596 # Maybe we can use this opportunity to update Watchman's state.
597 597 # Mercurial uses workingcommitctx and/or memctx to represent the part of
598 598 # the workingctx that is to be committed. So don't update the state in
599 599 # that case.
600 600 # HG_PENDING is set in the environment when the dirstate is being updated
601 601 # in the middle of a transaction; we must not update our state in that
602 602 # case, or we risk forgetting about changes in the working copy.
603 603 updatestate = (
604 604 parentworking
605 605 and match.always()
606 606 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
607 607 and b'HG_PENDING' not in encoding.environ
608 608 )
609 609
610 610 try:
611 611 if self._fsmonitorstate.walk_on_invalidate:
612 612 # Use a short timeout to query the current clock. If that
613 613 # takes too long then we assume that the service will be slow
614 614 # to answer our query.
615 615 # walk_on_invalidate indicates that we prefer to walk the
616 616 # tree ourselves because we can ignore portions that Watchman
617 617 # cannot and we tend to be faster in the warmer buffer cache
618 618 # cases.
619 619 self._watchmanclient.settimeout(0.1)
620 620 else:
621 621 # Give Watchman more time to potentially complete its walk
622 622 # and return the initial clock. In this mode we assume that
623 623 # the filesystem will be slower than parsing a potentially
624 624 # very large Watchman result set.
625 625 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
626 626 startclock = self._watchmanclient.getcurrentclock()
627 627 except Exception as ex:
628 628 self._watchmanclient.clearconnection()
629 629 _handleunavailable(self.ui, self._fsmonitorstate, ex)
630 630 # boo, Watchman failed. bail
631 631 return orig(
632 632 node1,
633 633 node2,
634 634 match,
635 635 listignored,
636 636 listclean,
637 637 listunknown,
638 638 listsubrepos,
639 639 )
640 640
641 641 if updatestate:
642 642 # We need info about unknown files. This may make things slower the
643 643 # first time, but whatever.
644 644 stateunknown = True
645 645 else:
646 646 stateunknown = listunknown
647 647
648 648 if updatestate:
649 649 ps = poststatus(startclock)
650 650 self.addpostdsstatus(ps)
651 651
652 652 r = orig(
653 653 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
654 654 )
655 655 modified, added, removed, deleted, unknown, ignored, clean = r
656 656
657 657 if not listunknown:
658 658 unknown = []
659 659
660 660 # don't do paranoid checks if we're not going to query Watchman anyway
661 661 full = listclean or match.traversedir is not None
662 662 if self._fsmonitorstate.mode == b'paranoid' and not full:
663 663 # run status again and fall back to the old walk this time
664 664 self.dirstate._fsmonitordisable = True
665 665
666 666 # shut the UI up
667 667 quiet = self.ui.quiet
668 668 self.ui.quiet = True
669 669 fout, ferr = self.ui.fout, self.ui.ferr
670 670 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
671 671
672 672 try:
673 673 rv2 = orig(
674 674 node1,
675 675 node2,
676 676 match,
677 677 listignored,
678 678 listclean,
679 679 listunknown,
680 680 listsubrepos,
681 681 )
682 682 finally:
683 683 self.dirstate._fsmonitordisable = False
684 684 self.ui.quiet = quiet
685 685 self.ui.fout, self.ui.ferr = fout, ferr
686 686
687 687 # clean isn't tested since it's set to True above
688 688 with self.wlock():
689 689 _cmpsets(
690 690 [modified, added, removed, deleted, unknown, ignored, clean],
691 691 rv2,
692 692 )
693 693 modified, added, removed, deleted, unknown, ignored, clean = rv2
694 694
695 695 return scmutil.status(
696 696 modified, added, removed, deleted, unknown, ignored, clean
697 697 )
698 698
699 699
700 700 class poststatus:
701 701 def __init__(self, startclock):
702 702 self._startclock = pycompat.sysbytes(startclock)
703 703
704 704 def __call__(self, wctx, status):
705 705 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
706 706 hashignore = _hashignore(wctx.repo().dirstate._ignore)
707 707 notefiles = (
708 708 status.modified
709 709 + status.added
710 710 + status.removed
711 711 + status.deleted
712 712 + status.unknown
713 713 )
714 714 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
715 715
716 716
717 717 def makedirstate(repo, dirstate):
718 718 class fsmonitordirstate(dirstate.__class__):
719 719 def _fsmonitorinit(self, repo):
720 720 # _fsmonitordisable is used in paranoid mode
721 721 self._fsmonitordisable = False
722 722 self._fsmonitorstate = repo._fsmonitorstate
723 723 self._watchmanclient = repo._watchmanclient
724 724 self._repo = weakref.proxy(repo)
725 725
726 726 def walk(self, *args, **kwargs):
727 727 orig = super(fsmonitordirstate, self).walk
728 728 if self._fsmonitordisable:
729 729 return orig(*args, **kwargs)
730 730 return overridewalk(orig, self, *args, **kwargs)
731 731
732 732 def rebuild(self, *args, **kwargs):
733 733 self._fsmonitorstate.invalidate()
734 734 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
735 735
736 736 def invalidate(self, *args, **kwargs):
737 737 self._fsmonitorstate.invalidate()
738 738 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
739 739
740 740 dirstate.__class__ = fsmonitordirstate
741 741 dirstate._fsmonitorinit(repo)
742 742
743 743
744 744 def wrapdirstate(orig, self):
745 745 ds = orig(self)
746 746 # only override the dirstate when Watchman is available for the repo
747 if util.safehasattr(self, b'_fsmonitorstate'):
747 if hasattr(self, '_fsmonitorstate'):
748 748 makedirstate(self, ds)
749 749 return ds
750 750
751 751
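The probe just above is the pattern this changeset touches throughout: util.safehasattr dates from Python 2, where the built-in hasattr() swallowed every exception; on Python 3 hasattr() only suppresses AttributeError, so the wrapper no longer buys anything. Roughly, the helper being retired amounts to (paraphrased, not the exact util.py source):

_notset = object()

def safehasattr(thing, attr):
    # behaves like hasattr() on Python 3: only a missing attribute makes
    # getattr() fall back to the sentinel
    return getattr(thing, attr, _notset) is not _notset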
752 752 def extsetup(ui):
753 753 extensions.wrapfilecache(
754 754 localrepo.localrepository, b'dirstate', wrapdirstate
755 755 )
756 756 if pycompat.isdarwin:
757 757 # An assist for avoiding the dangling-symlink fsevents bug
758 758 extensions.wrapfunction(os, 'symlink', wrapsymlink)
759 759
760 760 extensions.wrapfunction(merge, '_update', wrapupdate)
761 761
762 762
763 763 def wrapsymlink(orig, source, link_name):
764 764 """if we create a dangling symlink, also touch the parent dir
765 765 to encourage fsevents notifications to work more correctly"""
766 766 try:
767 767 return orig(source, link_name)
768 768 finally:
769 769 try:
770 770 os.utime(os.path.dirname(link_name), None)
771 771 except OSError:
772 772 pass
773 773
774 774
775 775 class state_update:
776 776 """This context manager is responsible for dispatching the state-enter
777 777 and state-leave signals to the watchman service. The enter and leave
778 778 methods can be invoked manually (for scenarios where context manager
779 779 semantics are not possible). If parameters oldnode and newnode are None,
780 780 they will be populated based on the current working copy in enter and
781 781 leave, respectively. Similarly, if the distance is None, it will be
782 782 calculated based on the oldnode and newnode in the leave method."""
783 783
784 784 def __init__(
785 785 self,
786 786 repo,
787 787 name,
788 788 oldnode=None,
789 789 newnode=None,
790 790 distance=None,
791 791 partial=False,
792 792 ):
793 793 self.repo = repo.unfiltered()
794 794 self.name = name
795 795 self.oldnode = oldnode
796 796 self.newnode = newnode
797 797 self.distance = distance
798 798 self.partial = partial
799 799 self._lock = None
800 800 self.need_leave = False
801 801
802 802 def __enter__(self):
803 803 self.enter()
804 804
805 805 def enter(self):
806 806 # Make sure we have a wlock prior to sending notifications to watchman.
807 807 # We don't want to race with other actors. In the update case,
808 808 # merge.update is going to take the wlock almost immediately. We are
809 809 # effectively extending the lock around several short sanity checks.
810 810 if self.oldnode is None:
811 811 self.oldnode = self.repo[b'.'].node()
812 812
813 813 if self.repo.currentwlock() is None:
814 if util.safehasattr(self.repo, b'wlocknostateupdate'):
814 if hasattr(self.repo, 'wlocknostateupdate'):
815 815 self._lock = self.repo.wlocknostateupdate()
816 816 else:
817 817 self._lock = self.repo.wlock()
818 818 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
819 819 return self
820 820
821 821 def __exit__(self, type_, value, tb):
822 822 abort = True if type_ else False
823 823 self.exit(abort=abort)
824 824
825 825 def exit(self, abort=False):
826 826 try:
827 827 if self.need_leave:
828 828 status = b'failed' if abort else b'ok'
829 829 if self.newnode is None:
830 830 self.newnode = self.repo[b'.'].node()
831 831 if self.distance is None:
832 832 self.distance = calcdistance(
833 833 self.repo, self.oldnode, self.newnode
834 834 )
835 835 self._state(b'state-leave', hex(self.newnode), status=status)
836 836 finally:
837 837 self.need_leave = False
838 838 if self._lock:
839 839 self._lock.release()
840 840
841 841 def _state(self, cmd, commithash, status=b'ok'):
842 if not util.safehasattr(self.repo, b'_watchmanclient'):
842 if not hasattr(self.repo, '_watchmanclient'):
843 843 return False
844 844 try:
845 845 self.repo._watchmanclient.command(
846 846 cmd,
847 847 {
848 848 b'name': self.name,
849 849 b'metadata': {
850 850 # the target revision
851 851 b'rev': commithash,
852 852 # approximate number of commits between current and target
853 853 b'distance': self.distance if self.distance else 0,
854 854 # success/failure (only really meaningful for state-leave)
855 855 b'status': status,
856 856 # whether the working copy parent is changing
857 857 b'partial': self.partial,
858 858 },
859 859 },
860 860 )
861 861 return True
862 862 except Exception as e:
863 863 # Swallow any errors; fire and forget
864 864 self.repo.ui.log(
865 865 b'watchman', b'Exception %s while running %s\n', e, cmd
866 866 )
867 867 return False
868 868
869 869
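As the class docstring notes, enter and exit can also be driven by hand when a with-block does not fit; this is how the transaction hook in reposetup below uses it. A minimal sketch with a hypothetical repo object:

su = state_update(repo, name=b"hg.transaction")
su.enter()                # takes the wlock if needed, sends state-enter
try:
    pass                  # ... long-running working copy mutation ...
finally:
    su.exit(abort=False)  # sends state-leave, releases any lock it took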
870 870 # Estimate the distance between two nodes
871 871 def calcdistance(repo, oldnode, newnode):
872 872 anc = repo.changelog.ancestor(oldnode, newnode)
873 873 ancrev = repo[anc].rev()
874 874 distance = abs(repo[oldnode].rev() - ancrev) + abs(
875 875 repo[newnode].rev() - ancrev
876 876 )
877 877 return distance
878 878
879 879
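A quick worked example of the estimate above (hypothetical revision numbers): with the old node at rev 12, the new node at rev 20 and their common ancestor at rev 10, the reported distance is |12 - 10| + |20 - 10| = 12.

# hypothetical revision numbers, mirroring the arithmetic in calcdistance
old_rev, new_rev, anc_rev = 12, 20, 10
distance = abs(old_rev - anc_rev) + abs(new_rev - anc_rev)  # == 12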
880 880 # Bracket working copy updates with calls to the watchman state-enter
881 881 # and state-leave commands. This allows clients to perform more intelligent
882 882 # settling during bulk file change scenarios
883 883 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
884 884 def wrapupdate(
885 885 orig,
886 886 repo,
887 887 node,
888 888 branchmerge,
889 889 force,
890 890 ancestor=None,
891 891 mergeancestor=False,
892 892 labels=None,
893 893 matcher=None,
894 894 **kwargs
895 895 ):
896 896
897 897 distance = 0
898 898 partial = True
899 899 oldnode = repo[b'.'].node()
900 900 newnode = repo[node].node()
901 901 if matcher is None or matcher.always():
902 902 partial = False
903 903 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
904 904
905 905 with state_update(
906 906 repo,
907 907 name=b"hg.update",
908 908 oldnode=oldnode,
909 909 newnode=newnode,
910 910 distance=distance,
911 911 partial=partial,
912 912 ):
913 913 return orig(
914 914 repo,
915 915 node,
916 916 branchmerge,
917 917 force,
918 918 ancestor,
919 919 mergeancestor,
920 920 labels,
921 921 matcher,
922 922 **kwargs
923 923 )
924 924
925 925
926 926 def repo_has_depth_one_nested_repo(repo):
927 927 for f in repo.wvfs.listdir():
928 928 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
929 929 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
930 930 repo.ui.debug(msg % f)
931 931 return True
932 932 return False
933 933
934 934
935 935 def reposetup(ui, repo):
936 936 # We don't work with largefiles or inotify
937 937 exts = extensions.enabled()
938 938 for ext in _blacklist:
939 939 if ext in exts:
940 940 ui.warn(
941 941 _(
942 942 b'The fsmonitor extension is incompatible with the %s '
943 943 b'extension and has been disabled.\n'
944 944 )
945 945 % ext
946 946 )
947 947 return
948 948
949 949 if repo.local():
950 950 # We don't work with subrepos either.
951 951 #
952 952 # Checking repo[None].substate can cause a dirstate parse, which is too
953 953 # slow. Instead, look for the .hgsubstate and .hgsub files.
954 954 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
955 955 return
956 956
957 957 if repo_has_depth_one_nested_repo(repo):
958 958 return
959 959
960 960 fsmonitorstate = state.state(repo)
961 961 if fsmonitorstate.mode == b'off':
962 962 return
963 963
964 964 try:
965 965 client = watchmanclient.client(repo.ui, repo.root)
966 966 except Exception as ex:
967 967 _handleunavailable(ui, fsmonitorstate, ex)
968 968 return
969 969
970 970 repo._fsmonitorstate = fsmonitorstate
971 971 repo._watchmanclient = client
972 972
973 973 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
974 974 if cached:
975 975 # at this point since fsmonitorstate wasn't present,
976 976 # repo.dirstate is not a fsmonitordirstate
977 977 makedirstate(repo, dirstate)
978 978
979 979 class fsmonitorrepo(repo.__class__):
980 980 def status(self, *args, **kwargs):
981 981 orig = super(fsmonitorrepo, self).status
982 982 return overridestatus(orig, self, *args, **kwargs)
983 983
984 984 def wlocknostateupdate(self, *args, **kwargs):
985 985 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
986 986
987 987 def wlock(self, *args, **kwargs):
988 988 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
989 989 if not ui.configbool(
990 990 b"experimental", b"fsmonitor.transaction_notify"
991 991 ):
992 992 return l
993 993 if l.held != 1:
994 994 return l
995 995 origrelease = l.releasefn
996 996
997 997 def staterelease():
998 998 if origrelease:
999 999 origrelease()
1000 1000 if l.stateupdate:
1001 1001 l.stateupdate.exit()
1002 1002 l.stateupdate = None
1003 1003
1004 1004 try:
1005 1005 l.stateupdate = None
1006 1006 l.stateupdate = state_update(self, name=b"hg.transaction")
1007 1007 l.stateupdate.enter()
1008 1008 l.releasefn = staterelease
1009 1009 except Exception as e:
1010 1010 # Swallow any errors; fire and forget
1011 1011 self.ui.log(
1012 1012 b'watchman', b'Exception in state update %s\n', e
1013 1013 )
1014 1014 return l
1015 1015
1016 1016 repo.__class__ = fsmonitorrepo
@@ -1,128 +1,128 b''
1 1 # watchmanclient.py - Watchman client for the fsmonitor extension
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import getpass
10 10
11 11 from mercurial import (
12 12 encoding,
13 13 util,
14 14 )
15 15 from mercurial.utils import (
16 16 procutil,
17 17 stringutil,
18 18 )
19 19
20 20 from . import pywatchman
21 21
22 22
23 23 class Unavailable(Exception):
24 24 def __init__(self, msg, warn=True, invalidate=False):
25 25 self.msg = msg
26 26 self.warn = warn
27 27 if self.msg == b'timed out waiting for response':
28 28 self.warn = False
29 29 self.invalidate = invalidate
30 30
31 31 def __bytes__(self):
32 32 if self.warn:
33 33 return b'warning: Watchman unavailable: %s' % self.msg
34 34 else:
35 35 return b'Watchman unavailable: %s' % self.msg
36 36
37 37 __str__ = encoding.strmethod(__bytes__)
38 38
39 39
40 40 class WatchmanNoRoot(Unavailable):
41 41 def __init__(self, root, msg):
42 42 self.root = root
43 43 super(WatchmanNoRoot, self).__init__(msg)
44 44
45 45
46 46 class client:
47 47 def __init__(self, ui, root, timeout=1.0):
48 48 err = None
49 49 if not self._user:
50 50 err = b"couldn't get user"
51 51 warn = True
52 52 if self._user in ui.configlist(b'fsmonitor', b'blacklistusers'):
53 53 err = b'user %s in blacklist' % self._user
54 54 warn = False
55 55
56 56 if err:
57 57 raise Unavailable(err, warn)
58 58
59 59 self._timeout = timeout
60 60 self._watchmanclient = None
61 61 self._root = root
62 62 self._ui = ui
63 63 self._firsttime = True
64 64
65 65 def settimeout(self, timeout):
66 66 self._timeout = timeout
67 67 if self._watchmanclient is not None:
68 68 self._watchmanclient.setTimeout(timeout)
69 69
70 70 def getcurrentclock(self):
71 71 result = self.command(b'clock')
72 if not util.safehasattr(result, 'clock'):
72 if not hasattr(result, 'clock'):
73 73 raise Unavailable(
74 74 b'clock result is missing clock value', invalidate=True
75 75 )
76 76 return result.clock
77 77
78 78 def clearconnection(self):
79 79 self._watchmanclient = None
80 80
81 81 def available(self):
82 82 return self._watchmanclient is not None or self._firsttime
83 83
84 84 @util.propertycache
85 85 def _user(self):
86 86 try:
87 87 return getpass.getuser()
88 88 except KeyError:
89 89 # couldn't figure out our user
90 90 return None
91 91
92 92 def _command(self, *args):
93 93 watchmanargs = (args[0], self._root) + args[1:]
94 94 try:
95 95 if self._watchmanclient is None:
96 96 self._firsttime = False
97 97 watchman_exe = self._ui.configpath(
98 98 b'fsmonitor', b'watchman_exe'
99 99 )
100 100 self._watchmanclient = pywatchman.client(
101 101 timeout=self._timeout,
102 102 useImmutableBser=True,
103 103 binpath=procutil.tonativestr(watchman_exe),
104 104 )
105 105 return self._watchmanclient.query(*watchmanargs)
106 106 except pywatchman.CommandError as ex:
107 107 if 'unable to resolve root' in ex.msg:
108 108 raise WatchmanNoRoot(
109 109 self._root, stringutil.forcebytestr(ex.msg)
110 110 )
111 111 raise Unavailable(stringutil.forcebytestr(ex.msg))
112 112 except pywatchman.WatchmanError as ex:
113 113 raise Unavailable(stringutil.forcebytestr(ex))
114 114
115 115 def command(self, *args):
116 116 try:
117 117 try:
118 118 return self._command(*args)
119 119 except WatchmanNoRoot:
120 120 # this 'watch' command can also raise a WatchmanNoRoot if
121 121 # watchman refuses to accept this root
122 122 self._command(b'watch')
123 123 return self._command(*args)
124 124 except Unavailable:
125 125 # this is in an outer scope to catch Unavailable from any of the
126 126 # above _command calls
127 127 self._watchmanclient = None
128 128 raise
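A minimal sketch of how the fsmonitor code drives this client (hypothetical ui and root values; the real call sites are in hgext/fsmonitor/__init__.py):

c = client(ui, root, timeout=1.0)
try:
    clock = c.getcurrentclock()  # a watchman clockspec, e.g. 'c:1521512531:42'
except Unavailable:
    clock = None                 # caller falls back to a plain status walk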
@@ -1,610 +1,606 b''
1 1 # journal.py
2 2 #
3 3 # Copyright 2014-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """track previous positions of bookmarks (EXPERIMENTAL)
8 8
9 9 This extension adds a new command: `hg journal`, which shows you where
10 10 bookmarks were previously located.
11 11
12 12 """
13 13
14 14
15 15 import collections
16 16 import os
17 17 import weakref
18 18
19 19 from mercurial.i18n import _
20 20 from mercurial.node import (
21 21 bin,
22 22 hex,
23 23 )
24 24
25 25 from mercurial import (
26 26 bookmarks,
27 27 cmdutil,
28 28 dispatch,
29 29 encoding,
30 30 error,
31 31 extensions,
32 32 hg,
33 33 localrepo,
34 34 lock,
35 35 logcmdutil,
36 36 pycompat,
37 37 registrar,
38 38 util,
39 39 )
40 40 from mercurial.utils import (
41 41 dateutil,
42 42 procutil,
43 43 stringutil,
44 44 )
45 45
46 46 cmdtable = {}
47 47 command = registrar.command(cmdtable)
48 48
49 49 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
50 50 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
51 51 # be specifying the version(s) of Mercurial they are tested with, or
52 52 # leave the attribute unspecified.
53 53 testedwith = b'ships-with-hg-core'
54 54
55 55 # storage format version; increment when the format changes
56 56 storageversion = 0
57 57
58 58 # namespaces
59 59 bookmarktype = b'bookmark'
60 60 wdirparenttype = b'wdirparent'
61 61 # In a shared repository, what shared feature name is used
62 62 # to indicate this namespace is shared with the source?
63 63 sharednamespaces = {
64 64 bookmarktype: hg.sharedbookmarks,
65 65 }
66 66
67 67 # Journal recording, register hooks and storage object
68 68 def extsetup(ui):
69 69 extensions.wrapfunction(dispatch, 'runcommand', runcommand)
70 70 extensions.wrapfunction(bookmarks.bmstore, '_write', recordbookmarks)
71 71 extensions.wrapfilecache(
72 72 localrepo.localrepository, b'dirstate', wrapdirstate
73 73 )
74 74 extensions.wrapfunction(hg, 'postshare', wrappostshare)
75 75 extensions.wrapfunction(hg, 'copystore', unsharejournal)
76 76
77 77
78 78 def reposetup(ui, repo):
79 79 if repo.local():
80 80 repo.journal = journalstorage(repo)
81 81 repo._wlockfreeprefix.add(b'namejournal')
82 82
83 83 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
84 84 if cached:
85 85 # already instantiated dirstate isn't yet marked as
86 86 # "journal"-ing, even though repo.dirstate() was already
87 87 # wrapped by own wrapdirstate()
88 88 _setupdirstate(repo, dirstate)
89 89
90 90
91 91 def runcommand(orig, lui, repo, cmd, fullargs, *args):
92 92 """Track the command line options for recording in the journal"""
93 93 journalstorage.recordcommand(*fullargs)
94 94 return orig(lui, repo, cmd, fullargs, *args)
95 95
96 96
97 97 def _setupdirstate(repo, dirstate):
98 98 dirstate.journalstorage = repo.journal
99 99 dirstate.addparentchangecallback(b'journal', recorddirstateparents)
100 100
101 101
102 102 # hooks to record dirstate changes
103 103 def wrapdirstate(orig, repo):
104 104 """Make journal storage available to the dirstate object"""
105 105 dirstate = orig(repo)
106 if util.safehasattr(repo, 'journal'):
106 if hasattr(repo, 'journal'):
107 107 _setupdirstate(repo, dirstate)
108 108 return dirstate
109 109
110 110
111 111 def recorddirstateparents(dirstate, old, new):
112 112 """Records all dirstate parent changes in the journal."""
113 113 old = list(old)
114 114 new = list(new)
115 if util.safehasattr(dirstate, 'journalstorage'):
115 if hasattr(dirstate, 'journalstorage'):
116 116 # only record two hashes if there was a merge
117 117 oldhashes = old[:1] if old[1] == dirstate._nodeconstants.nullid else old
118 118 newhashes = new[:1] if new[1] == dirstate._nodeconstants.nullid else new
119 119 dirstate.journalstorage.record(
120 120 wdirparenttype, b'.', oldhashes, newhashes
121 121 )
122 122
123 123
124 124 # hooks to record bookmark changes (both local and remote)
125 125 def recordbookmarks(orig, store, fp):
126 126 """Records all bookmark changes in the journal."""
127 127 repo = store._repo
128 if util.safehasattr(repo, 'journal'):
128 if hasattr(repo, 'journal'):
129 129 oldmarks = bookmarks.bmstore(repo)
130 130 all_marks = set(b for b, n in oldmarks.items())
131 131 all_marks.update(b for b, n in store.items())
132 132 for mark in sorted(all_marks):
133 133 value = store.get(mark, repo.nullid)
134 134 oldvalue = oldmarks.get(mark, repo.nullid)
135 135 if value != oldvalue:
136 136 repo.journal.record(bookmarktype, mark, oldvalue, value)
137 137 return orig(store, fp)
138 138
139 139
140 140 # shared repository support
141 141 def _readsharedfeatures(repo):
142 142 """A set of shared features for this repository"""
143 143 try:
144 144 return set(repo.vfs.read(b'shared').splitlines())
145 145 except FileNotFoundError:
146 146 return set()
147 147
148 148
149 149 def _mergeentriesiter(*iterables, **kwargs):
150 150 """Given a set of sorted iterables, yield the next entry in merged order
151 151
152 152 Note that by default entries go from most recent to oldest.
153 153 """
154 154 order = kwargs.pop('order', max)
155 155 iterables = [iter(it) for it in iterables]
156 156 # this tracks still active iterables; iterables are deleted as they are
157 157 # exhausted, which is why this is a dictionary and why each entry also
158 158 # stores the key. Entries are mutable so we can store the next value each
159 159 # time.
160 160 iterable_map = {}
161 161 for key, it in enumerate(iterables):
162 162 try:
163 163 iterable_map[key] = [next(it), key, it]
164 164 except StopIteration:
165 165 # empty entry, can be ignored
166 166 pass
167 167
168 168 while iterable_map:
169 169 value, key, it = order(iterable_map.values())
170 170 yield value
171 171 try:
172 172 iterable_map[key][0] = next(it)
173 173 except StopIteration:
174 174 # this iterable is empty, remove it from consideration
175 175 del iterable_map[key]
176 176
177 177
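For intuition, a tiny sketch with plain integers standing in for journal entries (each input already sorted newest first, i.e. descending):

merged = list(_mergeentriesiter([9, 5, 1], [8, 7, 2]))
# merged == [9, 8, 7, 5, 2, 1] -- still newest first across both sources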
178 178 def wrappostshare(orig, sourcerepo, destrepo, **kwargs):
179 179 """Mark this shared working copy as sharing journal information"""
180 180 with destrepo.wlock():
181 181 orig(sourcerepo, destrepo, **kwargs)
182 182 with destrepo.vfs(b'shared', b'a') as fp:
183 183 fp.write(b'journal\n')
184 184
185 185
186 186 def unsharejournal(orig, ui, repo, repopath):
187 187 """Copy shared journal entries into this repo when unsharing"""
188 if (
189 repo.path == repopath
190 and repo.shared()
191 and util.safehasattr(repo, 'journal')
192 ):
188 if repo.path == repopath and repo.shared() and hasattr(repo, 'journal'):
193 189 sharedrepo = hg.sharedreposource(repo)
194 190 sharedfeatures = _readsharedfeatures(repo)
195 191 if sharedrepo and sharedfeatures > {b'journal'}:
196 192 # there is a shared repository and there are shared journal entries
197 193 # to copy. move shared date over from source to destination but
198 194 # move the local file first
199 195 if repo.vfs.exists(b'namejournal'):
200 196 journalpath = repo.vfs.join(b'namejournal')
201 197 util.rename(journalpath, journalpath + b'.bak')
202 198 storage = repo.journal
203 199 local = storage._open(
204 200 repo.vfs, filename=b'namejournal.bak', _newestfirst=False
205 201 )
206 202 shared = (
207 203 e
208 204 for e in storage._open(sharedrepo.vfs, _newestfirst=False)
209 205 if sharednamespaces.get(e.namespace) in sharedfeatures
210 206 )
211 207 for entry in _mergeentriesiter(local, shared, order=min):
212 208 storage._write(repo.vfs, entry)
213 209
214 210 return orig(ui, repo, repopath)
215 211
216 212
217 213 class journalentry(
218 214 collections.namedtuple(
219 215 'journalentry',
220 216 'timestamp user command namespace name oldhashes newhashes',
221 217 )
222 218 ):
223 219 """Individual journal entry
224 220
225 221 * timestamp: a mercurial (time, timezone) tuple
226 222 * user: the username that ran the command
227 223 * namespace: the entry namespace, an opaque string
228 224 * name: the name of the changed item, opaque string with meaning in the
229 225 namespace
230 226 * command: the hg command that triggered this record
231 227 * oldhashes: a tuple of one or more binary hashes for the old location
232 228 * newhashes: a tuple of one or more binary hashes for the new location
233 229
234 230 Handles serialisation from and to the storage format. Fields are
235 231 separated by newlines, hashes are written out in hex separated by commas,
236 232 timestamp and timezone are separated by a space.
237 233
238 234 """
239 235
240 236 @classmethod
241 237 def fromstorage(cls, line):
242 238 (
243 239 time,
244 240 user,
245 241 command,
246 242 namespace,
247 243 name,
248 244 oldhashes,
249 245 newhashes,
250 246 ) = line.split(b'\n')
251 247 timestamp, tz = time.split()
252 248 timestamp, tz = float(timestamp), int(tz)
253 249 oldhashes = tuple(bin(hash) for hash in oldhashes.split(b','))
254 250 newhashes = tuple(bin(hash) for hash in newhashes.split(b','))
255 251 return cls(
256 252 (timestamp, tz),
257 253 user,
258 254 command,
259 255 namespace,
260 256 name,
261 257 oldhashes,
262 258 newhashes,
263 259 )
264 260
265 261 def __bytes__(self):
266 262 """bytes representation for storage"""
267 263 time = b' '.join(map(pycompat.bytestr, self.timestamp))
268 264 oldhashes = b','.join([hex(hash) for hash in self.oldhashes])
269 265 newhashes = b','.join([hex(hash) for hash in self.newhashes])
270 266 return b'\n'.join(
271 267 (
272 268 time,
273 269 self.user,
274 270 self.command,
275 271 self.namespace,
276 272 self.name,
277 273 oldhashes,
278 274 newhashes,
279 275 )
280 276 )
281 277
282 278 __str__ = encoding.strmethod(__bytes__)
283 279
284 280
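A hypothetical round trip through the serialisation described above (made-up user, command and hashes):

entry = journalentry(
    (1700000000.0, 0),           # mercurial (timestamp, timezone) pair
    b'alice', b'hg up default',  # user and recorded command line
    wdirparenttype, b'.',        # namespace and name
    (b'\x11' * 20,), (b'\x22' * 20,),  # old and new binary hashes
)
line = bytes(entry)                        # newline-separated storage form
assert journalentry.fromstorage(line) == entry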
285 281 class journalstorage:
286 282 """Storage for journal entries
287 283
288 284 Entries are divided over two files; one with entries that pertain to the
289 285 local working copy *only*, and one with entries that are shared across
290 286 multiple working copies when shared using the share extension.
291 287
292 288 Entries are stored with NUL bytes as separators. See the journalentry
293 289 class for the per-entry structure.
294 290
295 291 The file format starts with an integer version, delimited by a NUL.
296 292
297 293 This storage uses a dedicated lock; this makes it easier to avoid issues
298 294 with adding entries while the regular wlock is unlocked (e.g.
299 295 the dirstate).
300 296
301 297 """
302 298
303 299 _currentcommand = ()
304 300 _lockref = None
305 301
306 302 def __init__(self, repo):
307 303 self.user = procutil.getuser()
308 304 self.ui = repo.ui
309 305 self.vfs = repo.vfs
310 306
311 307 # is this working copy using a shared storage?
312 308 self.sharedfeatures = self.sharedvfs = None
313 309 if repo.shared():
314 310 features = _readsharedfeatures(repo)
315 311 sharedrepo = hg.sharedreposource(repo)
316 312 if sharedrepo is not None and b'journal' in features:
317 313 self.sharedvfs = sharedrepo.vfs
318 314 self.sharedfeatures = features
319 315
320 316 # track the current command for recording in journal entries
321 317 @property
322 318 def command(self):
323 319 commandstr = b' '.join(
324 320 map(procutil.shellquote, journalstorage._currentcommand)
325 321 )
326 322 if b'\n' in commandstr:
327 323 # truncate multi-line commands
328 324 commandstr = commandstr.partition(b'\n')[0] + b' ...'
329 325 return commandstr
330 326
331 327 @classmethod
332 328 def recordcommand(cls, *fullargs):
333 329 """Set the current hg arguments, stored with recorded entries"""
334 330 # Set the current command on the class because we may have started
335 331 # with a non-local repo (cloning for example).
336 332 cls._currentcommand = fullargs
337 333
338 334 def _currentlock(self, lockref):
339 335 """Returns the lock if it's held, or None if it's not.
340 336
341 337 (This is copied from the localrepo class)
342 338 """
343 339 if lockref is None:
344 340 return None
345 341 l = lockref()
346 342 if l is None or not l.held:
347 343 return None
348 344 return l
349 345
350 346 def jlock(self, vfs):
351 347 """Create a lock for the journal file"""
352 348 if self._currentlock(self._lockref) is not None:
353 349 raise error.Abort(_(b'journal lock does not support nesting'))
354 350 desc = _(b'journal of %s') % vfs.base
355 351 try:
356 352 l = lock.lock(vfs, b'namejournal.lock', 0, desc=desc)
357 353 except error.LockHeld as inst:
358 354 self.ui.warn(
359 355 _(b"waiting for lock on %s held by %r\n") % (desc, inst.locker)
360 356 )
361 357 # default to 600 seconds timeout
362 358 l = lock.lock(
363 359 vfs,
364 360 b'namejournal.lock',
365 361 self.ui.configint(b"ui", b"timeout"),
366 362 desc=desc,
367 363 )
368 364 self.ui.warn(_(b"got lock after %s seconds\n") % l.delay)
369 365 self._lockref = weakref.ref(l)
370 366 return l
371 367
372 368 def record(self, namespace, name, oldhashes, newhashes):
373 369 """Record a new journal entry
374 370
375 371 * namespace: an opaque string; this can be used to filter on the type
376 372 of recorded entries.
377 373 * name: the name defining this entry; for bookmarks, this is the
378 374 bookmark name. Can be filtered on when retrieving entries.
379 375 * oldhashes and newhashes: each a single binary hash, or a list of
380 376 binary hashes. These represent the old and new position of the named
381 377 item.
382 378
383 379 """
384 380 if not isinstance(oldhashes, list):
385 381 oldhashes = [oldhashes]
386 382 if not isinstance(newhashes, list):
387 383 newhashes = [newhashes]
388 384
389 385 entry = journalentry(
390 386 dateutil.makedate(),
391 387 self.user,
392 388 self.command,
393 389 namespace,
394 390 name,
395 391 oldhashes,
396 392 newhashes,
397 393 )
398 394
399 395 vfs = self.vfs
400 396 if self.sharedvfs is not None:
401 397 # write to the shared repository if this feature is being
402 398 # shared between working copies.
403 399 if sharednamespaces.get(namespace) in self.sharedfeatures:
404 400 vfs = self.sharedvfs
405 401
406 402 self._write(vfs, entry)
407 403
408 404 def _write(self, vfs, entry):
409 405 with self.jlock(vfs):
410 406 # open file in append mode to ensure it is created if missing
411 407 with vfs(b'namejournal', mode=b'a+b') as f:
412 408 f.seek(0, os.SEEK_SET)
413 409 # Read just enough bytes to get a version number (up to 2
414 410 # digits plus separator)
415 411 version = f.read(3).partition(b'\0')[0]
416 412 if version and version != b"%d" % storageversion:
417 413 # different version of the storage. Exit early (and not
418 414 # write anything) if this is not a version we can handle or
419 415 # the file is corrupt. In future, perhaps rotate the file
420 416 # instead?
421 417 self.ui.warn(
422 418 _(b"unsupported journal file version '%s'\n") % version
423 419 )
424 420 return
425 421 if not version:
426 422 # empty file, write version first
427 423 f.write((b"%d" % storageversion) + b'\0')
428 424 f.seek(0, os.SEEK_END)
429 425 f.write(bytes(entry) + b'\0')
430 426
431 427 def filtered(self, namespace=None, name=None):
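The resulting on-disk layout is the storage version followed by NUL-terminated entries, roughly (hypothetical file content):

# namejournal after two records have been written with storageversion == 0:
#   b"0\x00" + bytes(entry1) + b"\x00" + bytes(entry2) + b"\x00"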
432 428 """Yield all journal entries with the given namespace or name
433 429
434 430 Both the namespace and the name are optional; if neither is given all
435 431 entries in the journal are produced.
436 432
437 433 Matching supports regular expressions by using the `re:` prefix
438 434 (use `literal:` to match names or namespaces that start with `re:`)
439 435
440 436 """
441 437 if namespace is not None:
442 438 namespace = stringutil.stringmatcher(namespace)[-1]
443 439 if name is not None:
444 440 name = stringutil.stringmatcher(name)[-1]
445 441 for entry in self:
446 442 if namespace is not None and not namespace(entry.namespace):
447 443 continue
448 444 if name is not None and not name(entry.name):
449 445 continue
450 446 yield entry
451 447
452 448 def __iter__(self):
453 449 """Iterate over the storage
454 450
455 451 Yields journalentry instances for each contained journal record.
456 452
457 453 """
458 454 local = self._open(self.vfs)
459 455
460 456 if self.sharedvfs is None:
461 457 return local
462 458
463 459 # iterate over both local and shared entries, but only those
464 460 # shared entries that are among the currently shared features
465 461 shared = (
466 462 e
467 463 for e in self._open(self.sharedvfs)
468 464 if sharednamespaces.get(e.namespace) in self.sharedfeatures
469 465 )
470 466 return _mergeentriesiter(local, shared)
471 467
472 468 def _open(self, vfs, filename=b'namejournal', _newestfirst=True):
473 469 if not vfs.exists(filename):
474 470 return
475 471
476 472 with vfs(filename) as f:
477 473 raw = f.read()
478 474
479 475 lines = raw.split(b'\0')
480 476 version = lines and lines[0]
481 477 if version != b"%d" % storageversion:
482 478 version = version or _(b'not available')
483 479 raise error.Abort(_(b"unknown journal file version '%s'") % version)
484 480
485 481 # Skip the first line, it's a version number. Normally we iterate over
486 482 # these in reverse order to list newest first; only when copying across
487 483 # a shared storage do we forgo reversing.
488 484 lines = lines[1:]
489 485 if _newestfirst:
490 486 lines = reversed(lines)
491 487 for line in lines:
492 488 if not line:
493 489 continue
494 490 yield journalentry.fromstorage(line)
495 491
496 492
497 493 # journal reading
498 494 # log options that don't make sense for journal
499 495 _ignoreopts = (b'no-merges', b'graph')
500 496
501 497
502 498 @command(
503 499 b'journal',
504 500 [
505 501 (b'', b'all', None, b'show history for all names'),
506 502 (b'c', b'commits', None, b'show commit metadata'),
507 503 ]
508 504 + [opt for opt in cmdutil.logopts if opt[1] not in _ignoreopts],
509 505 b'[OPTION]... [BOOKMARKNAME]',
510 506 helpcategory=command.CATEGORY_CHANGE_ORGANIZATION,
511 507 )
512 508 def journal(ui, repo, *args, **opts):
513 509 """show the previous position of bookmarks and the working copy
514 510
515 511 The journal is used to see the previous commits that bookmarks and the
516 512 working copy pointed to. By default the previous locations of the working
517 513 copy are shown. Passing a bookmark name will show all the previous positions of
518 514 that bookmark. Use the --all switch to show previous locations for all
519 515 bookmarks and the working copy; each line will then include the bookmark
520 516 name, or '.' for the working copy, as well.
521 517
522 518 If `name` starts with `re:`, the remainder of the name is treated as
523 519 a regular expression. To match a name that actually starts with `re:`,
524 520 use the prefix `literal:`.
525 521
526 522 By default hg journal only shows the commit hash and the command that was
527 523 running at that time. -v/--verbose will show the prior hash, the user, and
528 524 the time at which it happened.
529 525
530 526 Use -c/--commits to output log information on each commit hash; at this
531 527 point you can use the usual `--patch`, `--git`, `--stat` and `--template`
532 528 switches to alter the log output for these.
533 529
534 530 `hg journal -T json` can be used to produce machine readable output.
535 531
536 532 """
537 533 opts = pycompat.byteskwargs(opts)
538 534 name = b'.'
539 535 if opts.get(b'all'):
540 536 if args:
541 537 raise error.Abort(
542 538 _(b"You can't combine --all and filtering on a name")
543 539 )
544 540 name = None
545 541 if args:
546 542 name = args[0]
547 543
548 544 fm = ui.formatter(b'journal', opts)
549 545
550 546 def formatnodes(nodes):
551 547 return fm.formatlist(map(fm.hexfunc, nodes), name=b'node', sep=b',')
552 548
553 549 if opts.get(b"template") != b"json":
554 550 if name is None:
555 551 displayname = _(b'the working copy and bookmarks')
556 552 else:
557 553 displayname = b"'%s'" % name
558 554 ui.status(_(b"previous locations of %s:\n") % displayname)
559 555
560 556 limit = logcmdutil.getlimit(opts)
561 557 entry = None
562 558 ui.pager(b'journal')
563 559 for count, entry in enumerate(repo.journal.filtered(name=name)):
564 560 if count == limit:
565 561 break
566 562
567 563 fm.startitem()
568 564 fm.condwrite(
569 565 ui.verbose, b'oldnodes', b'%s -> ', formatnodes(entry.oldhashes)
570 566 )
571 567 fm.write(b'newnodes', b'%s', formatnodes(entry.newhashes))
572 568 fm.condwrite(ui.verbose, b'user', b' %-8s', entry.user)
573 569
574 570 # ``name`` is bytes, or None only if 'all' was an option.
575 571 fm.condwrite(
576 572 # pytype: disable=attribute-error
577 573 opts.get(b'all') or name.startswith(b're:'),
578 574 # pytype: enable=attribute-error
579 575 b'name',
580 576 b' %-8s',
581 577 entry.name,
582 578 )
583 579
584 580 fm.condwrite(
585 581 ui.verbose,
586 582 b'date',
587 583 b' %s',
588 584 fm.formatdate(entry.timestamp, b'%Y-%m-%d %H:%M %1%2'),
589 585 )
590 586 fm.write(b'command', b' %s\n', entry.command)
591 587
592 588 if opts.get(b"commits"):
593 589 if fm.isplain():
594 590 displayer = logcmdutil.changesetdisplayer(ui, repo, opts)
595 591 else:
596 592 displayer = logcmdutil.changesetformatter(
597 593 ui, repo, fm.nested(b'changesets'), diffopts=opts
598 594 )
599 595 for hash in entry.newhashes:
600 596 try:
601 597 ctx = repo[hash]
602 598 displayer.show(ctx)
603 599 except error.RepoLookupError as e:
604 600 fm.plain(b"%s\n\n" % pycompat.bytestr(e))
605 601 displayer.close()
606 602
607 603 fm.end()
608 604
609 605 if entry is None:
610 606 ui.status(_(b"no recorded locations\n"))
@@ -1,823 +1,823 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14 import stat
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.node import hex
18 18 from mercurial.pycompat import open
19 19
20 20 from mercurial import (
21 21 dirstate,
22 22 encoding,
23 23 error,
24 24 httpconnection,
25 25 match as matchmod,
26 26 pycompat,
27 27 requirements,
28 28 scmutil,
29 29 sparse,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33 from mercurial.utils import hashutil
34 34 from mercurial.dirstateutils import timestamp
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
52 52
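A minimal usage sketch of the context manager above (hypothetical repo object): while the flag is set, status and related code paths report the largefiles themselves rather than their standins, and the previous value is restored even if an exception escapes.

with lfstatus(repo):
    modified = repo.status().modified  # largefile-aware status while active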
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
81 81
82 82 def usercachepath(ui, hash):
83 83 """Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space."""
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95
96 96 hint = None
97 97
98 98 if pycompat.iswindows:
99 99 appdata = encoding.environ.get(
100 100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
101 101 )
102 102 if appdata:
103 103 return os.path.join(appdata, name)
104 104
105 105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
106 106 b"LOCALAPPDATA",
107 107 b"APPDATA",
108 108 name,
109 109 )
110 110 elif pycompat.isdarwin:
111 111 home = encoding.environ.get(b'HOME')
112 112 if home:
113 113 return os.path.join(home, b'Library', b'Caches', name)
114 114
115 115 hint = _(b"define %s in the environment, or set %s.usercache") % (
116 116 b"HOME",
117 117 name,
118 118 )
119 119 elif pycompat.isposix:
120 120 path = encoding.environ.get(b'XDG_CACHE_HOME')
121 121 if path:
122 122 return os.path.join(path, name)
123 123 home = encoding.environ.get(b'HOME')
124 124 if home:
125 125 return os.path.join(home, b'.cache', name)
126 126
127 127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
128 128 b"XDG_CACHE_HOME",
129 129 b"HOME",
130 130 name,
131 131 )
132 132 else:
133 133 raise error.Abort(
134 134 _(b'unknown operating system: %s\n') % pycompat.osname
135 135 )
136 136
137 137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
138 138
139 139
140 140 def inusercache(ui, hash):
141 141 path = usercachepath(ui, hash)
142 142 return os.path.exists(path)
143 143
144 144
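Taken together, a short sketch of how the two helpers above combine (hypothetical ui and 40-character hash; compare findfile below, which adds the per-repo store lookup and hardlinking):

h = b'0123456789abcdef0123456789abcdef01234567'
path = usercachepath(ui, h)          # e.g. ~/.cache/largefiles/<hash> on POSIX
cached_already = inusercache(ui, h)  # True once any local clone fetched it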
145 145 def findfile(repo, hash):
146 146 """Return store path of the largefile with the specified hash.
147 147 As a side effect, the file might be linked from user cache.
148 148 Return None if the file can't be found locally."""
149 149 path, exists = findstorepath(repo, hash)
150 150 if exists:
151 151 repo.ui.note(_(b'found %s in store\n') % hash)
152 152 return path
153 153 elif inusercache(repo.ui, hash):
154 154 repo.ui.note(_(b'found %s in system cache\n') % hash)
155 155 path = storepath(repo, hash)
156 156 link(usercachepath(repo.ui, hash), path)
157 157 return path
158 158 return None
159 159
160 160
161 161 class largefilesdirstate(dirstate.dirstate):
162 162 _large_file_dirstate = True
163 163 _tr_key_suffix = b'-large-files'
164 164
165 165 def __getitem__(self, key):
166 166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
167 167
168 168 def set_tracked(self, f):
169 169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
170 170
171 171 def set_untracked(self, f):
172 172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
173 173
174 174 def normal(self, f, parentfiledata=None):
175 175 # not sure if we should pass the `parentfiledata` down or throw it
176 176 # away. So throwing it away to stay on the safe side.
177 177 return super(largefilesdirstate, self).normal(unixpath(f))
178 178
179 179 def remove(self, f):
180 180 return super(largefilesdirstate, self).remove(unixpath(f))
181 181
182 182 def add(self, f):
183 183 return super(largefilesdirstate, self).add(unixpath(f))
184 184
185 185 def drop(self, f):
186 186 return super(largefilesdirstate, self).drop(unixpath(f))
187 187
188 188 def forget(self, f):
189 189 return super(largefilesdirstate, self).forget(unixpath(f))
190 190
191 191 def normallookup(self, f):
192 192 return super(largefilesdirstate, self).normallookup(unixpath(f))
193 193
194 194 def _ignore(self, f):
195 195 return False
196 196
197 197 def write(self, tr):
198 198 # (1) disable PENDING mode always
199 199 # (lfdirstate isn't yet managed as a part of the transaction)
200 200 # (2) avoid develwarn 'use dirstate.write with ....'
201 201 if tr:
202 202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
203 203 super(largefilesdirstate, self).write(None)
204 204
205 205
206 206 def openlfdirstate(ui, repo, create=True):
207 207 """
208 208 Return a dirstate object that tracks largefiles: i.e. its root is
209 209 the repo root, but it is saved in .hg/largefiles/dirstate.
210 210
211 211 If a dirstate object already exists and is being used for a 'changing_*'
212 212 context, it will be returned.
213 213 """
214 214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
215 215 if sub_dirstate is not None:
216 216 return sub_dirstate
217 217 vfs = repo.vfs
218 218 lfstoredir = longname
219 219 opener = vfsmod.vfs(vfs.join(lfstoredir))
220 220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
221 221 lfdirstate = largefilesdirstate(
222 222 opener,
223 223 ui,
224 224 repo.root,
225 225 repo.dirstate._validate,
226 226 lambda: sparse.matcher(repo),
227 227 repo.nodeconstants,
228 228 use_dirstate_v2,
229 229 )
230 230
231 231 # If the largefiles dirstate does not exist, populate and create
232 232 # it. This ensures that we create it on the first meaningful
233 233 # largefiles operation in a new clone.
234 234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
235 235 try:
236 236 with repo.wlock(wait=False), lfdirstate.changing_files(repo):
237 237 matcher = getstandinmatcher(repo)
238 238 standins = repo.dirstate.walk(
239 239 matcher, subrepos=[], unknown=False, ignored=False
240 240 )
241 241
242 242 if len(standins) > 0:
243 243 vfs.makedirs(lfstoredir)
244 244
245 245 for standin in standins:
246 246 lfile = splitstandin(standin)
247 247 lfdirstate.hacky_extension_update_file(
248 248 lfile,
249 249 p1_tracked=True,
250 250 wc_tracked=True,
251 251 possibly_dirty=True,
252 252 )
253 253 except error.LockError:
254 254 # Assume that whatever was holding the lock was important.
255 255 # If we were doing something important, we would already have
256 256 # either the lock or a largefile dirstate.
257 257 pass
258 258 return lfdirstate
259 259
260 260
261 261 def lfdirstatestatus(lfdirstate, repo):
262 262 pctx = repo[b'.']
263 263 match = matchmod.always()
264 264 unsure, s, mtime_boundary = lfdirstate.status(
265 265 match, subrepos=[], ignored=False, clean=False, unknown=False
266 266 )
267 267 modified, clean = s.modified, s.clean
268 268 wctx = repo[None]
269 269 for lfile in unsure:
270 270 try:
271 271 fctx = pctx[standin(lfile)]
272 272 except LookupError:
273 273 fctx = None
274 274 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
275 275 modified.append(lfile)
276 276 else:
277 277 clean.append(lfile)
278 278 st = wctx[lfile].lstat()
279 279 mode = st.st_mode
280 280 size = st.st_size
281 281 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
282 282 if mtime is not None:
283 283 cache_data = (mode, size, mtime)
284 284 lfdirstate.set_clean(lfile, cache_data)
285 285 return s
286 286
287 287
288 288 def listlfiles(repo, rev=None, matcher=None):
289 289 """return a list of largefiles in the working copy or the
290 290 specified changeset"""
291 291
292 292 if matcher is None:
293 293 matcher = getstandinmatcher(repo)
294 294
295 295 # ignore unknown files in working directory
296 296 return [
297 297 splitstandin(f)
298 298 for f in repo[rev].walk(matcher)
299 299 if rev is not None or repo.dirstate.get_entry(f).any_tracked
300 300 ]
301 301
302 302
303 303 def instore(repo, hash, forcelocal=False):
304 304 '''Return true if a largefile with the given hash exists in the store'''
305 305 return os.path.exists(storepath(repo, hash, forcelocal))
306 306
307 307
308 308 def storepath(repo, hash, forcelocal=False):
309 309 """Return the correct location in the repository largefiles store for a
310 310 file with the given hash."""
311 311 if not forcelocal and repo.shared():
312 312 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
313 313 return repo.vfs.join(longname, hash)
314 314
315 315
316 316 def findstorepath(repo, hash):
317 317 """Search through the local store path(s) to find the file for the given
318 318 hash. If the file is not found, its path in the primary store is returned.
319 319 The return value is a tuple of (path, exists(path)).
320 320 """
321 321 # For shared repos, the primary store is in the share source. But for
322 322 # backward compatibility, force a lookup in the local store if it wasn't
323 323 # found in the share source.
324 324 path = storepath(repo, hash, False)
325 325
326 326 if instore(repo, hash):
327 327 return (path, True)
328 328 elif repo.shared() and instore(repo, hash, True):
329 329 return storepath(repo, hash, True), True
330 330
331 331 return (path, False)
332 332
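# Editor's sketch (pure Python, hypothetical callables): the precedence
# implemented by findstorepath() above, with `exists` standing in for
# instore() and `fallback` for the local store of a shared repo.
def _find_store_path_sketch(primary, fallback, exists):
    if exists(primary):
        return primary, True
    if fallback is not None and exists(fallback):
        return fallback, True
    # not found anywhere: report the primary location anyway
    return primary, False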
333 333
334 334 def copyfromcache(repo, hash, filename):
335 335 """Copy the specified largefile from the repo or system cache to
336 336 filename in the repository. Return true on success or false if the
337 337 file was not found in either cache (which should not happen:
338 338 this is meant to be called only after ensuring that the needed
339 339 largefile exists in the cache)."""
340 340 wvfs = repo.wvfs
341 341 path = findfile(repo, hash)
342 342 if path is None:
343 343 return False
344 344 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
345 345 # The write may fail before the file is fully written, but we
346 346 # don't use atomic writes in the working copy.
347 347 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
348 348 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
349 349 if gothash != hash:
350 350 repo.ui.warn(
351 351 _(b'%s: data corruption in %s with hash %s\n')
352 352 % (filename, path, gothash)
353 353 )
354 354 wvfs.unlink(filename)
355 355 return False
356 356 return True
357 357
358 358
359 359 def copytostore(repo, ctx, file, fstandin):
360 360 wvfs = repo.wvfs
361 361 hash = readasstandin(ctx[fstandin])
362 362 if instore(repo, hash):
363 363 return
364 364 if wvfs.exists(file):
365 365 copytostoreabsolute(repo, wvfs.join(file), hash)
366 366 else:
367 367 repo.ui.warn(
368 368 _(b"%s: largefile %s not available from local store\n")
369 369 % (file, hash)
370 370 )
371 371
372 372
373 373 def copyalltostore(repo, node):
374 374 '''Copy all largefiles in a given revision to the store'''
375 375
376 376 ctx = repo[node]
377 377 for filename in ctx.files():
378 378 realfile = splitstandin(filename)
379 379 if realfile is not None and filename in ctx.manifest():
380 380 copytostore(repo, ctx, realfile, filename)
381 381
382 382
383 383 def copytostoreabsolute(repo, file, hash):
384 384 if inusercache(repo.ui, hash):
385 385 link(usercachepath(repo.ui, hash), storepath(repo, hash))
386 386 else:
387 387 util.makedirs(os.path.dirname(storepath(repo, hash)))
388 388 with open(file, b'rb') as srcf:
389 389 with util.atomictempfile(
390 390 storepath(repo, hash), createmode=repo.store.createmode
391 391 ) as dstf:
392 392 for chunk in util.filechunkiter(srcf):
393 393 dstf.write(chunk)
394 394 linktousercache(repo, hash)
395 395
396 396
397 397 def linktousercache(repo, hash):
398 398 """Link / copy the largefile with the specified hash from the store
399 399 to the cache."""
400 400 path = usercachepath(repo.ui, hash)
401 401 link(storepath(repo, hash), path)
402 402
403 403
404 404 def getstandinmatcher(repo, rmatcher=None):
405 405 '''Return a match object that applies rmatcher to the standin directory'''
406 406 wvfs = repo.wvfs
407 407 standindir = shortname
408 408
409 409 # no warnings about missing files or directories
410 410 badfn = lambda f, msg: None
411 411
412 412 if rmatcher and not rmatcher.always():
413 413 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
414 414 if not pats:
415 415 pats = [wvfs.join(standindir)]
416 416 match = scmutil.match(repo[None], pats, badfn=badfn)
417 417 else:
418 418 # no patterns: relative to repo root
419 419 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
420 420 return match
421 421
422 422
423 423 def composestandinmatcher(repo, rmatcher):
424 424 """Return a matcher that accepts standins corresponding to the
425 425 files accepted by rmatcher. Pass the list of files in the matcher
426 426 as the paths specified by the user."""
427 427 smatcher = getstandinmatcher(repo, rmatcher)
428 428 isstandin = smatcher.matchfn
429 429
430 430 def composedmatchfn(f):
431 431 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
432 432
433 433 smatcher.matchfn = composedmatchfn
434 434
435 435 return smatcher
436 436
437 437
438 438 def standin(filename):
439 439 """Return the repo-relative path to the standin for the specified big
440 440 file."""
441 441 # Notes:
442 442 # 1) Some callers want an absolute path, but for instance addlargefiles
443 443 # needs it repo-relative so it can be passed to repo[None].add(). So
444 444 # leave it up to the caller to use repo.wjoin() to get an absolute path.
445 445 # 2) Join with '/' because that's what dirstate always uses, even on
446 446 # Windows. Change existing separator to '/' first in case we are
447 447 # passed filenames from an external source (like the command line).
448 448 return shortnameslash + util.pconvert(filename)
449 449
450 450
451 451 def isstandin(filename):
452 452 """Return true if filename is a big file standin. filename must be
453 453 in Mercurial's internal form (slash-separated)."""
454 454 return filename.startswith(shortnameslash)
455 455
456 456
457 457 def splitstandin(filename):
458 458 # Split on / because that's what dirstate always uses, even on Windows.
459 459 # Change local separator to / first just in case we are passed filenames
460 460 # from an external source (like the command line).
461 461 bits = util.pconvert(filename).split(b'/', 1)
462 462 if len(bits) == 2 and bits[0] == shortname:
463 463 return bits[1]
464 464 else:
465 465 return None
466 466
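# Editor's note: a self-contained round-trip sketch (assumes, as defined near
# the top of this module, that shortname is b'.hglf'); standin() and
# splitstandin() above are inverses on slash-separated paths.
def _standin_sketch(path):
    return b'.hglf/' + path.replace(b'\\', b'/')

def _splitstandin_sketch(path):
    bits = path.replace(b'\\', b'/').split(b'/', 1)
    return bits[1] if len(bits) == 2 and bits[0] == b'.hglf' else None

# _standin_sketch(b'data/big.bin')            -> b'.hglf/data/big.bin'
# _splitstandin_sketch(b'.hglf/data/big.bin') -> b'data/big.bin'
# _splitstandin_sketch(b'data/big.bin')       -> None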
467 467
468 468 def updatestandin(repo, lfile, standin):
469 469 """Re-calculate hash value of lfile and write it into standin
470 470
471 471 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
472 472 """
473 473 file = repo.wjoin(lfile)
474 474 if repo.wvfs.exists(lfile):
475 475 hash = hashfile(file)
476 476 executable = getexecutable(file)
477 477 writestandin(repo, standin, hash, executable)
478 478 else:
479 479 raise error.Abort(_(b'%s: file not found!') % lfile)
480 480
481 481
482 482 def readasstandin(fctx):
483 483 """read hex hash from given filectx of standin file
484 484
485 485 This encapsulates how "standin" data is stored in the storage layer."""
486 486 return fctx.data().strip()
487 487
488 488
489 489 def writestandin(repo, standin, hash, executable):
490 490 '''write hash to <repo.root>/<standin>'''
491 491 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
492 492
493 493
494 494 def copyandhash(instream, outfile):
495 495 """Read bytes from instream (iterable) and write them to outfile,
496 496 computing the SHA-1 hash of the data along the way. Return the hash."""
497 497 hasher = hashutil.sha1(b'')
498 498 for data in instream:
499 499 hasher.update(data)
500 500 outfile.write(data)
501 501 return hex(hasher.digest())
502 502
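# Editor's sketch (stdlib only): the same copy-while-hashing pattern as
# copyandhash() above, expressed with hashlib directly.
import hashlib

def _copy_and_sha1_sketch(chunks, outfile):
    h = hashlib.sha1()
    for chunk in chunks:  # chunks: any iterable yielding bytes
        h.update(chunk)
        outfile.write(chunk)
    return h.hexdigest()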
503 503
504 504 def hashfile(file):
505 505 if not os.path.exists(file):
506 506 return b''
507 507 with open(file, b'rb') as fd:
508 508 return hexsha1(fd)
509 509
510 510
511 511 def getexecutable(filename):
512 512 mode = os.stat(filename).st_mode
513 513 return (
514 514 (mode & stat.S_IXUSR)
515 515 and (mode & stat.S_IXGRP)
516 516 and (mode & stat.S_IXOTH)
517 517 )
518 518
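# Editor's sketch (stdlib only): getexecutable() above treats a file as
# executable only when the user, group and other execute bits are all set.
import os
import stat

def _is_executable_sketch(path):
    mode = os.stat(path).st_mode
    want = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return (mode & want) == want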
519 519
520 520 def urljoin(first, second, *arg):
521 521 def join(left, right):
522 522 if not left.endswith(b'/'):
523 523 left += b'/'
524 524 if right.startswith(b'/'):
525 525 right = right[1:]
526 526 return left + right
527 527
528 528 url = join(first, second)
529 529 for a in arg:
530 530 url = join(url, a)
531 531 return url
532 532
533 533
534 534 def hexsha1(fileobj):
535 535 """hexsha1 returns the hex-encoded sha1 sum of the data in the given
536 536 file-like object"""
537 537 h = hashutil.sha1()
538 538 for chunk in util.filechunkiter(fileobj):
539 539 h.update(chunk)
540 540 return hex(h.digest())
541 541
542 542
543 543 def httpsendfile(ui, filename):
544 544 return httpconnection.httpsendfile(ui, filename, b'rb')
545 545
546 546
547 547 def unixpath(path):
548 548 '''Return a version of path normalized for use with the lfdirstate.'''
549 549 return util.pconvert(os.path.normpath(path))
550 550
551 551
552 552 def islfilesrepo(repo):
553 553 '''Return true if the repo is a largefile repo.'''
554 554 if b'largefiles' in repo.requirements:
555 555 for entry in repo.store.data_entries():
556 556 if entry.is_revlog and shortnameslash in entry.target_id:
557 557 return True
558 558
559 559 return any(openlfdirstate(repo.ui, repo, False))
560 560
561 561
562 562 class storeprotonotcapable(Exception):
563 563 def __init__(self, storetypes):
564 564 self.storetypes = storetypes
565 565
566 566
567 567 def getstandinsstate(repo):
568 568 standins = []
569 569 matcher = getstandinmatcher(repo)
570 570 wctx = repo[None]
571 571 for standin in repo.dirstate.walk(
572 572 matcher, subrepos=[], unknown=False, ignored=False
573 573 ):
574 574 lfile = splitstandin(standin)
575 575 try:
576 576 hash = readasstandin(wctx[standin])
577 577 except IOError:
578 578 hash = None
579 579 standins.append((lfile, hash))
580 580 return standins
581 581
582 582
583 583 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
584 584 lfstandin = standin(lfile)
585 585 if lfstandin not in repo.dirstate:
586 586 lfdirstate.hacky_extension_update_file(
587 587 lfile,
588 588 p1_tracked=False,
589 589 wc_tracked=False,
590 590 )
591 591 else:
592 592 entry = repo.dirstate.get_entry(lfstandin)
593 593 lfdirstate.hacky_extension_update_file(
594 594 lfile,
595 595 wc_tracked=entry.tracked,
596 596 p1_tracked=entry.p1_tracked,
597 597 p2_info=entry.p2_info,
598 598 possibly_dirty=True,
599 599 )
600 600
601 601
602 602 def markcommitted(orig, ctx, node):
603 603 repo = ctx.repo()
604 604
605 605 with repo.dirstate.changing_parents(repo):
606 606 orig(node)
607 607
608 608 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
609 609 # because files coming from the 2nd parent are omitted in the latter.
610 610 #
611 611 # The former should be used to get targets of "synclfdirstate",
612 612 # because such files:
613 613 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
614 614 # - have to be marked as "n" after commit, but
615 615 # - aren't listed in "repo[node].files()"
616 616
617 617 lfdirstate = openlfdirstate(repo.ui, repo)
618 618 for f in ctx.files():
619 619 lfile = splitstandin(f)
620 620 if lfile is not None:
621 621 synclfdirstate(repo, lfdirstate, lfile, False)
622 622
623 623 # As part of committing, copy all of the largefiles into the cache.
624 624 #
625 625 # Using "node" instead of "ctx" implies an additional "repo[node]"
626 626 # lookup in copyalltostore(), but lets us omit a redundant check for
627 627 # files coming from the 2nd parent, which should already exist in the
628 628 # store after a merge.
629 629 copyalltostore(repo, node)
630 630
631 631
632 632 def getlfilestoupdate(oldstandins, newstandins):
633 633 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
634 634 filelist = []
635 635 for f in changedstandins:
636 636 if f[0] not in filelist:
637 637 filelist.append(f[0])
638 638 return filelist
639 639
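# Editor's sketch (pure Python): getlfilestoupdate() above keys off the
# symmetric difference of the two (filename, hash) snapshots, so a file is
# reported once whenever its hash, or its presence, differs between them.
def _changed_lfiles_sketch(oldstandins, newstandins):
    changed = set(oldstandins) ^ set(newstandins)
    seen, result = set(), []
    for name, _hash in changed:
        if name not in seen:
            seen.add(name)
            result.append(name)
    return result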
640 640
641 641 def getlfilestoupload(repo, missing, addfunc):
642 642 makeprogress = repo.ui.makeprogress
643 643 with makeprogress(
644 644 _(b'finding outgoing largefiles'),
645 645 unit=_(b'revisions'),
646 646 total=len(missing),
647 647 ) as progress:
648 648 for i, n in enumerate(missing):
649 649 progress.update(i)
650 650 parents = [p for p in repo[n].parents() if p != repo.nullid]
651 651
652 652 with lfstatus(repo, value=False):
653 653 ctx = repo[n]
654 654
655 655 files = set(ctx.files())
656 656 if len(parents) == 2:
657 657 mc = ctx.manifest()
658 658 mp1 = ctx.p1().manifest()
659 659 mp2 = ctx.p2().manifest()
660 660 for f in mp1:
661 661 if f not in mc:
662 662 files.add(f)
663 663 for f in mp2:
664 664 if f not in mc:
665 665 files.add(f)
666 666 for f in mc:
667 667 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
668 668 files.add(f)
669 669 for fn in files:
670 670 if isstandin(fn) and fn in ctx:
671 671 addfunc(fn, readasstandin(ctx[fn]))
672 672
673 673
674 674 def updatestandinsbymatch(repo, match):
675 675 """Update standins in the working directory according to specified match
676 676
677 677 This returns (possibly modified) ``match`` object to be used for
678 678 subsequent commit process.
679 679 """
680 680
681 681 ui = repo.ui
682 682
683 683 # Case 1: user calls commit with no specific files or
684 684 # include/exclude patterns: refresh and commit all files that
685 685 # are "dirty".
686 686 if match is None or match.always():
687 687 # Spend a bit of time here to get a list of files we know
688 688 # are modified so we can compare only against those.
689 689 # It can cost a lot of time (several seconds)
690 690 # otherwise to update all standins if the largefiles are
691 691 # large.
692 692 dirtymatch = matchmod.always()
693 693 with repo.dirstate.running_status(repo):
694 694 lfdirstate = openlfdirstate(ui, repo)
695 695 unsure, s, mtime_boundary = lfdirstate.status(
696 696 dirtymatch,
697 697 subrepos=[],
698 698 ignored=False,
699 699 clean=False,
700 700 unknown=False,
701 701 )
702 702 modifiedfiles = unsure + s.modified + s.added + s.removed
703 703 lfiles = listlfiles(repo)
704 704 # this only loops through largefiles that exist (not
705 705 # removed/renamed)
706 706 for lfile in lfiles:
707 707 if lfile in modifiedfiles:
708 708 fstandin = standin(lfile)
709 709 if repo.wvfs.exists(fstandin):
710 710 # this handles the case where a rebase is being
711 711 # performed and the working copy is not updated
712 712 # yet.
713 713 if repo.wvfs.exists(lfile):
714 714 updatestandin(repo, lfile, fstandin)
715 715
716 716 return match
717 717
718 718 lfiles = listlfiles(repo)
719 719 match._files = repo._subdirlfs(match.files(), lfiles)
720 720
721 721 # Case 2: user calls commit with specified patterns: refresh
722 722 # any matching big files.
723 723 smatcher = composestandinmatcher(repo, match)
724 724 standins = repo.dirstate.walk(
725 725 smatcher, subrepos=[], unknown=False, ignored=False
726 726 )
727 727
728 728 # No matching big files: get out of the way and pass control to
729 729 # the usual commit() method.
730 730 if not standins:
731 731 return match
732 732
733 733 # Refresh all matching big files. It's possible that the
734 734 # commit will end up failing, in which case the big files will
735 735 # stay refreshed. No harm done: the user modified them and
736 736 # asked to commit them, so sooner or later we're going to
737 737 # refresh the standins. Might as well leave them refreshed.
738 738 lfdirstate = openlfdirstate(ui, repo)
739 739 for fstandin in standins:
740 740 lfile = splitstandin(fstandin)
741 741 if lfdirstate.get_entry(lfile).tracked:
742 742 updatestandin(repo, lfile, fstandin)
743 743
744 744 # Cook up a new matcher that only matches regular files or
745 745 # standins corresponding to the big files requested by the
746 746 # user. Have to modify _files to prevent commit() from
747 747 # complaining "not tracked" for big files.
748 748 match = copy.copy(match)
749 749 origmatchfn = match.matchfn
750 750
751 751 # Check both the list of largefiles and the list of
752 752 # standins because if a largefile was removed, it
753 753 # won't be in the list of largefiles at this point
754 754 match._files += sorted(standins)
755 755
756 756 actualfiles = []
757 757 for f in match._files:
758 758 fstandin = standin(f)
759 759
760 760 # For largefiles, only one of the normal and standin should be
761 761 # committed (except if one of them is a remove). In the case of a
762 762 # standin removal, drop the normal file if it is unknown to dirstate.
763 763 # Thus, skip plain largefile names but keep the standin.
764 764 if f in lfiles or fstandin in standins:
765 765 if not repo.dirstate.get_entry(fstandin).removed:
766 766 if not repo.dirstate.get_entry(f).removed:
767 767 continue
768 768 elif not repo.dirstate.get_entry(f).any_tracked:
769 769 continue
770 770
771 771 actualfiles.append(f)
772 772 match._files = actualfiles
773 773
774 774 def matchfn(f):
775 775 if origmatchfn(f):
776 776 return f not in lfiles
777 777 else:
778 778 return f in standins
779 779
780 780 match.matchfn = matchfn
781 781
782 782 return match
783 783
784 784
785 785 class automatedcommithook:
786 786 """Stateful hook to update standins at the first commit after resuming
787 787
788 788 For efficiency, updating standins in the working directory should
789 789 be avoided during automated committing (like rebase, transplant and
790 790 so on), because they should already have been updated before committing.
791 791
792 792 But the first commit after resuming an automated commit (e.g. ``rebase
793 793 --continue``) should update them, because largefiles may have been
794 794 modified manually.
795 795 """
796 796
797 797 def __init__(self, resuming):
798 798 self.resuming = resuming
799 799
800 800 def __call__(self, repo, match):
801 801 if self.resuming:
802 802 self.resuming = False # avoids updating at subsequent commits
803 803 return updatestandinsbymatch(repo, match)
804 804 else:
805 805 return match
806 806
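# Editor's note (usage sketch): other parts of this extension install the hook
# on the repository's commit-hook list, roughly
#
#     repo._lfcommithooks.append(automatedcommithook(resuming=True))
#
# so only the first commit after e.g. "rebase --continue" pays the cost of
# updatestandinsbymatch(); later commits fall through to the plain matcher.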
807 807
808 808 def getstatuswriter(ui, repo, forcibly=None):
809 809 """Return the function to write largefiles specific status out
810 810
811 811 If ``forcibly`` is ``None``, this returns the last element of
812 812 ``repo._lfstatuswriters`` as the "default" writer function.
813 813
814 814 Otherwise, this returns the function to always write out (or
815 815 ignore if ``not forcibly``) status.
816 816 """
817 if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
817 if forcibly is None and hasattr(repo, '_largefilesenabled'):
818 818 return repo._lfstatuswriters[-1]
819 819 else:
820 820 if forcibly:
821 821 return ui.status # forcibly WRITE OUT
822 822 else:
823 823 return lambda *msg, **opts: None # forcibly IGNORE
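# Editor's note (usage sketch, hypothetical message): the returned callable has
# the same calling convention as ui.status, e.g.
#
#     statuswriter = getstatuswriter(ui, repo, forcibly=None)
#     statuswriter(b'getting changed largefiles\n')
#
# which prints, ignores, or delegates to the currently registered writer
# depending on `forcibly` and whether the repo has largefiles enabled.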
@@ -1,1924 +1,1924 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''Overridden Mercurial commands and functions for the largefiles extension'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14
15 15 from mercurial.i18n import _
16 16
17 17 from mercurial.pycompat import open
18 18
19 19 from mercurial.hgweb import webcommands
20 20
21 21 from mercurial import (
22 22 archival,
23 23 cmdutil,
24 24 copies as copiesmod,
25 25 dirstate,
26 26 error,
27 27 exchange,
28 28 extensions,
29 29 exthelper,
30 30 filemerge,
31 31 hg,
32 32 logcmdutil,
33 33 match as matchmod,
34 34 merge,
35 35 mergestate as mergestatemod,
36 36 pathutil,
37 37 pycompat,
38 38 scmutil,
39 39 smartset,
40 40 subrepo,
41 41 url as urlmod,
42 42 util,
43 43 )
44 44
45 45 from mercurial.upgrade_utils import (
46 46 actions as upgrade_actions,
47 47 )
48 48
49 49 from . import (
50 50 lfcommands,
51 51 lfutil,
52 52 storefactory,
53 53 )
54 54
55 55 ACTION_ADD = mergestatemod.ACTION_ADD
56 56 ACTION_DELETED_CHANGED = mergestatemod.ACTION_DELETED_CHANGED
57 57 ACTION_GET = mergestatemod.ACTION_GET
58 58 ACTION_KEEP = mergestatemod.ACTION_KEEP
59 59 ACTION_REMOVE = mergestatemod.ACTION_REMOVE
60 60
61 61 eh = exthelper.exthelper()
62 62
63 63 lfstatus = lfutil.lfstatus
64 64
65 65 MERGE_ACTION_LARGEFILE_MARK_REMOVED = mergestatemod.MergeAction('lfmr')
66 66
67 67 # -- Utility functions: commonly/repeatedly needed functionality ---------------
68 68
69 69
70 70 def composelargefilematcher(match, manifest):
71 71 """create a matcher that matches only the largefiles in the original
72 72 matcher"""
73 73 m = copy.copy(match)
74 74 lfile = lambda f: lfutil.standin(f) in manifest
75 75 m._files = [lf for lf in m._files if lfile(lf)]
76 76 m._fileset = set(m._files)
77 77 m.always = lambda: False
78 78 origmatchfn = m.matchfn
79 79 m.matchfn = lambda f: lfile(f) and origmatchfn(f)
80 80 return m
81 81
82 82
83 83 def composenormalfilematcher(match, manifest, exclude=None):
84 84 excluded = set()
85 85 if exclude is not None:
86 86 excluded.update(exclude)
87 87
88 88 m = copy.copy(match)
89 89 notlfile = lambda f: not (
90 90 lfutil.isstandin(f) or lfutil.standin(f) in manifest or f in excluded
91 91 )
92 92 m._files = [lf for lf in m._files if notlfile(lf)]
93 93 m._fileset = set(m._files)
94 94 m.always = lambda: False
95 95 origmatchfn = m.matchfn
96 96 m.matchfn = lambda f: notlfile(f) and origmatchfn(f)
97 97 return m
98 98
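# Editor's note (pattern sketch): both helpers above share the same
# "copy the matcher, narrow its file list, wrap matchfn" recipe. A generic
# version, with `keep` standing in for the lfile/notlfile predicates and
# using the `copy` module already imported at the top of this file:
def _narrow_matcher_sketch(match, keep):
    m = copy.copy(match)
    m._files = [f for f in m._files if keep(f)]
    m._fileset = set(m._files)
    m.always = lambda: False
    origmatchfn = m.matchfn
    m.matchfn = lambda f: keep(f) and origmatchfn(f)
    return m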
99 99
100 100 def addlargefiles(ui, repo, isaddremove, matcher, uipathfn, **opts):
101 101 large = opts.get('large')
102 102 lfsize = lfutil.getminsize(
103 103 ui, lfutil.islfilesrepo(repo), opts.get('lfsize')
104 104 )
105 105
106 106 lfmatcher = None
107 107 if lfutil.islfilesrepo(repo):
108 108 lfpats = ui.configlist(lfutil.longname, b'patterns')
109 109 if lfpats:
110 110 lfmatcher = matchmod.match(repo.root, b'', list(lfpats))
111 111
112 112 lfnames = []
113 113 m = matcher
114 114
115 115 wctx = repo[None]
116 116 for f in wctx.walk(matchmod.badmatch(m, lambda x, y: None)):
117 117 exact = m.exact(f)
118 118 lfile = lfutil.standin(f) in wctx
119 119 nfile = f in wctx
120 120 exists = lfile or nfile
121 121
122 122 # Don't warn the user when they attempt to add a normal tracked file.
123 123 # The normal add code will do that for us.
124 124 if exact and exists:
125 125 if lfile:
126 126 ui.warn(_(b'%s already a largefile\n') % uipathfn(f))
127 127 continue
128 128
129 129 if (exact or not exists) and not lfutil.isstandin(f):
130 130 # In case the file was removed previously, but not committed
131 131 # (issue3507)
132 132 if not repo.wvfs.exists(f):
133 133 continue
134 134
135 135 abovemin = (
136 136 lfsize and repo.wvfs.lstat(f).st_size >= lfsize * 1024 * 1024
137 137 )
138 138 if large or abovemin or (lfmatcher and lfmatcher(f)):
139 139 lfnames.append(f)
140 140 if ui.verbose or not exact:
141 141 ui.status(_(b'adding %s as a largefile\n') % uipathfn(f))
142 142
143 143 bad = []
144 144
145 145 # Need to lock, otherwise there could be a race condition between
146 146 # when standins are created and added to the repo.
147 147 with repo.wlock():
148 148 if not opts.get('dry_run'):
149 149 standins = []
150 150 lfdirstate = lfutil.openlfdirstate(ui, repo)
151 151 for f in lfnames:
152 152 standinname = lfutil.standin(f)
153 153 lfutil.writestandin(
154 154 repo,
155 155 standinname,
156 156 hash=b'',
157 157 executable=lfutil.getexecutable(repo.wjoin(f)),
158 158 )
159 159 standins.append(standinname)
160 160 lfdirstate.set_tracked(f)
161 161 lfdirstate.write(repo.currenttransaction())
162 162 bad += [
163 163 lfutil.splitstandin(f)
164 164 for f in repo[None].add(standins)
165 165 if f in m.files()
166 166 ]
167 167
168 168 added = [f for f in lfnames if f not in bad]
169 169 return added, bad
170 170
171 171
172 172 def removelargefiles(ui, repo, isaddremove, matcher, uipathfn, dryrun, **opts):
173 173 after = opts.get('after')
174 174 m = composelargefilematcher(matcher, repo[None].manifest())
175 175 with lfstatus(repo):
176 176 s = repo.status(match=m, clean=not isaddremove)
177 177 manifest = repo[None].manifest()
178 178 modified, added, deleted, clean = [
179 179 [f for f in list if lfutil.standin(f) in manifest]
180 180 for list in (s.modified, s.added, s.deleted, s.clean)
181 181 ]
182 182
183 183 def warn(files, msg):
184 184 for f in files:
185 185 ui.warn(msg % uipathfn(f))
186 186 return int(len(files) > 0)
187 187
188 188 if after:
189 189 remove = deleted
190 190 result = warn(
191 191 modified + added + clean, _(b'not removing %s: file still exists\n')
192 192 )
193 193 else:
194 194 remove = deleted + clean
195 195 result = warn(
196 196 modified,
197 197 _(
198 198 b'not removing %s: file is modified (use -f'
199 199 b' to force removal)\n'
200 200 ),
201 201 )
202 202 result = (
203 203 warn(
204 204 added,
205 205 _(
206 206 b'not removing %s: file has been marked for add'
207 207 b' (use forget to undo)\n'
208 208 ),
209 209 )
210 210 or result
211 211 )
212 212
213 213 # Need to lock because standin files are deleted then removed from the
214 214 # repository and we could race in-between.
215 215 with repo.wlock():
216 216 lfdirstate = lfutil.openlfdirstate(ui, repo)
217 217 for f in sorted(remove):
218 218 if ui.verbose or not m.exact(f):
219 219 ui.status(_(b'removing %s\n') % uipathfn(f))
220 220
221 221 if not dryrun:
222 222 if not after:
223 223 repo.wvfs.unlinkpath(f, ignoremissing=True)
224 224
225 225 if dryrun:
226 226 return result
227 227
228 228 remove = [lfutil.standin(f) for f in remove]
229 229 # If this is being called by addremove, let the original addremove
230 230 # function handle this.
231 231 if not isaddremove:
232 232 for f in remove:
233 233 repo.wvfs.unlinkpath(f, ignoremissing=True)
234 234 repo[None].forget(remove)
235 235
236 236 for f in remove:
237 237 lfdirstate.set_untracked(lfutil.splitstandin(f))
238 238
239 239 lfdirstate.write(repo.currenttransaction())
240 240
241 241 return result
242 242
243 243
244 244 # For overriding mercurial.hgweb.webcommands so that largefiles will
245 245 # appear at their right place in the manifests.
246 246 @eh.wrapfunction(webcommands, 'decodepath')
247 247 def decodepath(orig, path):
248 248 return lfutil.splitstandin(path) or path
249 249
250 250
251 251 # -- Wrappers: modify existing commands --------------------------------
252 252
253 253
254 254 @eh.wrapcommand(
255 255 b'add',
256 256 opts=[
257 257 (b'', b'large', None, _(b'add as largefile')),
258 258 (b'', b'normal', None, _(b'add as normal file')),
259 259 (
260 260 b'',
261 261 b'lfsize',
262 262 b'',
263 263 _(
264 264 b'add all files above this size (in megabytes) '
265 265 b'as largefiles (default: 10)'
266 266 ),
267 267 ),
268 268 ],
269 269 )
270 270 def overrideadd(orig, ui, repo, *pats, **opts):
271 271 if opts.get('normal') and opts.get('large'):
272 272 raise error.Abort(_(b'--normal cannot be used with --large'))
273 273 return orig(ui, repo, *pats, **opts)
274 274
275 275
276 276 @eh.wrapfunction(cmdutil, 'add')
277 277 def cmdutiladd(orig, ui, repo, matcher, prefix, uipathfn, explicitonly, **opts):
278 278 # The --normal flag short circuits this override
279 279 if opts.get('normal'):
280 280 return orig(ui, repo, matcher, prefix, uipathfn, explicitonly, **opts)
281 281
282 282 ladded, lbad = addlargefiles(ui, repo, False, matcher, uipathfn, **opts)
283 283 normalmatcher = composenormalfilematcher(
284 284 matcher, repo[None].manifest(), ladded
285 285 )
286 286 bad = orig(ui, repo, normalmatcher, prefix, uipathfn, explicitonly, **opts)
287 287
288 288 bad.extend(f for f in lbad)
289 289 return bad
290 290
291 291
292 292 @eh.wrapfunction(cmdutil, 'remove')
293 293 def cmdutilremove(
294 294 orig, ui, repo, matcher, prefix, uipathfn, after, force, subrepos, dryrun
295 295 ):
296 296 normalmatcher = composenormalfilematcher(matcher, repo[None].manifest())
297 297 result = orig(
298 298 ui,
299 299 repo,
300 300 normalmatcher,
301 301 prefix,
302 302 uipathfn,
303 303 after,
304 304 force,
305 305 subrepos,
306 306 dryrun,
307 307 )
308 308 return (
309 309 removelargefiles(
310 310 ui, repo, False, matcher, uipathfn, dryrun, after=after, force=force
311 311 )
312 312 or result
313 313 )
314 314
315 315
316 316 @eh.wrapfunction(dirstate.dirstate, '_changing')
317 317 @contextlib.contextmanager
318 318 def _changing(orig, self, repo, change_type):
319 319 pre = sub_dirstate = getattr(self, '_sub_dirstate', None)
320 320 try:
321 321 lfd = getattr(self, '_large_file_dirstate', False)
322 322 if sub_dirstate is None and not lfd:
323 323 sub_dirstate = lfutil.openlfdirstate(repo.ui, repo)
324 324 self._sub_dirstate = sub_dirstate
325 325 if not lfd:
326 326 assert self._sub_dirstate is not None
327 327 with orig(self, repo, change_type):
328 328 if sub_dirstate is None:
329 329 yield
330 330 else:
331 331 with sub_dirstate._changing(repo, change_type):
332 332 yield
333 333 finally:
334 334 self._sub_dirstate = pre
335 335
336 336
337 337 @eh.wrapfunction(dirstate.dirstate, 'running_status')
338 338 @contextlib.contextmanager
339 339 def running_status(orig, self, repo):
340 340 pre = sub_dirstate = getattr(self, '_sub_dirstate', None)
341 341 try:
342 342 lfd = getattr(self, '_large_file_dirstate', False)
343 343 if sub_dirstate is None and not lfd:
344 344 sub_dirstate = lfutil.openlfdirstate(repo.ui, repo)
345 345 self._sub_dirstate = sub_dirstate
346 346 if not lfd:
347 347 assert self._sub_dirstate is not None
348 348 with orig(self, repo):
349 349 if sub_dirstate is None:
350 350 yield
351 351 else:
352 352 with sub_dirstate.running_status(repo):
353 353 yield
354 354 finally:
355 355 self._sub_dirstate = pre
356 356
357 357
358 358 @eh.wrapfunction(subrepo.hgsubrepo, 'status')
359 359 def overridestatusfn(orig, repo, rev2, **opts):
360 360 with lfstatus(repo._repo):
361 361 return orig(repo, rev2, **opts)
362 362
363 363
364 364 @eh.wrapcommand(b'status')
365 365 def overridestatus(orig, ui, repo, *pats, **opts):
366 366 with lfstatus(repo):
367 367 return orig(ui, repo, *pats, **opts)
368 368
369 369
370 370 @eh.wrapfunction(subrepo.hgsubrepo, 'dirty')
371 371 def overridedirty(orig, repo, ignoreupdate=False, missing=False):
372 372 with lfstatus(repo._repo):
373 373 return orig(repo, ignoreupdate=ignoreupdate, missing=missing)
374 374
375 375
376 376 @eh.wrapcommand(b'log')
377 377 def overridelog(orig, ui, repo, *pats, **opts):
378 378 def overridematchandpats(
379 379 orig,
380 380 ctx,
381 381 pats=(),
382 382 opts=None,
383 383 globbed=False,
384 384 default=b'relpath',
385 385 badfn=None,
386 386 ):
387 387 """Matcher that merges root directory with .hglf, suitable for log.
388 388 It is still possible to match .hglf directly.
389 389 For any listed files run log on the standin too.
390 390 matchfn tries both the given filename and with .hglf stripped.
391 391 """
392 392 if opts is None:
393 393 opts = {}
394 394 matchandpats = orig(ctx, pats, opts, globbed, default, badfn=badfn)
395 395 m, p = copy.copy(matchandpats)
396 396
397 397 if m.always():
398 398 # We want to match everything anyway, so there's no benefit trying
399 399 # to add standins.
400 400 return matchandpats
401 401
402 402 pats = set(p)
403 403
404 404 def fixpats(pat, tostandin=lfutil.standin):
405 405 if pat.startswith(b'set:'):
406 406 return pat
407 407
408 408 kindpat = matchmod._patsplit(pat, None)
409 409
410 410 if kindpat[0] is not None:
411 411 return kindpat[0] + b':' + tostandin(kindpat[1])
412 412 return tostandin(kindpat[1])
413 413
414 414 cwd = repo.getcwd()
415 415 if cwd:
416 416 hglf = lfutil.shortname
417 417 back = util.pconvert(repo.pathto(hglf)[: -len(hglf)])
418 418
419 419 def tostandin(f):
420 420 # The file may already be a standin, so truncate the back
421 421 # prefix and test before mangling it. This avoids turning
422 422 # 'glob:../.hglf/foo*' into 'glob:../.hglf/../.hglf/foo*'.
423 423 if f.startswith(back) and lfutil.splitstandin(f[len(back) :]):
424 424 return f
425 425
426 426 # An absolute path is from outside the repo, so truncate the
427 427 # path to the root before building the standin. Otherwise cwd
428 428 # is somewhere in the repo, relative to root, and needs to be
429 429 # prepended before building the standin.
430 430 if os.path.isabs(cwd):
431 431 f = f[len(back) :]
432 432 else:
433 433 f = cwd + b'/' + f
434 434 return back + lfutil.standin(f)
435 435
436 436 else:
437 437
438 438 def tostandin(f):
439 439 if lfutil.isstandin(f):
440 440 return f
441 441 return lfutil.standin(f)
442 442
443 443 pats.update(fixpats(f, tostandin) for f in p)
444 444
445 445 for i in range(0, len(m._files)):
446 446 # Don't add '.hglf' to m.files, since that is already covered by '.'
447 447 if m._files[i] == b'.':
448 448 continue
449 449 standin = lfutil.standin(m._files[i])
450 450 # If the "standin" is a directory, append instead of replace to
451 451 # support naming a directory on the command line with only
452 452 # largefiles. The original directory is kept to support normal
453 453 # files.
454 454 if standin in ctx:
455 455 m._files[i] = standin
456 456 elif m._files[i] not in ctx and repo.wvfs.isdir(standin):
457 457 m._files.append(standin)
458 458
459 459 m._fileset = set(m._files)
460 460 m.always = lambda: False
461 461 origmatchfn = m.matchfn
462 462
463 463 def lfmatchfn(f):
464 464 lf = lfutil.splitstandin(f)
465 465 if lf is not None and origmatchfn(lf):
466 466 return True
467 467 r = origmatchfn(f)
468 468 return r
469 469
470 470 m.matchfn = lfmatchfn
471 471
472 472 ui.debug(b'updated patterns: %s\n' % b', '.join(sorted(pats)))
473 473 return m, pats
474 474
475 475 # For hg log --patch, the match object is used in two different senses:
476 476 # (1) to determine what revisions should be printed out, and
477 477 # (2) to determine what files to print out diffs for.
478 478 # The magic matchandpats override should be used for case (1) but not for
479 479 # case (2).
480 480 oldmatchandpats = scmutil.matchandpats
481 481
482 482 def overridemakefilematcher(orig, repo, pats, opts, badfn=None):
483 483 wctx = repo[None]
484 484 match, pats = oldmatchandpats(wctx, pats, opts, badfn=badfn)
485 485 return lambda ctx: match
486 486
487 487 wrappedmatchandpats = extensions.wrappedfunction(
488 488 scmutil, 'matchandpats', overridematchandpats
489 489 )
490 490 wrappedmakefilematcher = extensions.wrappedfunction(
491 491 logcmdutil, '_makenofollowfilematcher', overridemakefilematcher
492 492 )
493 493 with wrappedmatchandpats, wrappedmakefilematcher:
494 494 return orig(ui, repo, *pats, **opts)
495 495
496 496
497 497 @eh.wrapcommand(
498 498 b'verify',
499 499 opts=[
500 500 (
501 501 b'',
502 502 b'large',
503 503 None,
504 504 _(b'verify that all largefiles in the current revision exist'),
505 505 ),
506 506 (
507 507 b'',
508 508 b'lfa',
509 509 None,
510 510 _(b'verify largefiles in all revisions, not just current'),
511 511 ),
512 512 (
513 513 b'',
514 514 b'lfc',
515 515 None,
516 516 _(b'verify local largefile contents, not just existence'),
517 517 ),
518 518 ],
519 519 )
520 520 def overrideverify(orig, ui, repo, *pats, **opts):
521 521 large = opts.pop('large', False)
522 522 all = opts.pop('lfa', False)
523 523 contents = opts.pop('lfc', False)
524 524
525 525 result = orig(ui, repo, *pats, **opts)
526 526 if large or all or contents:
527 527 result = result or lfcommands.verifylfiles(ui, repo, all, contents)
528 528 return result
529 529
530 530
531 531 @eh.wrapcommand(
532 532 b'debugstate',
533 533 opts=[(b'', b'large', None, _(b'display largefiles dirstate'))],
534 534 )
535 535 def overridedebugstate(orig, ui, repo, *pats, **opts):
536 536 large = opts.pop('large', False)
537 537 if large:
538 538
539 539 class fakerepo:
540 540 dirstate = lfutil.openlfdirstate(ui, repo)
541 541
542 542 orig(ui, fakerepo, *pats, **opts)
543 543 else:
544 544 orig(ui, repo, *pats, **opts)
545 545
546 546
547 547 # Before starting the manifest merge, merge.updates will call
548 548 # _checkunknownfile to check if there are any files in the merged-in
549 549 # changeset that collide with unknown files in the working copy.
550 550 #
551 551 # The largefiles are seen as unknown, so this prevents us from merging
552 552 # in a file 'foo' if we already have a largefile with the same name.
553 553 #
554 554 # The overridden function filters the unknown files by removing any
555 555 # largefiles. This makes the merge proceed and we can then handle this
556 556 # case further in the overridden calculateupdates function below.
557 557 @eh.wrapfunction(merge, '_checkunknownfile')
558 558 def overridecheckunknownfile(
559 559 origfn, dirstate, wvfs, dircache, wctx, mctx, f, f2=None
560 560 ):
561 561 if lfutil.standin(dirstate.normalize(f)) in wctx:
562 562 return False
563 563 return origfn(dirstate, wvfs, dircache, wctx, mctx, f, f2)
564 564
565 565
566 566 # The manifest merge handles conflicts on the manifest level. We want
567 567 # to handle changes in largefile-ness of files at this level too.
568 568 #
569 569 # The strategy is to run the original calculateupdates and then process
570 570 # the action list it outputs. There are two cases we need to deal with:
571 571 #
572 572 # 1. Normal file in p1, largefile in p2. Here the largefile is
573 573 # detected via its standin file, which will enter the working copy
574 574 # with a "get" action. It is not "merge" since the standin is all
575 575 # Mercurial is concerned with at this level -- the link to the
576 576 # existing normal file is not relevant here.
577 577 #
578 578 # 2. Largefile in p1, normal file in p2. Here we get a "merge" action
579 579 # since the largefile will be present in the working copy and
580 580 # different from the normal file in p2. Mercurial therefore
581 581 # triggers a merge action.
582 582 #
583 583 # In both cases, we prompt the user and emit new actions to either
584 584 # remove the standin (if the normal file was kept) or to remove the
585 585 # normal file and get the standin (if the largefile was kept). The
586 586 # default prompt answer is to use the largefile version since it was
587 587 # presumably changed on purpose.
588 588 #
589 589 # Finally, the merge.applyupdates function will then take care of
590 590 # writing the files into the working copy and lfcommands.updatelfiles
591 591 # will update the largefiles.
592 592 @eh.wrapfunction(merge, 'calculateupdates')
593 593 def overridecalculateupdates(
594 594 origfn, repo, p1, p2, pas, branchmerge, force, acceptremote, *args, **kwargs
595 595 ):
596 596 overwrite = force and not branchmerge
597 597 mresult = origfn(
598 598 repo, p1, p2, pas, branchmerge, force, acceptremote, *args, **kwargs
599 599 )
600 600
601 601 if overwrite:
602 602 return mresult
603 603
604 604 # Convert to dictionary with filename as key and action as value.
605 605 lfiles = set()
606 606 for f in mresult.files():
607 607 splitstandin = lfutil.splitstandin(f)
608 608 if splitstandin is not None and splitstandin in p1:
609 609 lfiles.add(splitstandin)
610 610 elif lfutil.standin(f) in p1:
611 611 lfiles.add(f)
612 612
613 613 for lfile in sorted(lfiles):
614 614 standin = lfutil.standin(lfile)
615 615 (lm, largs, lmsg) = mresult.getfile(lfile, (None, None, None))
616 616 (sm, sargs, smsg) = mresult.getfile(standin, (None, None, None))
617 617
618 618 if sm in (ACTION_GET, ACTION_DELETED_CHANGED) and lm != ACTION_REMOVE:
619 619 if sm == ACTION_DELETED_CHANGED:
620 620 f1, f2, fa, move, anc = sargs
621 621 sargs = (p2[f2].flags(), False)
622 622 # Case 1: normal file in the working copy, largefile in
623 623 # the second parent
624 624 usermsg = (
625 625 _(
626 626 b'remote turned local normal file %s into a largefile\n'
627 627 b'use (l)argefile or keep (n)ormal file?'
628 628 b'$$ &Largefile $$ &Normal file'
629 629 )
630 630 % lfile
631 631 )
632 632 if repo.ui.promptchoice(usermsg, 0) == 0: # pick remote largefile
633 633 mresult.addfile(
634 634 lfile, ACTION_REMOVE, None, b'replaced by standin'
635 635 )
636 636 mresult.addfile(standin, ACTION_GET, sargs, b'replaces standin')
637 637 else: # keep local normal file
638 638 mresult.addfile(lfile, ACTION_KEEP, None, b'replaces standin')
639 639 if branchmerge:
640 640 mresult.addfile(
641 641 standin,
642 642 ACTION_KEEP,
643 643 None,
644 644 b'replaced by non-standin',
645 645 )
646 646 else:
647 647 mresult.addfile(
648 648 standin,
649 649 ACTION_REMOVE,
650 650 None,
651 651 b'replaced by non-standin',
652 652 )
653 653 if lm in (ACTION_GET, ACTION_DELETED_CHANGED) and sm != ACTION_REMOVE:
654 654 if lm == ACTION_DELETED_CHANGED:
655 655 f1, f2, fa, move, anc = largs
656 656 largs = (p2[f2].flags(), False)
657 657 # Case 2: largefile in the working copy, normal file in
658 658 # the second parent
659 659 usermsg = (
660 660 _(
661 661 b'remote turned local largefile %s into a normal file\n'
662 662 b'keep (l)argefile or use (n)ormal file?'
663 663 b'$$ &Largefile $$ &Normal file'
664 664 )
665 665 % lfile
666 666 )
667 667 if repo.ui.promptchoice(usermsg, 0) == 0: # keep local largefile
668 668 if branchmerge:
669 669 # largefile can be restored from standin safely
670 670 mresult.addfile(
671 671 lfile,
672 672 ACTION_KEEP,
673 673 None,
674 674 b'replaced by standin',
675 675 )
676 676 mresult.addfile(
677 677 standin, ACTION_KEEP, None, b'replaces standin'
678 678 )
679 679 else:
680 680 # "lfile" should be marked as "removed" without
681 681 # removal of itself
682 682 mresult.addfile(
683 683 lfile,
684 684 MERGE_ACTION_LARGEFILE_MARK_REMOVED,
685 685 None,
686 686 b'forget non-standin largefile',
687 687 )
688 688
689 689 # linear-merge should treat this largefile as 're-added'
690 690 mresult.addfile(standin, ACTION_ADD, None, b'keep standin')
691 691 else: # pick remote normal file
692 692 mresult.addfile(lfile, ACTION_GET, largs, b'replaces standin')
693 693 mresult.addfile(
694 694 standin,
695 695 ACTION_REMOVE,
696 696 None,
697 697 b'replaced by non-standin',
698 698 )
699 699
700 700 return mresult
701 701
702 702
703 703 @eh.wrapfunction(mergestatemod, 'recordupdates')
704 704 def mergerecordupdates(orig, repo, actions, branchmerge, getfiledata):
705 705 if MERGE_ACTION_LARGEFILE_MARK_REMOVED in actions:
706 706 lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
707 707 for lfile, args, msg in actions[MERGE_ACTION_LARGEFILE_MARK_REMOVED]:
708 708 # this should be executed before 'orig', to execute 'remove'
709 709 # before all other actions
710 710 repo.dirstate.update_file(lfile, p1_tracked=True, wc_tracked=False)
711 711 # make sure lfile doesn't get synclfdirstate'd as normal
712 712 lfdirstate.update_file(lfile, p1_tracked=False, wc_tracked=True)
713 713
714 714 return orig(repo, actions, branchmerge, getfiledata)
715 715
716 716
717 717 # Override filemerge to prompt the user about how they wish to merge
718 718 # largefiles. This will handle identical edits without prompting the user.
719 719 @eh.wrapfunction(filemerge, 'filemerge')
720 720 def overridefilemerge(
721 721 origfn, repo, wctx, mynode, orig, fcd, fco, fca, labels=None
722 722 ):
723 723 if not lfutil.isstandin(orig) or fcd.isabsent() or fco.isabsent():
724 724 return origfn(repo, wctx, mynode, orig, fcd, fco, fca, labels=labels)
725 725
726 726 ahash = lfutil.readasstandin(fca).lower()
727 727 dhash = lfutil.readasstandin(fcd).lower()
728 728 ohash = lfutil.readasstandin(fco).lower()
729 729 if (
730 730 ohash != ahash
731 731 and ohash != dhash
732 732 and (
733 733 dhash == ahash
734 734 or repo.ui.promptchoice(
735 735 _(
736 736 b'largefile %s has a merge conflict\nancestor was %s\n'
737 737 b'you can keep (l)ocal %s or take (o)ther %s.\n'
738 738 b'what do you want to do?'
739 739 b'$$ &Local $$ &Other'
740 740 )
741 741 % (lfutil.splitstandin(orig), ahash, dhash, ohash),
742 742 0,
743 743 )
744 744 == 1
745 745 )
746 746 ):
747 747 repo.wwrite(fcd.path(), fco.data(), fco.flags())
748 748 return 0, False
749 749
750 750
751 751 @eh.wrapfunction(copiesmod, 'pathcopies')
752 752 def copiespathcopies(orig, ctx1, ctx2, match=None):
753 753 copies = orig(ctx1, ctx2, match=match)
754 754 updated = {}
755 755
756 756 for k, v in copies.items():
757 757 updated[lfutil.splitstandin(k) or k] = lfutil.splitstandin(v) or v
758 758
759 759 return updated
760 760
761 761
762 762 # Copy first changes the matchers to match standins instead of
763 763 # largefiles. Then it overrides util.copyfile in that function it
764 764 # checks if the destination largefile already exists. It also keeps a
765 765 # list of copied files so that the largefiles can be copied and the
766 766 # dirstate updated.
767 767 @eh.wrapfunction(cmdutil, 'copy')
768 768 def overridecopy(orig, ui, repo, pats, opts, rename=False):
769 769 # doesn't remove largefile on rename
770 770 if len(pats) < 2:
771 771 # this isn't legal, let the original function deal with it
772 772 return orig(ui, repo, pats, opts, rename)
773 773
774 774 # This could copy both lfiles and normal files in one command,
775 775 # but we don't want to do that. First replace their matcher to
776 776 # only match normal files and run it, then replace it to just
777 777 # match largefiles and run it again.
778 778 nonormalfiles = False
779 779 nolfiles = False
780 780 manifest = repo[None].manifest()
781 781
782 782 def normalfilesmatchfn(
783 783 orig,
784 784 ctx,
785 785 pats=(),
786 786 opts=None,
787 787 globbed=False,
788 788 default=b'relpath',
789 789 badfn=None,
790 790 ):
791 791 if opts is None:
792 792 opts = {}
793 793 match = orig(ctx, pats, opts, globbed, default, badfn=badfn)
794 794 return composenormalfilematcher(match, manifest)
795 795
796 796 with extensions.wrappedfunction(scmutil, 'match', normalfilesmatchfn):
797 797 try:
798 798 result = orig(ui, repo, pats, opts, rename)
799 799 except error.Abort as e:
800 800 if e.message != _(b'no files to copy'):
801 801 raise e
802 802 else:
803 803 nonormalfiles = True
804 804 result = 0
805 805
806 806 # The first rename can cause our current working directory to be removed.
807 807 # In that case there is nothing left to copy/rename so just quit.
808 808 try:
809 809 repo.getcwd()
810 810 except OSError:
811 811 return result
812 812
813 813 def makestandin(relpath):
814 814 path = pathutil.canonpath(repo.root, repo.getcwd(), relpath)
815 815 return repo.wvfs.join(lfutil.standin(path))
816 816
817 817 fullpats = scmutil.expandpats(pats)
818 818 dest = fullpats[-1]
819 819
820 820 if os.path.isdir(dest):
821 821 if not os.path.isdir(makestandin(dest)):
822 822 os.makedirs(makestandin(dest))
823 823
824 824 try:
825 825 # When we call orig below it creates the standins but we don't add
826 826 # them to the dir state until later so lock during that time.
827 827 wlock = repo.wlock()
828 828
829 829 manifest = repo[None].manifest()
830 830
831 831 def overridematch(
832 832 orig,
833 833 ctx,
834 834 pats=(),
835 835 opts=None,
836 836 globbed=False,
837 837 default=b'relpath',
838 838 badfn=None,
839 839 ):
840 840 if opts is None:
841 841 opts = {}
842 842 newpats = []
843 843 # The patterns were previously mangled to add the standin
844 844 # directory; we need to remove that now
845 845 for pat in pats:
846 846 if matchmod.patkind(pat) is None and lfutil.shortname in pat:
847 847 newpats.append(pat.replace(lfutil.shortname, b''))
848 848 else:
849 849 newpats.append(pat)
850 850 match = orig(ctx, newpats, opts, globbed, default, badfn=badfn)
851 851 m = copy.copy(match)
852 852 lfile = lambda f: lfutil.standin(f) in manifest
853 853 m._files = [lfutil.standin(f) for f in m._files if lfile(f)]
854 854 m._fileset = set(m._files)
855 855 origmatchfn = m.matchfn
856 856
857 857 def matchfn(f):
858 858 lfile = lfutil.splitstandin(f)
859 859 return (
860 860 lfile is not None
861 861 and (f in manifest)
862 862 and origmatchfn(lfile)
863 863 or None
864 864 )
865 865
866 866 m.matchfn = matchfn
867 867 return m
868 868
869 869 listpats = []
870 870 for pat in pats:
871 871 if matchmod.patkind(pat) is not None:
872 872 listpats.append(pat)
873 873 else:
874 874 listpats.append(makestandin(pat))
875 875
876 876 copiedfiles = []
877 877
878 878 def overridecopyfile(orig, src, dest, *args, **kwargs):
879 879 if lfutil.shortname in src and dest.startswith(
880 880 repo.wjoin(lfutil.shortname)
881 881 ):
882 882 destlfile = dest.replace(lfutil.shortname, b'')
883 883 if not opts[b'force'] and os.path.exists(destlfile):
884 884 raise IOError(
885 885 b'', _(b'destination largefile already exists')
886 886 )
887 887 copiedfiles.append((src, dest))
888 888 orig(src, dest, *args, **kwargs)
889 889
890 890 with extensions.wrappedfunction(util, 'copyfile', overridecopyfile):
891 891 with extensions.wrappedfunction(scmutil, 'match', overridematch):
892 892 result += orig(ui, repo, listpats, opts, rename)
893 893
894 894 lfdirstate = lfutil.openlfdirstate(ui, repo)
895 895 for (src, dest) in copiedfiles:
896 896 if lfutil.shortname in src and dest.startswith(
897 897 repo.wjoin(lfutil.shortname)
898 898 ):
899 899 srclfile = src.replace(repo.wjoin(lfutil.standin(b'')), b'')
900 900 destlfile = dest.replace(repo.wjoin(lfutil.standin(b'')), b'')
901 901 destlfiledir = repo.wvfs.dirname(repo.wjoin(destlfile)) or b'.'
902 902 if not os.path.isdir(destlfiledir):
903 903 os.makedirs(destlfiledir)
904 904 if rename:
905 905 os.rename(repo.wjoin(srclfile), repo.wjoin(destlfile))
906 906
907 907 # The file is gone, but this deletes any empty parent
908 908 # directories as a side-effect.
909 909 repo.wvfs.unlinkpath(srclfile, ignoremissing=True)
910 910 lfdirstate.set_untracked(srclfile)
911 911 else:
912 912 util.copyfile(repo.wjoin(srclfile), repo.wjoin(destlfile))
913 913
914 914 lfdirstate.set_tracked(destlfile)
915 915 lfdirstate.write(repo.currenttransaction())
916 916 except error.Abort as e:
917 917 if e.message != _(b'no files to copy'):
918 918 raise e
919 919 else:
920 920 nolfiles = True
921 921 finally:
922 922 wlock.release()
923 923
924 924 if nolfiles and nonormalfiles:
925 925 raise error.Abort(_(b'no files to copy'))
926 926
927 927 return result
928 928
929 929
930 930 # When the user calls revert, we have to be careful to not revert any
931 931 # changes to other largefiles accidentally. This means we have to keep
932 932 # track of the largefiles that are being reverted so we only pull down
933 933 # the necessary largefiles.
934 934 #
935 935 # Standins are only updated (to match the hash of largefiles) before
936 936 # commits. Update the standins, then run the original revert, changing
937 937 # the matcher to hit standins instead of largefiles. Based on the
938 938 # resulting standins, update the largefiles.
939 939 @eh.wrapfunction(cmdutil, 'revert')
940 940 def overriderevert(orig, ui, repo, ctx, *pats, **opts):
941 941 # Because we put the standins in a bad state (by updating them)
942 942 # and then return them to a correct state we need to lock to
943 943 # prevent others from changing them in their incorrect state.
944 944 with repo.wlock(), repo.dirstate.running_status(repo):
945 945 lfdirstate = lfutil.openlfdirstate(ui, repo)
946 946 s = lfutil.lfdirstatestatus(lfdirstate, repo)
947 947 lfdirstate.write(repo.currenttransaction())
948 948 for lfile in s.modified:
949 949 lfutil.updatestandin(repo, lfile, lfutil.standin(lfile))
950 950 for lfile in s.deleted:
951 951 fstandin = lfutil.standin(lfile)
952 952 if repo.wvfs.exists(fstandin):
953 953 repo.wvfs.unlink(fstandin)
954 954
955 955 oldstandins = lfutil.getstandinsstate(repo)
956 956
957 957 def overridematch(
958 958 orig,
959 959 mctx,
960 960 pats=(),
961 961 opts=None,
962 962 globbed=False,
963 963 default=b'relpath',
964 964 badfn=None,
965 965 ):
966 966 if opts is None:
967 967 opts = {}
968 968 match = orig(mctx, pats, opts, globbed, default, badfn=badfn)
969 969 m = copy.copy(match)
970 970
971 971 # revert supports recursing into subrepos, and though largefiles
972 972 # currently doesn't work correctly in that case, this match is
973 973 # called, so the lfdirstate above may not be the correct one for
974 974 # this invocation of match.
975 975 lfdirstate = lfutil.openlfdirstate(
976 976 mctx.repo().ui, mctx.repo(), False
977 977 )
978 978
979 979 wctx = repo[None]
980 980 matchfiles = []
981 981 for f in m._files:
982 982 standin = lfutil.standin(f)
983 983 if standin in ctx or standin in mctx:
984 984 matchfiles.append(standin)
985 985 elif standin in wctx or lfdirstate.get_entry(f).removed:
986 986 continue
987 987 else:
988 988 matchfiles.append(f)
989 989 m._files = matchfiles
990 990 m._fileset = set(m._files)
991 991 origmatchfn = m.matchfn
992 992
993 993 def matchfn(f):
994 994 lfile = lfutil.splitstandin(f)
995 995 if lfile is not None:
996 996 return origmatchfn(lfile) and (f in ctx or f in mctx)
997 997 return origmatchfn(f)
998 998
999 999 m.matchfn = matchfn
1000 1000 return m
1001 1001
1002 1002 with extensions.wrappedfunction(scmutil, 'match', overridematch):
1003 1003 orig(ui, repo, ctx, *pats, **opts)
1004 1004
1005 1005 newstandins = lfutil.getstandinsstate(repo)
1006 1006 filelist = lfutil.getlfilestoupdate(oldstandins, newstandins)
1007 1007 # lfdirstate should be 'normallookup'-ed for updated files,
1008 1008 # because reverting doesn't touch dirstate for 'normal' files
1009 1009 # when target revision is explicitly specified: in such case,
1010 1010 # 'n' and valid timestamp in dirstate doesn't ensure 'clean'
1011 1011 # of target (standin) file.
1012 1012 lfcommands.updatelfiles(
1013 1013 ui, repo, filelist, printmessage=False, normallookup=True
1014 1014 )
1015 1015
1016 1016
1017 1017 # after pulling changesets, we need to take some extra care to get
1018 1018 # largefiles updated remotely
1019 1019 @eh.wrapcommand(
1020 1020 b'pull',
1021 1021 opts=[
1022 1022 (
1023 1023 b'',
1024 1024 b'all-largefiles',
1025 1025 None,
1026 1026 _(b'download all pulled versions of largefiles (DEPRECATED)'),
1027 1027 ),
1028 1028 (
1029 1029 b'',
1030 1030 b'lfrev',
1031 1031 [],
1032 1032 _(b'download largefiles for these revisions'),
1033 1033 _(b'REV'),
1034 1034 ),
1035 1035 ],
1036 1036 )
1037 1037 def overridepull(orig, ui, repo, source=None, **opts):
1038 1038 revsprepull = len(repo)
1039 1039 if not source:
1040 1040 source = b'default'
1041 1041 repo.lfpullsource = source
1042 1042 result = orig(ui, repo, source, **opts)
1043 1043 revspostpull = len(repo)
1044 1044 lfrevs = opts.get('lfrev', [])
1045 1045 if opts.get('all_largefiles'):
1046 1046 lfrevs.append(b'pulled()')
1047 1047 if lfrevs and revspostpull > revsprepull:
1048 1048 numcached = 0
1049 1049 repo.firstpulled = revsprepull # for pulled() revset expression
1050 1050 try:
1051 1051 for rev in logcmdutil.revrange(repo, lfrevs):
1052 1052 ui.note(_(b'pulling largefiles for revision %d\n') % rev)
1053 1053 (cached, missing) = lfcommands.cachelfiles(ui, repo, rev)
1054 1054 numcached += len(cached)
1055 1055 finally:
1056 1056 del repo.firstpulled
1057 1057 ui.status(_(b"%d largefiles cached\n") % numcached)
1058 1058 return result
1059 1059
1060 1060
1061 1061 @eh.wrapcommand(
1062 1062 b'push',
1063 1063 opts=[
1064 1064 (
1065 1065 b'',
1066 1066 b'lfrev',
1067 1067 [],
1068 1068 _(b'upload largefiles for these revisions'),
1069 1069 _(b'REV'),
1070 1070 )
1071 1071 ],
1072 1072 )
1073 1073 def overridepush(orig, ui, repo, *args, **kwargs):
1074 1074 """Override push command and store --lfrev parameters in opargs"""
1075 1075 lfrevs = kwargs.pop('lfrev', None)
1076 1076 if lfrevs:
1077 1077 opargs = kwargs.setdefault('opargs', {})
1078 1078 opargs[b'lfrevs'] = logcmdutil.revrange(repo, lfrevs)
1079 1079 return orig(ui, repo, *args, **kwargs)
1080 1080
1081 1081
1082 1082 @eh.wrapfunction(exchange, 'pushoperation')
1083 1083 def exchangepushoperation(orig, *args, **kwargs):
1084 1084 """Override pushoperation constructor and store lfrevs parameter"""
1085 1085 lfrevs = kwargs.pop('lfrevs', None)
1086 1086 pushop = orig(*args, **kwargs)
1087 1087 pushop.lfrevs = lfrevs
1088 1088 return pushop
1089 1089
1090 1090
1091 1091 @eh.revsetpredicate(b'pulled()')
1092 1092 def pulledrevsetsymbol(repo, subset, x):
1093 1093 """Changesets that just has been pulled.
1094 1094
1095 1095 Only available with largefiles from pull --lfrev expressions.
1096 1096
1097 1097 .. container:: verbose
1098 1098
1099 1099 Some examples:
1100 1100
1101 1101 - pull largefiles for all new changesets::
1102 1102
1103 1103 hg pull --lfrev "pulled()"
1104 1104
1105 1105 - pull largefiles for all new branch heads::
1106 1106
1107 1107 hg pull --lfrev "head(pulled()) and not closed()"
1108 1108
1109 1109 """
1110 1110
1111 1111 try:
1112 1112 firstpulled = repo.firstpulled
1113 1113 except AttributeError:
1114 1114 raise error.Abort(_(b"pulled() only available in --lfrev"))
1115 1115 return smartset.baseset([r for r in subset if r >= firstpulled])
1116 1116
1117 1117
1118 1118 @eh.wrapcommand(
1119 1119 b'clone',
1120 1120 opts=[
1121 1121 (
1122 1122 b'',
1123 1123 b'all-largefiles',
1124 1124 None,
1125 1125 _(b'download all versions of all largefiles'),
1126 1126 )
1127 1127 ],
1128 1128 )
1129 1129 def overrideclone(orig, ui, source, dest=None, **opts):
1130 1130 d = dest
1131 1131 if d is None:
1132 1132 d = hg.defaultdest(source)
1133 1133 if opts.get('all_largefiles') and not hg.islocal(d):
1134 1134 raise error.Abort(
1135 1135 _(b'--all-largefiles is incompatible with non-local destination %s')
1136 1136 % d
1137 1137 )
1138 1138
1139 1139 return orig(ui, source, dest, **opts)
1140 1140
1141 1141
1142 1142 @eh.wrapfunction(hg, 'clone')
1143 1143 def hgclone(orig, ui, opts, *args, **kwargs):
1144 1144 result = orig(ui, opts, *args, **kwargs)
1145 1145
1146 1146 if result is not None:
1147 1147 sourcerepo, destrepo = result
1148 1148 repo = destrepo.local()
1149 1149
1150 1150 # When cloning to a remote repo (like through SSH), no repo is available
1151 1151 # from the peer. Therefore the largefiles can't be downloaded and the
1152 1152 # hgrc can't be updated.
1153 1153 if not repo:
1154 1154 return result
1155 1155
1156 1156 # Caching is implicitly limited to 'rev' option, since the dest repo was
1157 1157 # truncated at that point. The user may expect a download count with
1158 1158 # this option, so attempt it whether or not this is a largefile repo.
1159 1159 if opts.get(b'all_largefiles'):
1160 1160 success, missing = lfcommands.downloadlfiles(ui, repo)
1161 1161
1162 1162 if missing != 0:
1163 1163 return None
1164 1164
1165 1165 return result
1166 1166
1167 1167
1168 1168 @eh.wrapcommand(b'rebase', extension=b'rebase')
1169 1169 def overriderebasecmd(orig, ui, repo, **opts):
1170 if not util.safehasattr(repo, '_largefilesenabled'):
1170 if not hasattr(repo, '_largefilesenabled'):
1171 1171 return orig(ui, repo, **opts)
1172 1172
1173 1173 resuming = opts.get('continue')
1174 1174 repo._lfcommithooks.append(lfutil.automatedcommithook(resuming))
1175 1175 repo._lfstatuswriters.append(lambda *msg, **opts: None)
1176 1176 try:
1177 1177 with ui.configoverride(
1178 1178 {(b'rebase', b'experimental.inmemory'): False}, b"largefiles"
1179 1179 ):
1180 1180 return orig(ui, repo, **opts)
1181 1181 finally:
1182 1182 repo._lfstatuswriters.pop()
1183 1183 repo._lfcommithooks.pop()
1184 1184
1185 1185
1186 1186 @eh.extsetup
1187 1187 def overriderebase(ui):
1188 1188 try:
1189 1189 rebase = extensions.find(b'rebase')
1190 1190 except KeyError:
1191 1191 pass
1192 1192 else:
1193 1193
1194 1194 def _dorebase(orig, *args, **kwargs):
1195 1195 kwargs['inmemory'] = False
1196 1196 return orig(*args, **kwargs)
1197 1197
1198 1198 extensions.wrapfunction(rebase, '_dorebase', _dorebase)
1199 1199
1200 1200
1201 1201 @eh.wrapcommand(b'archive')
1202 1202 def overridearchivecmd(orig, ui, repo, dest, **opts):
1203 1203 with lfstatus(repo.unfiltered()):
1204 1204 return orig(ui, repo.unfiltered(), dest, **opts)
1205 1205
1206 1206
1207 1207 @eh.wrapfunction(webcommands, 'archive')
1208 1208 def hgwebarchive(orig, web):
1209 1209 with lfstatus(web.repo):
1210 1210 return orig(web)
1211 1211
1212 1212
1213 1213 @eh.wrapfunction(archival, 'archive')
1214 1214 def overridearchive(
1215 1215 orig,
1216 1216 repo,
1217 1217 dest,
1218 1218 node,
1219 1219 kind,
1220 1220 decode=True,
1221 1221 match=None,
1222 1222 prefix=b'',
1223 1223 mtime=None,
1224 1224 subrepos=None,
1225 1225 ):
1226 1226 # For some reason setting repo.lfstatus in hgwebarchive only changes the
1227 1227 # unfiltered repo's attr, so check that as well.
1228 1228 if not repo.lfstatus and not repo.unfiltered().lfstatus:
1229 1229 return orig(
1230 1230 repo, dest, node, kind, decode, match, prefix, mtime, subrepos
1231 1231 )
1232 1232
1233 1233 # No need to lock because we are only reading history and
1234 1234 # largefile caches, neither of which are modified.
1235 1235 if node is not None:
1236 1236 lfcommands.cachelfiles(repo.ui, repo, node)
1237 1237
1238 1238 if kind not in archival.archivers:
1239 1239 raise error.Abort(_(b"unknown archive type '%s'") % kind)
1240 1240
1241 1241 ctx = repo[node]
1242 1242
1243 1243 if kind == b'files':
1244 1244 if prefix:
1245 1245 raise error.Abort(_(b'cannot give prefix when archiving to files'))
1246 1246 else:
1247 1247 prefix = archival.tidyprefix(dest, kind, prefix)
1248 1248
1249 1249 def write(name, mode, islink, getdata):
1250 1250 if match and not match(name):
1251 1251 return
1252 1252 data = getdata()
1253 1253 if decode:
1254 1254 data = repo.wwritedata(name, data)
1255 1255 archiver.addfile(prefix + name, mode, islink, data)
1256 1256
1257 1257 archiver = archival.archivers[kind](dest, mtime or ctx.date()[0])
1258 1258
1259 1259 if repo.ui.configbool(b"ui", b"archivemeta"):
1260 1260 write(
1261 1261 b'.hg_archival.txt',
1262 1262 0o644,
1263 1263 False,
1264 1264 lambda: archival.buildmetadata(ctx),
1265 1265 )
1266 1266
1267 1267 for f in ctx:
1268 1268 ff = ctx.flags(f)
1269 1269 getdata = ctx[f].data
1270 1270 lfile = lfutil.splitstandin(f)
1271 1271 if lfile is not None:
1272 1272 if node is not None:
1273 1273 path = lfutil.findfile(repo, getdata().strip())
1274 1274
1275 1275 if path is None:
1276 1276 raise error.Abort(
1277 1277 _(
1278 1278 b'largefile %s not found in repo store or system cache'
1279 1279 )
1280 1280 % lfile
1281 1281 )
1282 1282 else:
1283 1283 path = lfile
1284 1284
1285 1285 f = lfile
1286 1286
1287 1287 getdata = lambda: util.readfile(path)
1288 1288 write(f, b'x' in ff and 0o755 or 0o644, b'l' in ff, getdata)
1289 1289
1290 1290 if subrepos:
1291 1291 for subpath in sorted(ctx.substate):
1292 1292 sub = ctx.workingsub(subpath)
1293 1293 submatch = matchmod.subdirmatcher(subpath, match)
1294 1294 subprefix = prefix + subpath + b'/'
1295 1295
1296 1296 # TODO: Only hgsubrepo instances have `_repo`, so figure out how to
1297 1297 # infer and possibly set lfstatus in hgsubrepoarchive. That would
1298 1298 # allow only hgsubrepos to set this, instead of the current scheme
1299 1299 # where the parent sets this for the child.
1300 1300 with (
1301 util.safehasattr(sub, '_repo')
1301 hasattr(sub, '_repo')
1302 1302 and lfstatus(sub._repo)
1303 1303 or util.nullcontextmanager()
1304 1304 ):
1305 1305 sub.archive(archiver, subprefix, submatch)
1306 1306
1307 1307 archiver.done()
1308 1308
1309 1309
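A side note on the with-statement just above (the identical construct appears again in hgsubrepoarchive below): it picks its context manager with the old and/or ternary idiom. Only hg subrepos carry a `_repo` attribute, so largefile status is toggled for those, and a do-nothing context is used for every other subrepo kind. The following is only a small stand-alone sketch of that pattern; lf_status, HgSub and GitSub are hypothetical stand-ins for lfstatus and the real subrepo classes, not code from the patch.

import contextlib

@contextlib.contextmanager
def lf_status(repo):
    # stand-in for lfstatus(): flip a flag for the duration of the block
    repo['lfstatus'] = True
    try:
        yield
    finally:
        repo['lfstatus'] = False

class HgSub:
    def __init__(self):
        self._repo = {'lfstatus': False}

class GitSub:
    pass  # no _repo attribute, like non-hg subrepos

for sub in (HgSub(), GitSub()):
    with (
        hasattr(sub, '_repo')
        and lf_status(sub._repo)
        or contextlib.nullcontext()
    ):
        # archive the subrepo here; only hg subrepos see lfstatus enabled
        enabled = getattr(sub, '_repo', {}).get('lfstatus', False)
        print(type(sub).__name__, 'lfstatus:', enabled)

When the attribute is present, the and-branch yields the lf_status context manager; otherwise the or-branch falls through to the null context, so the with-statement always has something valid to enter.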
1310 1310 @eh.wrapfunction(subrepo.hgsubrepo, 'archive')
1311 1311 def hgsubrepoarchive(orig, repo, archiver, prefix, match=None, decode=True):
1312 lfenabled = util.safehasattr(repo._repo, '_largefilesenabled')
1312 lfenabled = hasattr(repo._repo, '_largefilesenabled')
1313 1313 if not lfenabled or not repo._repo.lfstatus:
1314 1314 return orig(repo, archiver, prefix, match, decode)
1315 1315
1316 1316 repo._get(repo._state + (b'hg',))
1317 1317 rev = repo._state[1]
1318 1318 ctx = repo._repo[rev]
1319 1319
1320 1320 if ctx.node() is not None:
1321 1321 lfcommands.cachelfiles(repo.ui, repo._repo, ctx.node())
1322 1322
1323 1323 def write(name, mode, islink, getdata):
1324 1324 # At this point, the standin has been replaced with the largefile name,
1325 1325 # so the normal matcher works here without the lfutil variants.
1326 1326 if match and not match(f):
1327 1327 return
1328 1328 data = getdata()
1329 1329 if decode:
1330 1330 data = repo._repo.wwritedata(name, data)
1331 1331
1332 1332 archiver.addfile(prefix + name, mode, islink, data)
1333 1333
1334 1334 for f in ctx:
1335 1335 ff = ctx.flags(f)
1336 1336 getdata = ctx[f].data
1337 1337 lfile = lfutil.splitstandin(f)
1338 1338 if lfile is not None:
1339 1339 if ctx.node() is not None:
1340 1340 path = lfutil.findfile(repo._repo, getdata().strip())
1341 1341
1342 1342 if path is None:
1343 1343 raise error.Abort(
1344 1344 _(
1345 1345 b'largefile %s not found in repo store or system cache'
1346 1346 )
1347 1347 % lfile
1348 1348 )
1349 1349 else:
1350 1350 path = lfile
1351 1351
1352 1352 f = lfile
1353 1353
1354 1354 getdata = lambda: util.readfile(os.path.join(prefix, path))
1355 1355
1356 1356 write(f, b'x' in ff and 0o755 or 0o644, b'l' in ff, getdata)
1357 1357
1358 1358 for subpath in sorted(ctx.substate):
1359 1359 sub = ctx.workingsub(subpath)
1360 1360 submatch = matchmod.subdirmatcher(subpath, match)
1361 1361 subprefix = prefix + subpath + b'/'
1362 1362 # TODO: Only hgsubrepo instances have `_repo`, so figure out how to
1363 1363 # infer and possibly set lfstatus at the top of this function. That
1364 1364 # would allow only hgsubrepos to set this, instead of the current scheme
1365 1365 # where the parent sets this for the child.
1366 1366 with (
1367 util.safehasattr(sub, '_repo')
1367 hasattr(sub, '_repo')
1368 1368 and lfstatus(sub._repo)
1369 1369 or util.nullcontextmanager()
1370 1370 ):
1371 1371 sub.archive(archiver, subprefix, submatch, decode)
1372 1372
1373 1373
1374 1374 # If a largefile is modified, the change is not reflected in its
1375 1375 # standin until a commit. cmdutil.bailifchanged() raises an exception
1376 1376 # if the repo has uncommitted changes. Wrap it to also check if
1377 1377 # largefiles were changed. This is used by bisect, backout and fetch.
1378 1378 @eh.wrapfunction(cmdutil, 'bailifchanged')
1379 1379 def overridebailifchanged(orig, repo, *args, **kwargs):
1380 1380 orig(repo, *args, **kwargs)
1381 1381 with lfstatus(repo):
1382 1382 s = repo.status()
1383 1383 if s.modified or s.added or s.removed or s.deleted:
1384 1384 raise error.Abort(_(b'uncommitted changes'))
1385 1385
1386 1386
1387 1387 @eh.wrapfunction(cmdutil, 'postcommitstatus')
1388 1388 def postcommitstatus(orig, repo, *args, **kwargs):
1389 1389 with lfstatus(repo):
1390 1390 return orig(repo, *args, **kwargs)
1391 1391
1392 1392
1393 1393 @eh.wrapfunction(cmdutil, 'forget')
1394 1394 def cmdutilforget(
1395 1395 orig, ui, repo, match, prefix, uipathfn, explicitonly, dryrun, interactive
1396 1396 ):
1397 1397 normalmatcher = composenormalfilematcher(match, repo[None].manifest())
1398 1398 bad, forgot = orig(
1399 1399 ui,
1400 1400 repo,
1401 1401 normalmatcher,
1402 1402 prefix,
1403 1403 uipathfn,
1404 1404 explicitonly,
1405 1405 dryrun,
1406 1406 interactive,
1407 1407 )
1408 1408 m = composelargefilematcher(match, repo[None].manifest())
1409 1409
1410 1410 with lfstatus(repo):
1411 1411 s = repo.status(match=m, clean=True)
1412 1412 manifest = repo[None].manifest()
1413 1413 forget = sorted(s.modified + s.added + s.deleted + s.clean)
1414 1414 forget = [f for f in forget if lfutil.standin(f) in manifest]
1415 1415
1416 1416 for f in forget:
1417 1417 fstandin = lfutil.standin(f)
1418 1418 if fstandin not in repo.dirstate and not repo.wvfs.isdir(fstandin):
1419 1419 ui.warn(
1420 1420 _(b'not removing %s: file is already untracked\n') % uipathfn(f)
1421 1421 )
1422 1422 bad.append(f)
1423 1423
1424 1424 for f in forget:
1425 1425 if ui.verbose or not m.exact(f):
1426 1426 ui.status(_(b'removing %s\n') % uipathfn(f))
1427 1427
1428 1428 # Need to lock because standin files are deleted then removed from the
1429 1429 # repository and we could race in-between.
1430 1430 with repo.wlock():
1431 1431 lfdirstate = lfutil.openlfdirstate(ui, repo)
1432 1432 for f in forget:
1433 1433 lfdirstate.set_untracked(f)
1434 1434 lfdirstate.write(repo.currenttransaction())
1435 1435 standins = [lfutil.standin(f) for f in forget]
1436 1436 for f in standins:
1437 1437 repo.wvfs.unlinkpath(f, ignoremissing=True)
1438 1438 rejected = repo[None].forget(standins)
1439 1439
1440 1440 bad.extend(f for f in rejected if f in m.files())
1441 1441 forgot.extend(f for f in forget if f not in rejected)
1442 1442 return bad, forgot
1443 1443
1444 1444
1445 1445 def _getoutgoings(repo, other, missing, addfunc):
1446 1446 """get pairs of filename and largefile hash in outgoing revisions
1447 1447 in 'missing'.
1448 1448
1449 1449 largefiles already existing on 'other' repository are ignored.
1450 1450
1451 1451 'addfunc' is invoked with each unique pairs of filename and
1452 1452 largefile hash value.
1453 1453 """
1454 1454 knowns = set()
1455 1455 lfhashes = set()
1456 1456
1457 1457 def dedup(fn, lfhash):
1458 1458 k = (fn, lfhash)
1459 1459 if k not in knowns:
1460 1460 knowns.add(k)
1461 1461 lfhashes.add(lfhash)
1462 1462
1463 1463 lfutil.getlfilestoupload(repo, missing, dedup)
1464 1464 if lfhashes:
1465 1465 lfexists = storefactory.openstore(repo, other).exists(lfhashes)
1466 1466 for fn, lfhash in knowns:
1467 1467 if not lfexists[lfhash]: # lfhash doesn't exist on "other"
1468 1468 addfunc(fn, lfhash)
1469 1469
1470 1470
1471 1471 def outgoinghook(ui, repo, other, opts, missing):
1472 1472 if opts.pop(b'large', None):
1473 1473 lfhashes = set()
1474 1474 if ui.debugflag:
1475 1475 toupload = {}
1476 1476
1477 1477 def addfunc(fn, lfhash):
1478 1478 if fn not in toupload:
1479 1479 toupload[fn] = [] # pytype: disable=unsupported-operands
1480 1480 toupload[fn].append(lfhash)
1481 1481 lfhashes.add(lfhash)
1482 1482
1483 1483 def showhashes(fn):
1484 1484 for lfhash in sorted(toupload[fn]):
1485 1485 ui.debug(b' %s\n' % lfhash)
1486 1486
1487 1487 else:
1488 1488 toupload = set()
1489 1489
1490 1490 def addfunc(fn, lfhash):
1491 1491 toupload.add(fn)
1492 1492 lfhashes.add(lfhash)
1493 1493
1494 1494 def showhashes(fn):
1495 1495 pass
1496 1496
1497 1497 _getoutgoings(repo, other, missing, addfunc)
1498 1498
1499 1499 if not toupload:
1500 1500 ui.status(_(b'largefiles: no files to upload\n'))
1501 1501 else:
1502 1502 ui.status(
1503 1503 _(b'largefiles to upload (%d entities):\n') % (len(lfhashes))
1504 1504 )
1505 1505 for file in sorted(toupload):
1506 1506 ui.status(lfutil.splitstandin(file) + b'\n')
1507 1507 showhashes(file)
1508 1508 ui.status(b'\n')
1509 1509
1510 1510
1511 1511 @eh.wrapcommand(
1512 1512 b'outgoing', opts=[(b'', b'large', None, _(b'display outgoing largefiles'))]
1513 1513 )
1514 1514 def _outgoingcmd(orig, *args, **kwargs):
1515 1515 # Nothing to do here other than add the extra help option; the hook above
1516 1516 # processes it.
1517 1517 return orig(*args, **kwargs)
1518 1518
1519 1519
1520 1520 def summaryremotehook(ui, repo, opts, changes):
1521 1521 largeopt = opts.get(b'large', False)
1522 1522 if changes is None:
1523 1523 if largeopt:
1524 1524 return (False, True) # only outgoing check is needed
1525 1525 else:
1526 1526 return (False, False)
1527 1527 elif largeopt:
1528 1528 url, branch, peer, outgoing = changes[1]
1529 1529 if peer is None:
1530 1530 # i18n: column positioning for "hg summary"
1531 1531 ui.status(_(b'largefiles: (no remote repo)\n'))
1532 1532 return
1533 1533
1534 1534 toupload = set()
1535 1535 lfhashes = set()
1536 1536
1537 1537 def addfunc(fn, lfhash):
1538 1538 toupload.add(fn)
1539 1539 lfhashes.add(lfhash)
1540 1540
1541 1541 _getoutgoings(repo, peer, outgoing.missing, addfunc)
1542 1542
1543 1543 if not toupload:
1544 1544 # i18n: column positioning for "hg summary"
1545 1545 ui.status(_(b'largefiles: (no files to upload)\n'))
1546 1546 else:
1547 1547 # i18n: column positioning for "hg summary"
1548 1548 ui.status(
1549 1549 _(b'largefiles: %d entities for %d files to upload\n')
1550 1550 % (len(lfhashes), len(toupload))
1551 1551 )
1552 1552
1553 1553
1554 1554 @eh.wrapcommand(
1555 1555 b'summary', opts=[(b'', b'large', None, _(b'display outgoing largefiles'))]
1556 1556 )
1557 1557 def overridesummary(orig, ui, repo, *pats, **opts):
1558 1558 with lfstatus(repo):
1559 1559 orig(ui, repo, *pats, **opts)
1560 1560
1561 1561
1562 1562 @eh.wrapfunction(scmutil, 'addremove')
1563 1563 def scmutiladdremove(
1564 1564 orig,
1565 1565 repo,
1566 1566 matcher,
1567 1567 prefix,
1568 1568 uipathfn,
1569 1569 opts=None,
1570 1570 open_tr=None,
1571 1571 ):
1572 1572 if opts is None:
1573 1573 opts = {}
1574 1574 if not lfutil.islfilesrepo(repo):
1575 1575 return orig(repo, matcher, prefix, uipathfn, opts, open_tr=open_tr)
1576 1576
1577 1577 # open the transaction and changing_files context
1578 1578 if open_tr is not None:
1579 1579 open_tr()
1580 1580
1581 1581 # Get the list of missing largefiles so we can remove them
1582 1582 with repo.dirstate.running_status(repo):
1583 1583 lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
1584 1584 unsure, s, mtime_boundary = lfdirstate.status(
1585 1585 matchmod.always(),
1586 1586 subrepos=[],
1587 1587 ignored=False,
1588 1588 clean=False,
1589 1589 unknown=False,
1590 1590 )
1591 1591
1592 1592 # Call into the normal remove code, but we want the removal of the standin
1593 1593 # to be handled by the original addremove. Monkey patching here makes sure
1594 1594 # we don't remove the standin in the largefiles code, preventing a very
1595 1595 # confused state later.
1596 1596 if s.deleted:
1597 1597 m = copy.copy(matcher)
1598 1598
1599 1599 # The m._files and m._map attributes are not changed to the deleted list
1600 1600 # because that affects the m.exact() test, which in turn governs whether
1601 1601 # or not the file name is printed, and how. Simply limit the original
1602 1602 # matches to those in the deleted status list.
1603 1603 matchfn = m.matchfn
1604 1604 m.matchfn = lambda f: f in s.deleted and matchfn(f)
1605 1605
1606 1606 removelargefiles(
1607 1607 repo.ui,
1608 1608 repo,
1609 1609 True,
1610 1610 m,
1611 1611 uipathfn,
1612 1612 opts.get(b'dry_run'),
1613 1613 **pycompat.strkwargs(opts)
1614 1614 )
1615 1615 # Call into the normal add code, and any files that *should* be added as
1616 1616 # largefiles will be
1617 1617 added, bad = addlargefiles(
1618 1618 repo.ui, repo, True, matcher, uipathfn, **pycompat.strkwargs(opts)
1619 1619 )
1620 1620 # Now that we've handled largefiles, hand off to the original addremove
1621 1621 # function to take care of the rest. Make sure it doesn't do anything with
1622 1622 # largefiles by passing a matcher that will ignore them.
1623 1623 matcher = composenormalfilematcher(matcher, repo[None].manifest(), added)
1624 1624
1625 1625 return orig(repo, matcher, prefix, uipathfn, opts, open_tr=open_tr)
1626 1626
1627 1627
1628 1628 # Calling purge with --all will cause the largefiles to be deleted.
1629 1629 # Override repo.status to prevent this from happening.
1630 1630 @eh.wrapcommand(b'purge')
1631 1631 def overridepurge(orig, ui, repo, *dirs, **opts):
1632 1632 # XXX Monkey patching a repoview will not work. The assigned attribute will
1633 1633 # be set on the unfiltered repo, but we will only lookup attributes in the
1634 1634 # unfiltered repo if the lookup in the repoview object itself fails. As the
1635 1635 # monkey patched method exists on the repoview class the lookup will not
1636 1636 # fail. As a result, the original version will shadow the monkey patched
1637 1637 # one, defeating the monkey patch.
1638 1638 #
1639 1639 # As a workaround we use an unfiltered repo here. We should do something
1640 1640 # cleaner instead.
1641 1641 repo = repo.unfiltered()
1642 1642 oldstatus = repo.status
1643 1643
1644 1644 def overridestatus(
1645 1645 node1=b'.',
1646 1646 node2=None,
1647 1647 match=None,
1648 1648 ignored=False,
1649 1649 clean=False,
1650 1650 unknown=False,
1651 1651 listsubrepos=False,
1652 1652 ):
1653 1653 r = oldstatus(
1654 1654 node1, node2, match, ignored, clean, unknown, listsubrepos
1655 1655 )
1656 1656 lfdirstate = lfutil.openlfdirstate(ui, repo)
1657 1657 unknown = [
1658 1658 f for f in r.unknown if not lfdirstate.get_entry(f).any_tracked
1659 1659 ]
1660 1660 ignored = [
1661 1661 f for f in r.ignored if not lfdirstate.get_entry(f).any_tracked
1662 1662 ]
1663 1663 return scmutil.status(
1664 1664 r.modified, r.added, r.removed, r.deleted, unknown, ignored, r.clean
1665 1665 )
1666 1666
1667 1667 repo.status = overridestatus
1668 1668 orig(ui, repo, *dirs, **opts)
1669 1669 repo.status = oldstatus
1670 1670
1671 1671
1672 1672 @eh.wrapcommand(b'rollback')
1673 1673 def overriderollback(orig, ui, repo, **opts):
1674 1674 with repo.wlock():
1675 1675 before = repo.dirstate.parents()
1676 1676 orphans = {
1677 1677 f
1678 1678 for f in repo.dirstate
1679 1679 if lfutil.isstandin(f) and not repo.dirstate.get_entry(f).removed
1680 1680 }
1681 1681 result = orig(ui, repo, **opts)
1682 1682 after = repo.dirstate.parents()
1683 1683 if before == after:
1684 1684 return result # no need to restore standins
1685 1685
1686 1686 pctx = repo[b'.']
1687 1687 for f in repo.dirstate:
1688 1688 if lfutil.isstandin(f):
1689 1689 orphans.discard(f)
1690 1690 if repo.dirstate.get_entry(f).removed:
1691 1691 repo.wvfs.unlinkpath(f, ignoremissing=True)
1692 1692 elif f in pctx:
1693 1693 fctx = pctx[f]
1694 1694 repo.wwrite(f, fctx.data(), fctx.flags())
1695 1695 else:
1696 1696 # content of standin is not so important in 'a',
1697 1697 # 'm' or 'n' (coming from the 2nd parent) cases
1698 1698 lfutil.writestandin(repo, f, b'', False)
1699 1699 for standin in orphans:
1700 1700 repo.wvfs.unlinkpath(standin, ignoremissing=True)
1701 1701
1702 1702 return result
1703 1703
1704 1704
1705 1705 @eh.wrapcommand(b'transplant', extension=b'transplant')
1706 1706 def overridetransplant(orig, ui, repo, *revs, **opts):
1707 1707 resuming = opts.get('continue')
1708 1708 repo._lfcommithooks.append(lfutil.automatedcommithook(resuming))
1709 1709 repo._lfstatuswriters.append(lambda *msg, **opts: None)
1710 1710 try:
1711 1711 result = orig(ui, repo, *revs, **opts)
1712 1712 finally:
1713 1713 repo._lfstatuswriters.pop()
1714 1714 repo._lfcommithooks.pop()
1715 1715 return result
1716 1716
1717 1717
1718 1718 @eh.wrapcommand(b'cat')
1719 1719 def overridecat(orig, ui, repo, file1, *pats, **opts):
1720 1720 ctx = logcmdutil.revsingle(repo, opts.get('rev'))
1721 1721 err = 1
1722 1722 notbad = set()
1723 1723 m = scmutil.match(ctx, (file1,) + pats, pycompat.byteskwargs(opts))
1724 1724 origmatchfn = m.matchfn
1725 1725
1726 1726 def lfmatchfn(f):
1727 1727 if origmatchfn(f):
1728 1728 return True
1729 1729 lf = lfutil.splitstandin(f)
1730 1730 if lf is None:
1731 1731 return False
1732 1732 notbad.add(lf)
1733 1733 return origmatchfn(lf)
1734 1734
1735 1735 m.matchfn = lfmatchfn
1736 1736 origbadfn = m.bad
1737 1737
1738 1738 def lfbadfn(f, msg):
1739 1739 if not f in notbad:
1740 1740 origbadfn(f, msg)
1741 1741
1742 1742 m.bad = lfbadfn
1743 1743
1744 1744 origvisitdirfn = m.visitdir
1745 1745
1746 1746 def lfvisitdirfn(dir):
1747 1747 if dir == lfutil.shortname:
1748 1748 return True
1749 1749 ret = origvisitdirfn(dir)
1750 1750 if ret:
1751 1751 return ret
1752 1752 lf = lfutil.splitstandin(dir)
1753 1753 if lf is None:
1754 1754 return False
1755 1755 return origvisitdirfn(lf)
1756 1756
1757 1757 m.visitdir = lfvisitdirfn
1758 1758
1759 1759 for f in ctx.walk(m):
1760 1760 with cmdutil.makefileobj(ctx, opts.get('output'), pathname=f) as fp:
1761 1761 lf = lfutil.splitstandin(f)
1762 1762 if lf is None or origmatchfn(f):
1763 1763 # duplicating unreachable code from commands.cat
1764 1764 data = ctx[f].data()
1765 1765 if opts.get('decode'):
1766 1766 data = repo.wwritedata(f, data)
1767 1767 fp.write(data)
1768 1768 else:
1769 1769 hash = lfutil.readasstandin(ctx[f])
1770 1770 if not lfutil.inusercache(repo.ui, hash):
1771 1771 store = storefactory.openstore(repo)
1772 1772 success, missing = store.get([(lf, hash)])
1773 1773 if len(success) != 1:
1774 1774 raise error.Abort(
1775 1775 _(
1776 1776 b'largefile %s is not in cache and could not be '
1777 1777 b'downloaded'
1778 1778 )
1779 1779 % lf
1780 1780 )
1781 1781 path = lfutil.usercachepath(repo.ui, hash)
1782 1782 with open(path, b"rb") as fpin:
1783 1783 for chunk in util.filechunkiter(fpin):
1784 1784 fp.write(chunk)
1785 1785 err = 0
1786 1786 return err
1787 1787
1788 1788
1789 1789 @eh.wrapfunction(merge, '_update')
1790 1790 def mergeupdate(orig, repo, node, branchmerge, force, *args, **kwargs):
1791 1791 matcher = kwargs.get('matcher', None)
1792 1792 # note if this is a partial update
1793 1793 partial = matcher and not matcher.always()
1794 1794 with repo.wlock(), repo.dirstate.changing_parents(repo):
1795 1795 # branch | | |
1796 1796 # merge | force | partial | action
1797 1797 # -------+-------+---------+--------------
1798 1798 # x | x | x | linear-merge
1799 1799 # o | x | x | branch-merge
1800 1800 # x | o | x | overwrite (as clean update)
1801 1801 # o | o | x | force-branch-merge (*1)
1802 1802 # x | x | o | (*)
1803 1803 # o | x | o | (*)
1804 1804 # x | o | o | overwrite (as revert)
1805 1805 # o | o | o | (*)
1806 1806 #
1807 1807 # (*) don't care
1808 1808 # (*1) deprecated, but used internally (e.g: "rebase --collapse")
1809 1809 with repo.dirstate.running_status(repo):
1810 1810 lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
1811 1811 unsure, s, mtime_boundary = lfdirstate.status(
1812 1812 matchmod.always(),
1813 1813 subrepos=[],
1814 1814 ignored=False,
1815 1815 clean=True,
1816 1816 unknown=False,
1817 1817 )
1818 1818 oldclean = set(s.clean)
1819 1819 pctx = repo[b'.']
1820 1820 dctx = repo[node]
1821 1821 for lfile in unsure + s.modified:
1822 1822 lfileabs = repo.wvfs.join(lfile)
1823 1823 if not repo.wvfs.exists(lfileabs):
1824 1824 continue
1825 1825 lfhash = lfutil.hashfile(lfileabs)
1826 1826 standin = lfutil.standin(lfile)
1827 1827 lfutil.writestandin(
1828 1828 repo, standin, lfhash, lfutil.getexecutable(lfileabs)
1829 1829 )
1830 1830 if standin in pctx and lfhash == lfutil.readasstandin(
1831 1831 pctx[standin]
1832 1832 ):
1833 1833 oldclean.add(lfile)
1834 1834 for lfile in s.added:
1835 1835 fstandin = lfutil.standin(lfile)
1836 1836 if fstandin not in dctx:
1837 1837 # in this case, content of standin file is meaningless
1838 1838 # (in dctx, lfile is unknown, or normal file)
1839 1839 continue
1840 1840 lfutil.updatestandin(repo, lfile, fstandin)
1841 1841 # mark all clean largefiles as dirty, just in case the update gets
1842 1842 # interrupted before largefiles and lfdirstate are synchronized
1843 1843 for lfile in oldclean:
1844 1844 entry = lfdirstate.get_entry(lfile)
1845 1845 lfdirstate.hacky_extension_update_file(
1846 1846 lfile,
1847 1847 wc_tracked=entry.tracked,
1848 1848 p1_tracked=entry.p1_tracked,
1849 1849 p2_info=entry.p2_info,
1850 1850 possibly_dirty=True,
1851 1851 )
1852 1852 lfdirstate.write(repo.currenttransaction())
1853 1853
1854 1854 oldstandins = lfutil.getstandinsstate(repo)
1855 1855 wc = kwargs.get('wc')
1856 1856 if wc and wc.isinmemory():
1857 1857 # largefiles is not a good candidate for in-memory merge (large
1858 1858 # files, custom dirstate, matcher usage).
1859 1859 raise error.ProgrammingError(
1860 1860 b'largefiles is not compatible with in-memory merge'
1861 1861 )
1862 1862 result = orig(repo, node, branchmerge, force, *args, **kwargs)
1863 1863
1864 1864 newstandins = lfutil.getstandinsstate(repo)
1865 1865 filelist = lfutil.getlfilestoupdate(oldstandins, newstandins)
1866 1866
1867 1867 # to avoid leaving all largefiles as dirty and thus rehash them, mark
1868 1868 # all the ones that didn't change as clean
1869 1869 for lfile in oldclean.difference(filelist):
1870 1870 lfdirstate.update_file(lfile, p1_tracked=True, wc_tracked=True)
1871 1871
1872 1872 if branchmerge or force or partial:
1873 1873 filelist.extend(s.deleted + s.removed)
1874 1874
1875 1875 lfcommands.updatelfiles(
1876 1876 repo.ui, repo, filelist=filelist, normallookup=partial
1877 1877 )
1878 1878
1879 1879 return result
1880 1880
1881 1881
1882 1882 @eh.wrapfunction(scmutil, 'marktouched')
1883 1883 def scmutilmarktouched(orig, repo, files, *args, **kwargs):
1884 1884 result = orig(repo, files, *args, **kwargs)
1885 1885
1886 1886 filelist = []
1887 1887 for f in files:
1888 1888 lf = lfutil.splitstandin(f)
1889 1889 if lf is not None:
1890 1890 filelist.append(lf)
1891 1891 if filelist:
1892 1892 lfcommands.updatelfiles(
1893 1893 repo.ui,
1894 1894 repo,
1895 1895 filelist=filelist,
1896 1896 printmessage=False,
1897 1897 normallookup=True,
1898 1898 )
1899 1899
1900 1900 return result
1901 1901
1902 1902
1903 1903 @eh.wrapfunction(upgrade_actions, 'preservedrequirements')
1904 1904 @eh.wrapfunction(upgrade_actions, 'supporteddestrequirements')
1905 1905 def upgraderequirements(orig, repo):
1906 1906 reqs = orig(repo)
1907 1907 if b'largefiles' in repo.requirements:
1908 1908 reqs.add(b'largefiles')
1909 1909 return reqs
1910 1910
1911 1911
1912 1912 _lfscheme = b'largefile://'
1913 1913
1914 1914
1915 1915 @eh.wrapfunction(urlmod, 'open')
1916 1916 def openlargefile(orig, ui, url_, data=None, **kwargs):
1917 1917 if url_.startswith(_lfscheme):
1918 1918 if data:
1919 1919 msg = b"cannot use data on a 'largefile://' url"
1920 1920 raise error.ProgrammingError(msg)
1921 1921 lfid = url_[len(_lfscheme) :]
1922 1922 return storefactory.getlfile(ui, lfid)
1923 1923 else:
1924 1924 return orig(ui, url_, data=data, **kwargs)
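Note on the change itself: every hunk in this file swaps util.safehasattr(obj, 'attr') for the built-in hasattr(obj, 'attr'). The helper dates from the Python 2 era, where hasattr() silently swallowed every exception raised while computing the attribute; on Python 3 the built-in only catches AttributeError, which is the guarantee safehasattr() existed to provide. A minimal, self-contained sketch of that equivalence follows. It is not part of the patch, and safehasattr is reimplemented here with the usual getattr-and-sentinel trick purely for illustration.

class Demo:
    @property
    def flaky(self):
        raise ValueError("not an AttributeError")

_notset = object()

def safehasattr(thing, attr):
    # old-style helper: getattr with a sentinel, so only AttributeError is hidden
    return getattr(thing, attr, _notset) is not _notset

d = Demo()

for probe in (safehasattr, hasattr):
    try:
        probe(d, "flaky")
    except ValueError:
        # on Python 3 both probes let the ValueError escape
        print(probe.__name__, "propagates non-AttributeError exceptions")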
@@ -1,96 +1,96 b''
1 1 # This software may be used and distributed according to the terms of the
2 2 # GNU General Public License version 2 or any later version.
3 3
4 4
5 5 import re
6 6
7 7 from mercurial.i18n import _
8 8 from mercurial.pycompat import getattr
9 9 from mercurial import (
10 10 error,
11 11 hg,
12 12 util,
13 13 )
14 14 from mercurial.utils import (
15 15 urlutil,
16 16 )
17 17
18 18 from . import (
19 19 lfutil,
20 20 localstore,
21 21 wirestore,
22 22 )
23 23
24 24
25 25 # During clone this function is passed the src's ui object
26 26 # but it needs the dest's ui object so it can read out of
27 27 # the config file. Use repo.ui instead.
28 28 def openstore(repo=None, remote=None, put=False, ui=None):
29 29 if ui is None:
30 30 ui = repo.ui
31 31
32 32 if not remote:
33 33 lfpullsource = getattr(repo, 'lfpullsource', None)
34 34 if put:
35 35 path = urlutil.get_unique_push_path(
36 36 b'lfpullsource', repo, ui, lfpullsource
37 37 )
38 38 else:
39 39 path = urlutil.get_unique_pull_path_obj(
40 40 b'lfpullsource', ui, lfpullsource
41 41 )
42 42
43 43 # XXX we should not explicitly pass b'default', as this will result in
44 44 # b'default' being returned if no `paths.default` was defined. We
45 45 # should explicitly handle the lack of value instead.
46 46 if repo is None:
47 47 path = urlutil.get_unique_pull_path_obj(
48 48 b'lfs',
49 49 ui,
50 50 b'default',
51 51 )
52 52 remote = hg.peer(repo or ui, {}, path)
53 53 elif path.loc == b'default-push' or path.loc == b'default':
54 54 remote = repo
55 55 else:
56 56 remote = hg.peer(repo or ui, {}, path)
57 57
58 58 # The path could be a scheme so use Mercurial's normal functionality
59 59 # to resolve the scheme to a repository and use its path
60 path = util.safehasattr(remote, 'url') and remote.url() or remote.path
60 path = hasattr(remote, 'url') and remote.url() or remote.path
61 61
62 62 match = _scheme_re.match(path)
63 63 if not match: # regular filesystem path
64 64 scheme = b'file'
65 65 else:
66 66 scheme = match.group(1)
67 67
68 68 try:
69 69 storeproviders = _storeprovider[scheme]
70 70 except KeyError:
71 71 raise error.Abort(_(b'unsupported URL scheme %r') % scheme)
72 72
73 73 for classobj in storeproviders:
74 74 try:
75 75 return classobj(ui, repo, remote)
76 76 except lfutil.storeprotonotcapable:
77 77 pass
78 78
79 79 raise error.Abort(
80 80 _(b'%s does not appear to be a largefile store')
81 81 % urlutil.hidepassword(path)
82 82 )
83 83
84 84
85 85 _storeprovider = {
86 86 b'file': [localstore.localstore],
87 87 b'http': [wirestore.wirestore],
88 88 b'https': [wirestore.wirestore],
89 89 b'ssh': [wirestore.wirestore],
90 90 }
91 91
92 92 _scheme_re = re.compile(br'^([a-zA-Z0-9+-.]+)://')
93 93
94 94
95 95 def getlfile(ui, hash):
96 96 return util.chunkbuffer(openstore(ui=ui)._get(hash))
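For orientation, openstore() above dispatches on the URL scheme of the resolved path: _scheme_re extracts the scheme (defaulting to file for plain filesystem paths), _storeprovider maps it to candidate store classes, and each candidate is tried until one accepts the peer, the rest raising storeprotonotcapable. The sketch below is only a stripped-down illustration of that dispatch; FileStore, WireStore and StoreNotCapable are hypothetical stand-ins for localstore.localstore, wirestore.wirestore and lfutil.storeprotonotcapable.

import re

_scheme_re = re.compile(r'^([a-zA-Z0-9+.-]+)://')

class StoreNotCapable(Exception):
    """Raised by a provider that cannot serve the given peer."""

class FileStore:
    def __init__(self, path):
        self.path = path

class WireStore:
    def __init__(self, path):
        self.path = path

_storeprovider = {
    'file': [FileStore],
    'http': [WireStore],
    'https': [WireStore],
    'ssh': [WireStore],
}

def open_store(path):
    # A path without a scheme is a regular filesystem path.
    m = _scheme_re.match(path)
    scheme = m.group(1) if m else 'file'
    providers = _storeprovider.get(scheme)
    if providers is None:
        raise ValueError('unsupported URL scheme %r' % scheme)
    # Each provider may refuse the peer; fall through to the next one.
    for cls in providers:
        try:
            return cls(path)
        except StoreNotCapable:
            continue
    raise ValueError('%r does not appear to be a largefile store' % path)

print(type(open_store('https://example.com/repo')).__name__)  # WireStore
print(type(open_store('/srv/repos/foo')).__name__)            # FileStore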
@@ -1,789 +1,789 b''
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import contextlib
10 10 import errno
11 11 import hashlib
12 12 import json
13 13 import os
14 14 import re
15 15 import socket
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.pycompat import getattr
19 19 from mercurial.node import hex
20 20
21 21 from mercurial import (
22 22 encoding,
23 23 error,
24 24 httpconnection as httpconnectionmod,
25 25 pathutil,
26 26 pycompat,
27 27 url as urlmod,
28 28 util,
29 29 vfs as vfsmod,
30 30 worker,
31 31 )
32 32
33 33 from mercurial.utils import (
34 34 stringutil,
35 35 urlutil,
36 36 )
37 37
38 38 from ..largefiles import lfutil
39 39
40 40 # 64 bytes for SHA256
41 41 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
42 42
43 43
44 44 class lfsvfs(vfsmod.vfs):
45 45 def join(self, path):
46 46 """split the path at first two characters, like: XX/XXXXX..."""
47 47 if not _lfsre.match(path):
48 48 raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
49 49 return super(lfsvfs, self).join(path[0:2], path[2:])
50 50
51 51 def walk(self, path=None, onerror=None):
52 52 """Yield (dirpath, [], oids) tuple for blobs under path
53 53
54 54 Oids only exist in the root of this vfs, so dirpath is always ''.
55 55 """
56 56 root = os.path.normpath(self.base)
57 57 # when dirpath == root, dirpath[prefixlen:] becomes empty
58 58 # because len(dirpath) < prefixlen.
59 59 prefixlen = len(pathutil.normasprefix(root))
60 60 oids = []
61 61
62 62 for dirpath, dirs, files in os.walk(
63 63 self.reljoin(self.base, path or b''), onerror=onerror
64 64 ):
65 65 dirpath = dirpath[prefixlen:]
66 66
67 67 # Silently skip unexpected files and directories
68 68 if len(dirpath) == 2:
69 69 oids.extend(
70 70 [dirpath + f for f in files if _lfsre.match(dirpath + f)]
71 71 )
72 72
73 73 yield (b'', [], oids)
74 74
75 75
76 76 class nullvfs(lfsvfs):
77 77 def __init__(self):
78 78 pass
79 79
80 80 def exists(self, oid):
81 81 return False
82 82
83 83 def read(self, oid):
84 84 # store.read() calls into here if the blob doesn't exist in its
85 85 # self.vfs. Raise the same error as a normal vfs when asked to read a
86 86 # file that doesn't exist. The only difference is the full file path
87 87 # isn't available in the error.
88 88 raise IOError(
89 89 errno.ENOENT,
90 90 pycompat.sysstr(b'%s: No such file or directory' % oid),
91 91 )
92 92
93 93 def walk(self, path=None, onerror=None):
94 94 return (b'', [], [])
95 95
96 96 def write(self, oid, data):
97 97 pass
98 98
99 99
100 100 class lfsuploadfile(httpconnectionmod.httpsendfile):
101 101 """a file-like object that supports keepalive."""
102 102
103 103 def __init__(self, ui, filename):
104 104 super(lfsuploadfile, self).__init__(ui, filename, b'rb')
105 105 self.read = self._data.read
106 106
107 107 def _makeprogress(self):
108 108 return None # progress is handled by the worker client
109 109
110 110
111 111 class local:
112 112 """Local blobstore for large file contents.
113 113
114 114 This blobstore is used both as a cache and as a staging area for large blobs
115 115 to be uploaded to the remote blobstore.
116 116 """
117 117
118 118 def __init__(self, repo):
119 119 fullpath = repo.svfs.join(b'lfs/objects')
120 120 self.vfs = lfsvfs(fullpath)
121 121
122 122 if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
123 123 self.cachevfs = nullvfs()
124 124 else:
125 125 usercache = lfutil._usercachedir(repo.ui, b'lfs')
126 126 self.cachevfs = lfsvfs(usercache)
127 127 self.ui = repo.ui
128 128
129 129 def open(self, oid):
130 130 """Open a read-only file descriptor to the named blob, in either the
131 131 usercache or the local store."""
132 132 return open(self.path(oid), 'rb')
133 133
134 134 def path(self, oid):
135 135 """Build the path for the given blob ``oid``.
136 136
137 137 If the blob exists locally, the path may point to either the usercache
138 138 or the local store. If it doesn't, it will point to the local store.
139 139 This is meant for situations where existing code that isn't LFS aware
140 140 needs to open a blob. Generally, prefer the ``open`` method on this
141 141 class.
142 142 """
143 143 # The usercache is the most likely place to hold the file. Commit will
144 144 # write to both it and the local store, as will anything that downloads
145 145 # the blobs. However, things like clone without an update won't
146 146 # populate the local store. For an init + push of a local clone,
147 147 # the usercache is the only place it _could_ be. If not present, the
148 148 # missing file msg here will indicate the local repo, not the usercache.
149 149 if self.cachevfs.exists(oid):
150 150 return self.cachevfs.join(oid)
151 151
152 152 return self.vfs.join(oid)
153 153
154 154 def download(self, oid, src, content_length):
155 155 """Read the blob from the remote source in chunks, verify the content,
156 156 and write to this local blobstore."""
157 157 sha256 = hashlib.sha256()
158 158 size = 0
159 159
160 160 with self.vfs(oid, b'wb', atomictemp=True) as fp:
161 161 for chunk in util.filechunkiter(src, size=1048576):
162 162 fp.write(chunk)
163 163 sha256.update(chunk)
164 164 size += len(chunk)
165 165
166 166 # If the server advertised a length longer than what we actually
167 167 # received, then we should expect that the server crashed while
168 168 # producing the response (but the server has no way of telling us
169 169 # that), and we really don't need to try to write the response to
170 170 # the localstore, because it's not going to match the expected.
171 171 # The server also uses this method to store data uploaded by the
172 172 # client, so if this happens on the server side, it's possible
173 173 # that the client crashed or an antivirus interfered with the
174 174 # upload.
175 175 if content_length is not None and int(content_length) != size:
176 176 msg = (
177 177 b"Response length (%d) does not match Content-Length "
178 178 b"header (%d) for %s"
179 179 )
180 180 raise LfsRemoteError(_(msg) % (size, int(content_length), oid))
181 181
182 182 realoid = hex(sha256.digest())
183 183 if realoid != oid:
184 184 raise LfsCorruptionError(
185 185 _(b'corrupt remote lfs object: %s') % oid
186 186 )
187 187
188 188 self._linktousercache(oid)
189 189
190 190 def write(self, oid, data):
191 191 """Write blob to local blobstore.
192 192
193 193 This should only be called from the filelog during a commit or similar.
194 194 As such, there is no need to verify the data. Imports from a remote
195 195 store must use ``download()`` instead."""
196 196 with self.vfs(oid, b'wb', atomictemp=True) as fp:
197 197 fp.write(data)
198 198
199 199 self._linktousercache(oid)
200 200
201 201 def linkfromusercache(self, oid):
202 202 """Link blobs found in the user cache into this store.
203 203
204 204 The server module needs to do this when it lets the client know not to
205 205 upload the blob, to ensure it is always available in this store.
206 206 Normally this is done implicitly when the client reads or writes the
207 207 blob, but that doesn't happen when the server tells the client that it
208 208 already has the blob.
209 209 """
210 210 if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
211 211 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
212 212 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
213 213
214 214 def _linktousercache(self, oid):
215 215 # XXX: should we verify the content of the cache, and hardlink back to
216 216 # the local store on success, but truncate, write and link on failure?
217 217 if not self.cachevfs.exists(oid) and not isinstance(
218 218 self.cachevfs, nullvfs
219 219 ):
220 220 self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
221 221 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
222 222
223 223 def read(self, oid, verify=True):
224 224 """Read blob from local blobstore."""
225 225 if not self.vfs.exists(oid):
226 226 blob = self._read(self.cachevfs, oid, verify)
227 227
228 228 # Even if revlog will verify the content, it needs to be verified
229 229 # now before making the hardlink to avoid propagating corrupt blobs.
230 230 # Don't abort if corruption is detected, because `hg verify` will
231 231 # give more useful info about the corruption- simply don't add the
232 232 # hardlink.
233 233 if verify or hex(hashlib.sha256(blob).digest()) == oid:
234 234 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
235 235 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
236 236 else:
237 237 self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
238 238 blob = self._read(self.vfs, oid, verify)
239 239 return blob
240 240
241 241 def _read(self, vfs, oid, verify):
242 242 """Read blob (after verifying) from the given store"""
243 243 blob = vfs.read(oid)
244 244 if verify:
245 245 _verify(oid, blob)
246 246 return blob
247 247
248 248 def verify(self, oid):
249 249 """Indicate whether or not the hash of the underlying file matches its
250 250 name."""
251 251 sha256 = hashlib.sha256()
252 252
253 253 with self.open(oid) as fp:
254 254 for chunk in util.filechunkiter(fp, size=1048576):
255 255 sha256.update(chunk)
256 256
257 257 return oid == hex(sha256.digest())
258 258
259 259 def has(self, oid):
260 260 """Returns True if the local blobstore contains the requested blob,
261 261 False otherwise."""
262 262 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
263 263
264 264
265 265 def _urlerrorreason(urlerror):
266 266 """Create a friendly message for the given URLError to be used in an
267 267 LfsRemoteError message.
268 268 """
269 269 inst = urlerror
270 270
271 271 if isinstance(urlerror.reason, Exception):
272 272 inst = urlerror.reason
273 273
274 if util.safehasattr(inst, 'reason'):
274 if hasattr(inst, 'reason'):
275 275 try: # usually it is in the form (errno, strerror)
276 276 reason = inst.reason.args[1]
277 277 except (AttributeError, IndexError):
278 278 # it might be anything, for example a string
279 279 reason = inst.reason
280 280 if isinstance(reason, str):
281 281 # SSLError of Python 2.7.9 contains a unicode
282 282 reason = encoding.unitolocal(reason)
283 283 return reason
284 284 elif getattr(inst, "strerror", None):
285 285 return encoding.strtolocal(inst.strerror)
286 286 else:
287 287 return stringutil.forcebytestr(urlerror)
288 288
289 289
290 290 class lfsauthhandler(util.urlreq.basehandler):
291 291 handler_order = 480 # Before HTTPDigestAuthHandler (== 490)
292 292
293 293 def http_error_401(self, req, fp, code, msg, headers):
294 294 """Enforces that any authentication performed is HTTP Basic
295 295 Authentication. No authentication is also acceptable.
296 296 """
297 297 authreq = headers.get('www-authenticate', None)
298 298 if authreq:
299 299 scheme = authreq.split()[0]
300 300
301 301 if scheme.lower() != 'basic':
302 302 msg = _(b'the server must support Basic Authentication')
303 303 raise util.urlerr.httperror(
304 304 req.get_full_url(),
305 305 code,
306 306 encoding.strfromlocal(msg),
307 307 headers,
308 308 fp,
309 309 )
310 310 return None
311 311
312 312
313 313 class _gitlfsremote:
314 314 def __init__(self, repo, url):
315 315 ui = repo.ui
316 316 self.ui = ui
317 317 baseurl, authinfo = url.authinfo()
318 318 self.baseurl = baseurl.rstrip(b'/')
319 319 useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
320 320 if not useragent:
321 321 useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
322 322 self.urlopener = urlmod.opener(ui, authinfo, useragent)
323 323 self.urlopener.add_handler(lfsauthhandler())
324 324 self.retry = ui.configint(b'lfs', b'retry')
325 325
326 326 def writebatch(self, pointers, fromstore):
327 327 """Batch upload from local to remote blobstore."""
328 328 self._batch(_deduplicate(pointers), fromstore, b'upload')
329 329
330 330 def readbatch(self, pointers, tostore):
331 331 """Batch download from remote to local blostore."""
332 332 self._batch(_deduplicate(pointers), tostore, b'download')
333 333
334 334 def _batchrequest(self, pointers, action):
335 335 """Get metadata about objects pointed by pointers for given action
336 336
337 337 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
338 338 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
339 339 """
340 340 objects = [
341 341 {'oid': pycompat.strurl(p.oid()), 'size': p.size()}
342 342 for p in pointers
343 343 ]
344 344 requestdata = pycompat.bytesurl(
345 345 json.dumps(
346 346 {
347 347 'objects': objects,
348 348 'operation': pycompat.strurl(action),
349 349 }
350 350 )
351 351 )
352 352 url = b'%s/objects/batch' % self.baseurl
353 353 batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
354 354 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
355 355 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
356 356 try:
357 357 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
358 358 rawjson = rsp.read()
359 359 except util.urlerr.httperror as ex:
360 360 hints = {
361 361 400: _(
362 362 b'check that lfs serving is enabled on %s and "%s" is '
363 363 b'supported'
364 364 )
365 365 % (self.baseurl, action),
366 366 404: _(b'the "lfs.url" config may be used to override %s')
367 367 % self.baseurl,
368 368 }
369 369 hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
370 370 raise LfsRemoteError(
371 371 _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
372 372 hint=hint,
373 373 )
374 374 except util.urlerr.urlerror as ex:
375 375 hint = (
376 376 _(b'the "lfs.url" config may be used to override %s')
377 377 % self.baseurl
378 378 )
379 379 raise LfsRemoteError(
380 380 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
381 381 )
382 382 try:
383 383 response = pycompat.json_loads(rawjson)
384 384 except ValueError:
385 385 raise LfsRemoteError(
386 386 _(b'LFS server returns invalid JSON: %s')
387 387 % rawjson.encode("utf-8")
388 388 )
389 389
390 390 if self.ui.debugflag:
391 391 self.ui.debug(b'Status: %d\n' % rsp.status)
392 392 # lfs-test-server and hg serve return headers in different order
393 393 headers = pycompat.bytestr(rsp.info()).strip()
394 394 self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
395 395
396 396 if 'objects' in response:
397 397 response['objects'] = sorted(
398 398 response['objects'], key=lambda p: p['oid']
399 399 )
400 400 self.ui.debug(
401 401 b'%s\n'
402 402 % pycompat.bytesurl(
403 403 json.dumps(
404 404 response,
405 405 indent=2,
406 406 separators=('', ': '),
407 407 sort_keys=True,
408 408 )
409 409 )
410 410 )
411 411
412 412 def encodestr(x):
413 413 if isinstance(x, str):
414 414 return x.encode('utf-8')
415 415 return x
416 416
417 417 return pycompat.rapply(encodestr, response)
418 418
419 419 def _checkforservererror(self, pointers, responses, action):
420 420 """Scans errors from objects
421 421
422 422 Raises LfsRemoteError if any objects have an error"""
423 423 for response in responses:
424 424 # The server should return 404 when objects cannot be found. Some
425 425 # server implementations (e.g. lfs-test-server) do not set "error"
426 426 # but just remove "download" from "actions". Treat that case
427 427 # the same as a 404 error.
428 428 if b'error' not in response:
429 429 if action == b'download' and action not in response.get(
430 430 b'actions', []
431 431 ):
432 432 code = 404
433 433 else:
434 434 continue
435 435 else:
436 436 # An error dict without a code doesn't make much sense, so
437 437 # treat as a server error.
438 438 code = response.get(b'error').get(b'code', 500)
439 439
440 440 ptrmap = {p.oid(): p for p in pointers}
441 441 p = ptrmap.get(response[b'oid'], None)
442 442 if p:
443 443 filename = getattr(p, 'filename', b'unknown')
444 444 errors = {
445 445 404: b'The object does not exist',
446 446 410: b'The object was removed by the owner',
447 447 422: b'Validation error',
448 448 500: b'Internal server error',
449 449 }
450 450 msg = errors.get(code, b'status code %d' % code)
451 451 raise LfsRemoteError(
452 452 _(b'LFS server error for "%s": %s') % (filename, msg)
453 453 )
454 454 else:
455 455 raise LfsRemoteError(
456 456 _(b'LFS server error. Unsolicited response for oid %s')
457 457 % response[b'oid']
458 458 )
459 459
460 460 def _extractobjects(self, response, pointers, action):
461 461 """extract objects from response of the batch API
462 462
463 463 response: parsed JSON object returned by batch API
464 464 return response['objects'] filtered by action
465 465 raise if any object has an error
466 466 """
467 467 # Scan errors from objects - fail early
468 468 objects = response.get(b'objects', [])
469 469 self._checkforservererror(pointers, objects, action)
470 470
471 471 # Filter objects with given action. Practically, this skips uploading
472 472 # objects which exist in the server.
473 473 filteredobjects = [
474 474 o for o in objects if action in o.get(b'actions', [])
475 475 ]
476 476
477 477 return filteredobjects
478 478
479 479 def _basictransfer(self, obj, action, localstore):
480 480 """Download or upload a single object using basic transfer protocol
481 481
482 482 obj: dict, an object description returned by batch API
483 483 action: string, one of ['upload', 'download']
484 484 localstore: blobstore.local
485 485
486 486 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
487 487 basic-transfers.md
488 488 """
489 489 oid = obj[b'oid']
490 490 href = obj[b'actions'][action].get(b'href')
491 491 headers = obj[b'actions'][action].get(b'header', {}).items()
492 492
493 493 request = util.urlreq.request(pycompat.strurl(href))
494 494 if action == b'upload':
495 495 # If uploading blobs, read data from local blobstore.
496 496 if not localstore.verify(oid):
497 497 raise error.Abort(
498 498 _(b'detected corrupt lfs object: %s') % oid,
499 499 hint=_(b'run hg verify'),
500 500 )
501 501
502 502 for k, v in headers:
503 503 request.add_header(pycompat.strurl(k), pycompat.strurl(v))
504 504
505 505 try:
506 506 if action == b'upload':
507 507 request.data = lfsuploadfile(self.ui, localstore.path(oid))
508 508 request.get_method = lambda: 'PUT'
509 509 request.add_header('Content-Type', 'application/octet-stream')
510 510 request.add_header('Content-Length', request.data.length)
511 511
512 512 with contextlib.closing(self.urlopener.open(request)) as res:
513 513 contentlength = res.info().get(b"content-length")
514 514 ui = self.ui # Shorten debug lines
515 515 if self.ui.debugflag:
516 516 ui.debug(b'Status: %d\n' % res.status)
517 517 # lfs-test-server and hg serve return headers in different
518 518 # order
519 519 headers = pycompat.bytestr(res.info()).strip()
520 520 ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
521 521
522 522 if action == b'download':
523 523 # If downloading blobs, store downloaded data to local
524 524 # blobstore
525 525 localstore.download(oid, res, contentlength)
526 526 else:
527 527 blocks = []
528 528 while True:
529 529 data = res.read(1048576)
530 530 if not data:
531 531 break
532 532 blocks.append(data)
533 533
534 534 response = b"".join(blocks)
535 535 if response:
536 536 ui.debug(b'lfs %s response: %s' % (action, response))
537 537 except util.urlerr.httperror as ex:
538 538 if self.ui.debugflag:
539 539 self.ui.debug(
540 540 b'%s: %s\n' % (oid, ex.read())
541 541 ) # XXX: also bytes?
542 542 raise LfsRemoteError(
543 543 _(b'LFS HTTP error: %s (oid=%s, action=%s)')
544 544 % (stringutil.forcebytestr(ex), oid, action)
545 545 )
546 546 except util.urlerr.urlerror as ex:
547 547 hint = _(b'attempted connection to %s') % pycompat.bytesurl(
548 548 util.urllibcompat.getfullurl(request)
549 549 )
550 550 raise LfsRemoteError(
551 551 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
552 552 )
553 553 finally:
554 554 if request.data:
555 555 request.data.close()
556 556
557 557 def _batch(self, pointers, localstore, action):
558 558 if action not in [b'upload', b'download']:
559 559 raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)
560 560
561 561 response = self._batchrequest(pointers, action)
562 562 objects = self._extractobjects(response, pointers, action)
563 563 total = sum(x.get(b'size', 0) for x in objects)
564 564 sizes = {}
565 565 for obj in objects:
566 566 sizes[obj.get(b'oid')] = obj.get(b'size', 0)
567 567 topic = {
568 568 b'upload': _(b'lfs uploading'),
569 569 b'download': _(b'lfs downloading'),
570 570 }[action]
571 571 if len(objects) > 1:
572 572 self.ui.note(
573 573 _(b'lfs: need to transfer %d objects (%s)\n')
574 574 % (len(objects), util.bytecount(total))
575 575 )
576 576
577 577 def transfer(chunk):
578 578 for obj in chunk:
579 579 objsize = obj.get(b'size', 0)
580 580 if self.ui.verbose:
581 581 if action == b'download':
582 582 msg = _(b'lfs: downloading %s (%s)\n')
583 583 elif action == b'upload':
584 584 msg = _(b'lfs: uploading %s (%s)\n')
585 585 self.ui.note(
586 586 msg % (obj.get(b'oid'), util.bytecount(objsize))
587 587 )
588 588 retry = self.retry
589 589 while True:
590 590 try:
591 591 self._basictransfer(obj, action, localstore)
592 592 yield 1, obj.get(b'oid')
593 593 break
594 594 except socket.error as ex:
595 595 if retry > 0:
596 596 self.ui.note(
597 597 _(b'lfs: failed: %r (remaining retry %d)\n')
598 598 % (stringutil.forcebytestr(ex), retry)
599 599 )
600 600 retry -= 1
601 601 continue
602 602 raise
603 603
604 604 # Until https multiplexing gets sorted out. It's not clear if
605 605 # ConnectionManager.set_ready() is externally synchronized for thread
606 606 # safety with Windows workers.
607 607 if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
608 608 # The POSIX workers are forks of this process, so before spinning
609 609 # them up, close all pooled connections. Otherwise, there's no way
610 610 # to coordinate between them about who is using what, and the
611 611 # transfers will get corrupted.
612 612 #
613 613 # TODO: add a function to keepalive.ConnectionManager to mark all
614 614 # ready connections as in use, and roll that back after the fork?
615 615 # That would allow the existing pool of connections in this process
616 616 # to be preserved.
617 617 def prefork():
618 618 for h in self.urlopener.handlers:
619 619 getattr(h, "close_all", lambda: None)()
620 620
621 621 oids = worker.worker(
622 622 self.ui,
623 623 0.1,
624 624 transfer,
625 625 (),
626 626 sorted(objects, key=lambda o: o.get(b'oid')),
627 627 prefork=prefork,
628 628 )
629 629 else:
630 630 oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))
631 631
632 632 with self.ui.makeprogress(
633 633 topic, unit=_(b"bytes"), total=total
634 634 ) as progress:
635 635 progress.update(0)
636 636 processed = 0
637 637 blobs = 0
638 638 for _one, oid in oids:
639 639 processed += sizes[oid]
640 640 blobs += 1
641 641 progress.update(processed)
642 642 self.ui.note(_(b'lfs: processed: %s\n') % oid)
643 643
644 644 if blobs > 0:
645 645 if action == b'upload':
646 646 self.ui.status(
647 647 _(b'lfs: uploaded %d files (%s)\n')
648 648 % (blobs, util.bytecount(processed))
649 649 )
650 650 elif action == b'download':
651 651 self.ui.status(
652 652 _(b'lfs: downloaded %d files (%s)\n')
653 653 % (blobs, util.bytecount(processed))
654 654 )
655 655
656 656 def __del__(self):
657 657 # copied from mercurial/httppeer.py
658 658 urlopener = getattr(self, 'urlopener', None)
659 659 if urlopener:
660 660 for h in urlopener.handlers:
661 661 h.close()
662 662 getattr(h, "close_all", lambda: None)()
663 663
664 664
665 665 class _dummyremote:
666 666 """Dummy store storing blobs to temp directory."""
667 667
668 668 def __init__(self, repo, url):
669 669 fullpath = repo.vfs.join(b'lfs', url.path)
670 670 self.vfs = lfsvfs(fullpath)
671 671
672 672 def writebatch(self, pointers, fromstore):
673 673 for p in _deduplicate(pointers):
674 674 content = fromstore.read(p.oid(), verify=True)
675 675 with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
676 676 fp.write(content)
677 677
678 678 def readbatch(self, pointers, tostore):
679 679 for p in _deduplicate(pointers):
680 680 with self.vfs(p.oid(), b'rb') as fp:
681 681 tostore.download(p.oid(), fp, None)
682 682
683 683
684 684 class _nullremote:
685 685 """Null store storing blobs to /dev/null."""
686 686
687 687 def __init__(self, repo, url):
688 688 pass
689 689
690 690 def writebatch(self, pointers, fromstore):
691 691 pass
692 692
693 693 def readbatch(self, pointers, tostore):
694 694 pass
695 695
696 696
697 697 class _promptremote:
698 698 """Prompt user to set lfs.url when accessed."""
699 699
700 700 def __init__(self, repo, url):
701 701 pass
702 702
703 703 def writebatch(self, pointers, fromstore, ui=None):
704 704 self._prompt()
705 705
706 706 def readbatch(self, pointers, tostore, ui=None):
707 707 self._prompt()
708 708
709 709 def _prompt(self):
710 710 raise error.Abort(_(b'lfs.url needs to be configured'))
711 711
712 712
713 713 _storemap = {
714 714 b'https': _gitlfsremote,
715 715 b'http': _gitlfsremote,
716 716 b'file': _dummyremote,
717 717 b'null': _nullremote,
718 718 None: _promptremote,
719 719 }
720 720
721 721
722 722 def _deduplicate(pointers):
723 723 """Remove any duplicate oids that exist in the list"""
724 724 reduced = util.sortdict()
725 725 for p in pointers:
726 726 reduced[p.oid()] = p
727 727 return reduced.values()
728 728
729 729
730 730 def _verify(oid, content):
731 731 realoid = hex(hashlib.sha256(content).digest())
732 732 if realoid != oid:
733 733 raise LfsCorruptionError(
734 734 _(b'detected corrupt lfs object: %s') % oid,
735 735 hint=_(b'run hg verify'),
736 736 )
737 737
738 738
739 739 def remote(repo, remote=None):
740 740 """remotestore factory. return a store in _storemap depending on config
741 741
742 742 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
743 743 infer the endpoint, based on the remote repository using the same path
744 744 adjustments as git. As an extension, 'http' is supported as well so that
745 745 ``hg serve`` works out of the box.
746 746
747 747 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
748 748 """
749 749 lfsurl = repo.ui.config(b'lfs', b'url')
750 750 url = urlutil.url(lfsurl or b'')
751 751 if lfsurl is None:
752 752 if remote:
753 753 path = remote
754 elif util.safehasattr(repo, '_subtoppath'):
754 elif hasattr(repo, '_subtoppath'):
755 755 # The pull command sets this during the optional update phase, which
756 756 # tells exactly where the pull originated, whether 'paths.default'
757 757 # or explicit.
758 758 path = repo._subtoppath
759 759 else:
760 760 # TODO: investigate 'paths.remote:lfsurl' style path customization,
761 761 # and fall back to inferring from 'paths.remote' if unspecified.
762 762 path = repo.ui.config(b'paths', b'default') or b''
763 763
764 764 defaulturl = urlutil.url(path)
765 765
766 766 # TODO: support local paths as well.
767 767 # TODO: consider the ssh -> https transformation that git applies
768 768 if defaulturl.scheme in (b'http', b'https'):
769 769 if defaulturl.path and defaulturl.path[:-1] != b'/':
770 770 defaulturl.path += b'/'
771 771 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
772 772
773 773 url = urlutil.url(bytes(defaulturl))
774 774 repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)
775 775
776 776 scheme = url.scheme
777 777 if scheme not in _storemap:
778 778 raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
779 779 return _storemap[scheme](repo, url)
780 780
781 781
782 782 class LfsRemoteError(error.StorageError):
783 783 pass
784 784
785 785
786 786 class LfsCorruptionError(error.Abort):
787 787 """Raised when a corrupt blob is detected, aborting an operation
788 788
789 789 It exists to allow specialized handling on the server side."""
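
A quick aside on the replacement made in this file: mercurial's util.safehasattr() helper is swapped for the built-in hasattr(). On Python 3 the two behave the same, because hasattr() only treats AttributeError as "attribute missing" and lets any other exception from the lookup propagate, which is essentially the guarantee safehasattr() was added to provide back when Python 2's hasattr() swallowed every exception. Below is a minimal, self-contained sketch of that equivalence; the Example class and its property are invented for illustration, and safehasattr() is reproduced here only in rough form.

_notset = object()

def safehasattr(thing, attr):
    # roughly mercurial.util.safehasattr(): getattr() with a sentinel default,
    # so only AttributeError is translated into "missing"
    return getattr(thing, attr, _notset) is not _notset

class Example:
    @property
    def flaky(self):
        raise ValueError("boom")

obj = Example()

# both helpers agree on an attribute that exists...
assert hasattr(obj, '__class__') and safehasattr(obj, '__class__')

# ...and on Python 3 neither hides an unrelated error raised during lookup
for probe in (hasattr, safehasattr):
    try:
        probe(obj, 'flaky')
    except ValueError:
        pass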
@@ -1,369 +1,368 b''
1 1 # wireprotolfsserver.py - lfs protocol server side implementation
2 2 #
3 3 # Copyright 2018 Matt Harbison <matt_harbison@yahoo.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import datetime
10 10 import errno
11 11 import json
12 12 import traceback
13 13
14 14 from mercurial.hgweb import common as hgwebcommon
15 15
16 16 from mercurial import (
17 17 exthelper,
18 18 pycompat,
19 util,
20 19 wireprotoserver,
21 20 )
22 21
23 22 from . import blobstore
24 23
25 24 HTTP_OK = hgwebcommon.HTTP_OK
26 25 HTTP_CREATED = hgwebcommon.HTTP_CREATED
27 26 HTTP_BAD_REQUEST = hgwebcommon.HTTP_BAD_REQUEST
28 27 HTTP_NOT_FOUND = hgwebcommon.HTTP_NOT_FOUND
29 28 HTTP_METHOD_NOT_ALLOWED = hgwebcommon.HTTP_METHOD_NOT_ALLOWED
30 29 HTTP_NOT_ACCEPTABLE = hgwebcommon.HTTP_NOT_ACCEPTABLE
31 30 HTTP_UNSUPPORTED_MEDIA_TYPE = hgwebcommon.HTTP_UNSUPPORTED_MEDIA_TYPE
32 31
33 32 eh = exthelper.exthelper()
34 33
35 34
36 35 @eh.wrapfunction(wireprotoserver, 'handlewsgirequest')
37 36 def handlewsgirequest(orig, rctx, req, res, checkperm):
38 37 """Wrap wireprotoserver.handlewsgirequest() to possibly process an LFS
39 38 request if it is left unprocessed by the wrapped method.
40 39 """
41 40 if orig(rctx, req, res, checkperm):
42 41 return True
43 42
44 43 if not rctx.repo.ui.configbool(b'experimental', b'lfs.serve'):
45 44 return False
46 45
47 if not util.safehasattr(rctx.repo.svfs, 'lfslocalblobstore'):
46 if not hasattr(rctx.repo.svfs, 'lfslocalblobstore'):
48 47 return False
49 48
50 49 if not req.dispatchpath:
51 50 return False
52 51
53 52 try:
54 53 if req.dispatchpath == b'.git/info/lfs/objects/batch':
55 54 checkperm(rctx, req, b'pull')
56 55 return _processbatchrequest(rctx.repo, req, res)
57 56 # TODO: reserve and use a path in the proposed http wireprotocol /api/
58 57 # namespace?
59 58 elif req.dispatchpath.startswith(b'.hg/lfs/objects'):
60 59 return _processbasictransfer(
61 60 rctx.repo, req, res, lambda perm: checkperm(rctx, req, perm)
62 61 )
63 62 return False
64 63 except hgwebcommon.ErrorResponse as e:
65 64 # XXX: copied from the handler surrounding wireprotoserver._callhttp()
66 65 # in the wrapped function. Should this be moved back to hgweb to
67 66 # be a common handler?
68 67 for k, v in e.headers:
69 68 res.headers[k] = v
70 69 res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
71 70 res.setbodybytes(b'0\n%s\n' % pycompat.bytestr(e))
72 71 return True
73 72
74 73
75 74 def _sethttperror(res, code, message=None):
76 75 res.status = hgwebcommon.statusmessage(code, message=message)
77 76 res.headers[b'Content-Type'] = b'text/plain; charset=utf-8'
78 77 res.setbodybytes(b'')
79 78
80 79
81 80 def _logexception(req):
82 81 """Write information about the current exception to wsgi.errors."""
83 82 tb = pycompat.sysbytes(traceback.format_exc())
84 83 errorlog = req.rawenv[b'wsgi.errors']
85 84
86 85 uri = b''
87 86 if req.apppath:
88 87 uri += req.apppath
89 88 uri += b'/' + req.dispatchpath
90 89
91 90 errorlog.write(
92 91 b"Exception happened while processing request '%s':\n%s" % (uri, tb)
93 92 )
94 93
95 94
96 95 def _processbatchrequest(repo, req, res):
97 96 """Handle a request for the Batch API, which is the gateway to granting file
98 97 access.
99 98
100 99 https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
101 100 """
102 101
103 102 # Mercurial client request:
104 103 #
105 104 # HOST: localhost:$HGPORT
106 105 # ACCEPT: application/vnd.git-lfs+json
107 106 # ACCEPT-ENCODING: identity
108 107 # USER-AGENT: git-lfs/2.3.4 (Mercurial 4.5.2+1114-f48b9754f04c+20180316)
109 108 # Content-Length: 125
110 109 # Content-Type: application/vnd.git-lfs+json
111 110 #
112 111 # {
113 112 # "objects": [
114 113 # {
115 114 # "oid": "31cf...8e5b"
116 115 # "size": 12
117 116 # }
118 117 # ]
119 118 # "operation": "upload"
120 119 # }
121 120
122 121 if req.method != b'POST':
123 122 _sethttperror(res, HTTP_METHOD_NOT_ALLOWED)
124 123 return True
125 124
126 125 if req.headers[b'Content-Type'] != b'application/vnd.git-lfs+json':
127 126 _sethttperror(res, HTTP_UNSUPPORTED_MEDIA_TYPE)
128 127 return True
129 128
130 129 if req.headers[b'Accept'] != b'application/vnd.git-lfs+json':
131 130 _sethttperror(res, HTTP_NOT_ACCEPTABLE)
132 131 return True
133 132
134 133 # XXX: specify an encoding?
135 134 lfsreq = pycompat.json_loads(req.bodyfh.read())
136 135
137 136 # If no transfer handlers are explicitly requested, 'basic' is assumed.
138 137 if 'basic' not in lfsreq.get('transfers', ['basic']):
139 138 _sethttperror(
140 139 res,
141 140 HTTP_BAD_REQUEST,
142 141 b'Only the basic LFS transfer handler is supported',
143 142 )
144 143 return True
145 144
146 145 operation = lfsreq.get('operation')
147 146 operation = pycompat.bytestr(operation)
148 147
149 148 if operation not in (b'upload', b'download'):
150 149 _sethttperror(
151 150 res,
152 151 HTTP_BAD_REQUEST,
153 152 b'Unsupported LFS transfer operation: %s' % operation,
154 153 )
155 154 return True
156 155
157 156 localstore = repo.svfs.lfslocalblobstore
158 157
159 158 objects = [
160 159 p
161 160 for p in _batchresponseobjects(
162 161 req, lfsreq.get('objects', []), operation, localstore
163 162 )
164 163 ]
165 164
166 165 rsp = {
167 166 'transfer': 'basic',
168 167 'objects': objects,
169 168 }
170 169
171 170 res.status = hgwebcommon.statusmessage(HTTP_OK)
172 171 res.headers[b'Content-Type'] = b'application/vnd.git-lfs+json'
173 172 res.setbodybytes(pycompat.bytestr(json.dumps(rsp)))
174 173
175 174 return True
176 175
177 176
178 177 def _batchresponseobjects(req, objects, action, store):
179 178 """Yield one dictionary of attributes for the Batch API response for each
180 179 object in the list.
181 180
182 181 req: The parsedrequest for the Batch API request
183 182 objects: The list of objects in the Batch API object request list
184 183 action: 'upload' or 'download'
185 184 store: The local blob store for servicing requests"""
186 185
187 186     # Successful lfs-test-server response to solicit an upload:
188 187 # {
189 188 # u'objects': [{
190 189 # u'size': 12,
191 190 # u'oid': u'31cf...8e5b',
192 191 # u'actions': {
193 192 # u'upload': {
194 193 # u'href': u'http://localhost:$HGPORT/objects/31cf...8e5b',
195 194 # u'expires_at': u'0001-01-01T00:00:00Z',
196 195 # u'header': {
197 196 # u'Accept': u'application/vnd.git-lfs'
198 197 # }
199 198 # }
200 199 # }
201 200 # }]
202 201 # }
203 202
204 203 # TODO: Sort out the expires_at/expires_in/authenticated keys.
205 204
206 205 for obj in objects:
207 206 # Convert unicode to ASCII to create a filesystem path
208 207 soid = obj.get('oid')
209 208 oid = soid.encode('ascii')
210 209 rsp = {
211 210 'oid': soid,
212 211 'size': obj.get('size'), # XXX: should this check the local size?
213 212 # 'authenticated': True,
214 213 }
215 214
216 215 exists = True
217 216 verifies = False
218 217
219 218 # Verify an existing file on the upload request, so that the client is
220 219 # solicited to re-upload if it corrupt locally. Download requests are
221 220 # also verified, so the error can be flagged in the Batch API response.
222 221 # (Maybe we can use this to short circuit the download for `hg verify`,
223 222 # IFF the client can assert that the remote end is an hg server.)
224 223 # Otherwise, it's potentially overkill on download, since it is also
225 224 # verified as the file is streamed to the caller.
226 225 try:
227 226 verifies = store.verify(oid)
228 227 if verifies and action == b'upload':
229 228 # The client will skip this upload, but make sure it remains
230 229 # available locally.
231 230 store.linkfromusercache(oid)
232 231 except IOError as inst:
233 232 if inst.errno != errno.ENOENT:
234 233 _logexception(req)
235 234
236 235 rsp['error'] = {
237 236 'code': 500,
238 237                 'message': inst.strerror or 'Internal Server Error',
239 238 }
240 239 yield rsp
241 240 continue
242 241
243 242 exists = False
244 243
245 244 # Items are always listed for downloads. They are dropped for uploads
246 245 # IFF they already exist locally.
247 246 if action == b'download':
248 247 if not exists:
249 248 rsp['error'] = {
250 249 'code': 404,
251 250 'message': "The object does not exist",
252 251 }
253 252 yield rsp
254 253 continue
255 254
256 255 elif not verifies:
257 256 rsp['error'] = {
258 257 'code': 422, # XXX: is this the right code?
259 258 'message': "The object is corrupt",
260 259 }
261 260 yield rsp
262 261 continue
263 262
264 263 elif verifies:
265 264 yield rsp # Skip 'actions': already uploaded
266 265 continue
267 266
268 267 expiresat = datetime.datetime.now() + datetime.timedelta(minutes=10)
269 268
270 269 def _buildheader():
271 270 # The spec doesn't mention the Accept header here, but avoid
272 271 # a gratuitous deviation from lfs-test-server in the test
273 272 # output.
274 273 hdr = {'Accept': 'application/vnd.git-lfs'}
275 274
276 275 auth = req.headers.get(b'Authorization', b'')
277 276 if auth.startswith(b'Basic '):
278 277 hdr['Authorization'] = pycompat.strurl(auth)
279 278
280 279 return hdr
281 280
282 281 rsp['actions'] = {
283 282 '%s'
284 283 % pycompat.strurl(action): {
285 284 'href': pycompat.strurl(
286 285 b'%s%s/.hg/lfs/objects/%s' % (req.baseurl, req.apppath, oid)
287 286 ),
288 287 # datetime.isoformat() doesn't include the 'Z' suffix
289 288 "expires_at": expiresat.strftime('%Y-%m-%dT%H:%M:%SZ'),
290 289 'header': _buildheader(),
291 290 }
292 291 }
293 292
294 293 yield rsp
295 294
296 295
297 296 def _processbasictransfer(repo, req, res, checkperm):
298 297 """Handle a single file upload (PUT) or download (GET) action for the Basic
299 298 Transfer Adapter.
300 299
301 300 After determining if the request is for an upload or download, the access
302 301 must be checked by calling ``checkperm()`` with either 'pull' or 'upload'
303 302 before accessing the files.
304 303
305 304 https://github.com/git-lfs/git-lfs/blob/master/docs/api/basic-transfers.md
306 305 """
307 306
308 307 method = req.method
309 308 oid = req.dispatchparts[-1]
310 309 localstore = repo.svfs.lfslocalblobstore
311 310
312 311 if len(req.dispatchparts) != 4:
313 312 _sethttperror(res, HTTP_NOT_FOUND)
314 313 return True
315 314
316 315 if method == b'PUT':
317 316 checkperm(b'upload')
318 317
319 318 # TODO: verify Content-Type?
320 319
321 320 existed = localstore.has(oid)
322 321
323 322 # TODO: how to handle timeouts? The body proxy handles limiting to
324 323 # Content-Length, but what happens if a client sends less than it
325 324 # says it will?
326 325
327 326 statusmessage = hgwebcommon.statusmessage
328 327 try:
329 328 localstore.download(oid, req.bodyfh, req.headers[b'Content-Length'])
330 329 res.status = statusmessage(HTTP_OK if existed else HTTP_CREATED)
331 330 except blobstore.LfsCorruptionError:
332 331 _logexception(req)
333 332
334 333 # XXX: Is this the right code?
335 334 res.status = statusmessage(422, b'corrupt blob')
336 335
337 336 # There's no payload here, but this is the header that lfs-test-server
338 337 # sends back. This eliminates some gratuitous test output conditionals.
339 338 res.headers[b'Content-Type'] = b'text/plain; charset=utf-8'
340 339 res.setbodybytes(b'')
341 340
342 341 return True
343 342 elif method == b'GET':
344 343 checkperm(b'pull')
345 344
346 345 res.status = hgwebcommon.statusmessage(HTTP_OK)
347 346 res.headers[b'Content-Type'] = b'application/octet-stream'
348 347
349 348 try:
350 349 # TODO: figure out how to send back the file in chunks, instead of
351 350 # reading the whole thing. (Also figure out how to send back
352 351 # an error status if an IOError occurs after a partial write
353 352 # in that case. Here, everything is read before starting.)
354 353 res.setbodybytes(localstore.read(oid))
355 354 except blobstore.LfsCorruptionError:
356 355 _logexception(req)
357 356
358 357 # XXX: Is this the right code?
359 358 res.status = hgwebcommon.statusmessage(422, b'corrupt blob')
360 359 res.setbodybytes(b'')
361 360
362 361 return True
363 362 else:
364 363 _sethttperror(
365 364 res,
366 365 HTTP_METHOD_NOT_ALLOWED,
367 366 message=b'Unsupported LFS transfer method: %s' % method,
368 367 )
369 368 return True
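
For context on the server-side handlers above, here is roughly what a minimal, well-formed Batch API request looks like from the client's side. The oid and size are placeholders; only the shape of the request mirrors what _processbatchrequest() checks and what _batchresponseobjects() answers with.

import json

# _processbatchrequest() above requires a POST carrying these media types.
headers = {
    'Content-Type': 'application/vnd.git-lfs+json',
    'Accept': 'application/vnd.git-lfs+json',
}

# 'transfers' may be omitted, in which case 'basic' is assumed.  'operation'
# must be 'upload' or 'download'.  The oid is a placeholder sha256 hex digest.
batch_request = {
    'operation': 'download',
    'transfers': ['basic'],
    'objects': [
        {'oid': '31cf' + 'ab' * 30, 'size': 12},
    ],
}

body = json.dumps(batch_request).encode('ascii')

# POSTed to <repo url>/.git/info/lfs/objects/batch, the reply lists one entry
# per object: either an 'actions' dict giving the href and headers for the
# basic transfer, or an 'error' dict (404 for a missing download, 422 for a
# blob that exists locally but fails verification).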
@@ -1,545 +1,544 b''
1 1 # wrapper.py - methods wrapping core mercurial logic
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import hashlib
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial.node import bin, hex, short
13 13 from mercurial.pycompat import (
14 14 getattr,
15 15 setattr,
16 16 )
17 17
18 18 from mercurial import (
19 19 bundle2,
20 20 changegroup,
21 21 cmdutil,
22 22 context,
23 23 error,
24 24 exchange,
25 25 exthelper,
26 26 localrepo,
27 27 revlog,
28 28 scmutil,
29 util,
30 29 vfs as vfsmod,
31 30 wireprotov1server,
32 31 )
33 32
34 33 from mercurial.upgrade_utils import (
35 34 actions as upgrade_actions,
36 35 engine as upgrade_engine,
37 36 )
38 37
39 38 from mercurial.interfaces import repository
40 39
41 40 from mercurial.utils import (
42 41 storageutil,
43 42 stringutil,
44 43 )
45 44
46 45 from ..largefiles import lfutil
47 46
48 47 from . import (
49 48 blobstore,
50 49 pointer,
51 50 )
52 51
53 52 eh = exthelper.exthelper()
54 53
55 54
56 55 @eh.wrapfunction(localrepo, 'makefilestorage')
57 56 def localrepomakefilestorage(orig, requirements, features, **kwargs):
58 57 if b'lfs' in requirements:
59 58 features.add(repository.REPO_FEATURE_LFS)
60 59
61 60 return orig(requirements=requirements, features=features, **kwargs)
62 61
63 62
64 63 @eh.wrapfunction(changegroup, 'allsupportedversions')
65 64 def allsupportedversions(orig, ui):
66 65 versions = orig(ui)
67 66 versions.add(b'03')
68 67 return versions
69 68
70 69
71 70 @eh.wrapfunction(wireprotov1server, '_capabilities')
72 71 def _capabilities(orig, repo, proto):
73 72 '''Wrap server command to announce lfs server capability'''
74 73 caps = orig(repo, proto)
75 if util.safehasattr(repo.svfs, 'lfslocalblobstore'):
74 if hasattr(repo.svfs, 'lfslocalblobstore'):
76 75 # Advertise a slightly different capability when lfs is *required*, so
77 76 # that the client knows it MUST load the extension. If lfs is not
78 77 # required on the server, there's no reason to autoload the extension
79 78 # on the client.
80 79 if b'lfs' in repo.requirements:
81 80 caps.append(b'lfs-serve')
82 81
83 82 caps.append(b'lfs')
84 83 return caps
85 84
86 85
87 86 def bypasscheckhash(self, text):
88 87 return False
89 88
90 89
91 90 def readfromstore(self, text):
92 91 """Read filelog content from local blobstore transform for flagprocessor.
93 92
94 93     Default transform for flagprocessor, returning contents from blobstore.
95 94     Returns a 2-tuple (text, validatehash) where validatehash is True as the
96 95 contents of the blobstore should be checked using checkhash.
97 96 """
98 97 p = pointer.deserialize(text)
99 98 oid = p.oid()
100 99 store = self.opener.lfslocalblobstore
101 100 if not store.has(oid):
102 101 p.filename = self.filename
103 102 self.opener.lfsremoteblobstore.readbatch([p], store)
104 103
105 104 # The caller will validate the content
106 105 text = store.read(oid, verify=False)
107 106
108 107 # pack hg filelog metadata
109 108 hgmeta = {}
110 109 for k in p.keys():
111 110 if k.startswith(b'x-hg-'):
112 111 name = k[len(b'x-hg-') :]
113 112 hgmeta[name] = p[k]
114 113 if hgmeta or text.startswith(b'\1\n'):
115 114 text = storageutil.packmeta(hgmeta, text)
116 115
117 116 return (text, True)
118 117
119 118
120 119 def writetostore(self, text):
121 120 # hg filelog metadata (includes rename, etc)
122 121 hgmeta, offset = storageutil.parsemeta(text)
123 122 if offset and offset > 0:
124 123 # lfs blob does not contain hg filelog metadata
125 124 text = text[offset:]
126 125
127 126 # git-lfs only supports sha256
128 127 oid = hex(hashlib.sha256(text).digest())
129 128 self.opener.lfslocalblobstore.write(oid, text)
130 129
131 130 # replace contents with metadata
132 131 longoid = b'sha256:%s' % oid
133 132 metadata = pointer.gitlfspointer(oid=longoid, size=b'%d' % len(text))
134 133
135 134 # by default, we expect the content to be binary. however, LFS could also
136 135 # be used for non-binary content. add a special entry for non-binary data.
137 136 # this will be used by filectx.isbinary().
138 137 if not stringutil.binary(text):
139 138 # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
140 139 metadata[b'x-is-binary'] = b'0'
141 140
142 141 # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
143 142 if hgmeta is not None:
144 143 for k, v in hgmeta.items():
145 144 metadata[b'x-hg-%s' % k] = v
146 145
147 146 rawtext = metadata.serialize()
148 147 return (rawtext, False)
149 148
150 149
151 150 def _islfs(rlog, node=None, rev=None):
152 151 if rev is None:
153 152 if node is None:
154 153 # both None - likely working copy content where node is not ready
155 154 return False
156 155 rev = rlog.rev(node)
157 156 else:
158 157 node = rlog.node(rev)
159 158 if node == rlog.nullid:
160 159 return False
161 160 flags = rlog.flags(rev)
162 161 return bool(flags & revlog.REVIDX_EXTSTORED)
163 162
164 163
165 164 # Wrapping may also be applied by remotefilelog
166 165 def filelogaddrevision(
167 166 orig,
168 167 self,
169 168 text,
170 169 transaction,
171 170 link,
172 171 p1,
173 172 p2,
174 173 cachedelta=None,
175 174 node=None,
176 175 flags=revlog.REVIDX_DEFAULT_FLAGS,
177 176 **kwds
178 177 ):
179 178 # The matcher isn't available if reposetup() wasn't called.
180 179 lfstrack = self._revlog.opener.options.get(b'lfstrack')
181 180
182 181 if lfstrack:
183 182 textlen = len(text)
184 183 # exclude hg rename meta from file size
185 184 meta, offset = storageutil.parsemeta(text)
186 185 if offset:
187 186 textlen -= offset
188 187
189 188 if lfstrack(self._revlog.filename, textlen):
190 189 flags |= revlog.REVIDX_EXTSTORED
191 190
192 191 return orig(
193 192 self,
194 193 text,
195 194 transaction,
196 195 link,
197 196 p1,
198 197 p2,
199 198 cachedelta=cachedelta,
200 199 node=node,
201 200 flags=flags,
202 201 **kwds
203 202 )
204 203
205 204
206 205 # Wrapping may also be applied by remotefilelog
207 206 def filelogrenamed(orig, self, node):
208 207 if _islfs(self._revlog, node):
209 208 rawtext = self._revlog.rawdata(node)
210 209 if not rawtext:
211 210 return False
212 211 metadata = pointer.deserialize(rawtext)
213 212 if b'x-hg-copy' in metadata and b'x-hg-copyrev' in metadata:
214 213 return metadata[b'x-hg-copy'], bin(metadata[b'x-hg-copyrev'])
215 214 else:
216 215 return False
217 216 return orig(self, node)
218 217
219 218
220 219 # Wrapping may also be applied by remotefilelog
221 220 def filelogsize(orig, self, rev):
222 221 if _islfs(self._revlog, rev=rev):
223 222 # fast path: use lfs metadata to answer size
224 223 rawtext = self._revlog.rawdata(rev)
225 224 metadata = pointer.deserialize(rawtext)
226 225 return int(metadata[b'size'])
227 226 return orig(self, rev)
228 227
229 228
230 229 @eh.wrapfunction(revlog, '_verify_revision')
231 230 def _verify_revision(orig, rl, skipflags, state, node):
232 231 if _islfs(rl, node=node):
233 232 rawtext = rl.rawdata(node)
234 233 metadata = pointer.deserialize(rawtext)
235 234
236 235 # Don't skip blobs that are stored locally, as local verification is
237 236 # relatively cheap and there's no other way to verify the raw data in
238 237 # the revlog.
239 238 if rl.opener.lfslocalblobstore.has(metadata.oid()):
240 239 skipflags &= ~revlog.REVIDX_EXTSTORED
241 240 elif skipflags & revlog.REVIDX_EXTSTORED:
242 241 # The wrapped method will set `skipread`, but there's enough local
243 242 # info to check renames.
244 243 state[b'safe_renamed'].add(node)
245 244
246 245 orig(rl, skipflags, state, node)
247 246
248 247
249 248 @eh.wrapfunction(context.basefilectx, 'cmp')
250 249 def filectxcmp(orig, self, fctx):
251 250 """returns True if text is different than fctx"""
252 251     # some fctx (ex. hg-git) is not based on basefilectx and does not have islfs
253 252 if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
254 253 # fast path: check LFS oid
255 254 p1 = pointer.deserialize(self.rawdata())
256 255 p2 = pointer.deserialize(fctx.rawdata())
257 256 return p1.oid() != p2.oid()
258 257 return orig(self, fctx)
259 258
260 259
261 260 @eh.wrapfunction(context.basefilectx, 'isbinary')
262 261 def filectxisbinary(orig, self):
263 262 if self.islfs():
264 263 # fast path: use lfs metadata to answer isbinary
265 264 metadata = pointer.deserialize(self.rawdata())
266 265 # if lfs metadata says nothing, assume it's binary by default
267 266 return bool(int(metadata.get(b'x-is-binary', 1)))
268 267 return orig(self)
269 268
270 269
271 270 def filectxislfs(self):
272 271 return _islfs(self.filelog()._revlog, self.filenode())
273 272
274 273
275 274 @eh.wrapfunction(cmdutil, '_updatecatformatter')
276 275 def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
277 276 orig(fm, ctx, matcher, path, decode)
278 277 fm.data(rawdata=ctx[path].rawdata())
279 278
280 279
281 280 @eh.wrapfunction(scmutil, 'wrapconvertsink')
282 281 def convertsink(orig, sink):
283 282 sink = orig(sink)
284 283 if sink.repotype == b'hg':
285 284
286 285 class lfssink(sink.__class__):
287 286 def putcommit(
288 287 self,
289 288 files,
290 289 copies,
291 290 parents,
292 291 commit,
293 292 source,
294 293 revmap,
295 294 full,
296 295 cleanp2,
297 296 ):
298 297 pc = super(lfssink, self).putcommit
299 298 node = pc(
300 299 files,
301 300 copies,
302 301 parents,
303 302 commit,
304 303 source,
305 304 revmap,
306 305 full,
307 306 cleanp2,
308 307 )
309 308
310 309 if b'lfs' not in self.repo.requirements:
311 310 ctx = self.repo[node]
312 311
313 312 # The file list may contain removed files, so check for
314 313 # membership before assuming it is in the context.
315 314 if any(f in ctx and ctx[f].islfs() for f, n in files):
316 315 self.repo.requirements.add(b'lfs')
317 316 scmutil.writereporequirements(self.repo)
318 317
319 318 return node
320 319
321 320 sink.__class__ = lfssink
322 321
323 322 return sink
324 323
325 324
326 325 # bundlerepo uses "vfsmod.readonlyvfs(othervfs)", we need to make sure lfs
327 326 # options and blob stores are passed from othervfs to the new readonlyvfs.
328 327 @eh.wrapfunction(vfsmod.readonlyvfs, '__init__')
329 328 def vfsinit(orig, self, othervfs):
330 329 orig(self, othervfs)
331 330 # copy lfs related options
332 331 for k, v in othervfs.options.items():
333 332 if k.startswith(b'lfs'):
334 333 self.options[k] = v
335 334 # also copy lfs blobstores. note: this can run before reposetup, so lfs
336 335 # blobstore attributes are not always ready at this time.
337 336 for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
338 if util.safehasattr(othervfs, name):
337 if hasattr(othervfs, name):
339 338 setattr(self, name, getattr(othervfs, name))
340 339
341 340
342 341 def _prefetchfiles(repo, revmatches):
343 342 """Ensure that required LFS blobs are present, fetching them as a group if
344 343 needed."""
345 if not util.safehasattr(repo.svfs, 'lfslocalblobstore'):
344 if not hasattr(repo.svfs, 'lfslocalblobstore'):
346 345 return
347 346
348 347 pointers = []
349 348 oids = set()
350 349 localstore = repo.svfs.lfslocalblobstore
351 350
352 351 for rev, match in revmatches:
353 352 ctx = repo[rev]
354 353 for f in ctx.walk(match):
355 354 p = pointerfromctx(ctx, f)
356 355 if p and p.oid() not in oids and not localstore.has(p.oid()):
357 356 p.filename = f
358 357 pointers.append(p)
359 358 oids.add(p.oid())
360 359
361 360 if pointers:
362 361 # Recalculating the repo store here allows 'paths.default' that is set
363 362 # on the repo by a clone command to be used for the update.
364 363 blobstore.remote(repo).readbatch(pointers, localstore)
365 364
366 365
367 366 def _canskipupload(repo):
368 367 # Skip if this hasn't been passed to reposetup()
369 if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
368 if not hasattr(repo.svfs, 'lfsremoteblobstore'):
370 369 return True
371 370
372 371 # if remotestore is a null store, upload is a no-op and can be skipped
373 372 return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
374 373
375 374
376 375 def candownload(repo):
377 376 # Skip if this hasn't been passed to reposetup()
378 if not util.safehasattr(repo.svfs, 'lfsremoteblobstore'):
377 if not hasattr(repo.svfs, 'lfsremoteblobstore'):
379 378 return False
380 379
381 380 # if remotestore is a null store, downloads will lead to nothing
382 381 return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
383 382
384 383
385 384 def uploadblobsfromrevs(repo, revs):
386 385 """upload lfs blobs introduced by revs"""
387 386 if _canskipupload(repo):
388 387 return
389 388 pointers = extractpointers(repo, revs)
390 389 uploadblobs(repo, pointers)
391 390
392 391
393 392 def prepush(pushop):
394 393 """Prepush hook.
395 394
396 395 Read through the revisions to push, looking for filelog entries that can be
397 396 deserialized into metadata so that we can block the push on their upload to
398 397 the remote blobstore.
399 398 """
400 399 return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)
401 400
402 401
403 402 @eh.wrapfunction(exchange, 'push')
404 403 def push(orig, repo, remote, *args, **kwargs):
405 404 """bail on push if the extension isn't enabled on remote when needed, and
406 405 update the remote store based on the destination path."""
407 406 if b'lfs' in repo.requirements:
408 407 # If the remote peer is for a local repo, the requirement tests in the
409 408 # base class method enforce lfs support. Otherwise, some revisions in
410 409 # this repo use lfs, and the remote repo needs the extension loaded.
411 410 if not remote.local() and not remote.capable(b'lfs'):
412 411 # This is a copy of the message in exchange.push() when requirements
413 412 # are missing between local repos.
414 413 m = _(b"required features are not supported in the destination: %s")
415 414 raise error.Abort(
416 415 m % b'lfs', hint=_(b'enable the lfs extension on the server')
417 416 )
418 417
419 418 # Repositories where this extension is disabled won't have the field.
420 419 # But if there's a requirement, then the extension must be loaded AND
421 420 # there may be blobs to push.
422 421 remotestore = repo.svfs.lfsremoteblobstore
423 422 try:
424 423 repo.svfs.lfsremoteblobstore = blobstore.remote(repo, remote.url())
425 424 return orig(repo, remote, *args, **kwargs)
426 425 finally:
427 426 repo.svfs.lfsremoteblobstore = remotestore
428 427 else:
429 428 return orig(repo, remote, *args, **kwargs)
430 429
431 430
432 431 # when writing a bundle via "hg bundle" command, upload related LFS blobs
433 432 @eh.wrapfunction(bundle2, 'writenewbundle')
434 433 def writenewbundle(
435 434 orig, ui, repo, source, filename, bundletype, outgoing, *args, **kwargs
436 435 ):
437 436 """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
438 437 uploadblobsfromrevs(repo, outgoing.missing)
439 438 return orig(
440 439 ui, repo, source, filename, bundletype, outgoing, *args, **kwargs
441 440 )
442 441
443 442
444 443 def extractpointers(repo, revs):
445 444 """return a list of lfs pointers added by given revs"""
446 445 repo.ui.debug(b'lfs: computing set of blobs to upload\n')
447 446 pointers = {}
448 447
449 448 makeprogress = repo.ui.makeprogress
450 449 with makeprogress(
451 450 _(b'lfs search'), _(b'changesets'), len(revs)
452 451 ) as progress:
453 452 for r in revs:
454 453 ctx = repo[r]
455 454 for p in pointersfromctx(ctx).values():
456 455 pointers[p.oid()] = p
457 456 progress.increment()
458 457 return sorted(pointers.values(), key=lambda p: p.oid())
459 458
460 459
461 460 def pointerfromctx(ctx, f, removed=False):
462 461 """return a pointer for the named file from the given changectx, or None if
463 462 the file isn't LFS.
464 463
465 464 Optionally, the pointer for a file deleted from the context can be returned.
466 465 Since no such pointer is actually stored, and to distinguish from a non LFS
467 466 file, this pointer is represented by an empty dict.
468 467 """
469 468 _ctx = ctx
470 469 if f not in ctx:
471 470 if not removed:
472 471 return None
473 472 if f in ctx.p1():
474 473 _ctx = ctx.p1()
475 474 elif f in ctx.p2():
476 475 _ctx = ctx.p2()
477 476 else:
478 477 return None
479 478 fctx = _ctx[f]
480 479 if not _islfs(fctx.filelog()._revlog, fctx.filenode()):
481 480 return None
482 481 try:
483 482 p = pointer.deserialize(fctx.rawdata())
484 483 if ctx == _ctx:
485 484 return p
486 485 return {}
487 486 except pointer.InvalidPointer as ex:
488 487 raise error.Abort(
489 488 _(b'lfs: corrupted pointer (%s@%s): %s\n')
490 489 % (f, short(_ctx.node()), ex)
491 490 )
492 491
493 492
494 493 def pointersfromctx(ctx, removed=False):
495 494 """return a dict {path: pointer} for given single changectx.
496 495
497 496 If ``removed`` == True and the LFS file was removed from ``ctx``, the value
498 497 stored for the path is an empty dict.
499 498 """
500 499 result = {}
501 500 m = ctx.repo().narrowmatch()
502 501
503 502 # TODO: consider manifest.fastread() instead
504 503 for f in ctx.files():
505 504 if not m(f):
506 505 continue
507 506 p = pointerfromctx(ctx, f, removed=removed)
508 507 if p is not None:
509 508 result[f] = p
510 509 return result
511 510
512 511
513 512 def uploadblobs(repo, pointers):
514 513 """upload given pointers from local blobstore"""
515 514 if not pointers:
516 515 return
517 516
518 517 remoteblob = repo.svfs.lfsremoteblobstore
519 518 remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)
520 519
521 520
522 521 @eh.wrapfunction(upgrade_engine, 'finishdatamigration')
523 522 def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
524 523 orig(ui, srcrepo, dstrepo, requirements)
525 524
526 525 # Skip if this hasn't been passed to reposetup()
527 if util.safehasattr(srcrepo.svfs, 'lfslocalblobstore') and util.safehasattr(
526 if hasattr(srcrepo.svfs, 'lfslocalblobstore') and hasattr(
528 527 dstrepo.svfs, 'lfslocalblobstore'
529 528 ):
530 529 srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
531 530 dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs
532 531
533 532 for dirpath, dirs, files in srclfsvfs.walk():
534 533 for oid in files:
535 534 ui.write(_(b'copying lfs blob %s\n') % oid)
536 535 lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))
537 536
538 537
539 538 @eh.wrapfunction(upgrade_actions, 'preservedrequirements')
540 539 @eh.wrapfunction(upgrade_actions, 'supporteddestrequirements')
541 540 def upgraderequirements(orig, repo):
542 541 reqs = orig(repo)
543 542 if b'lfs' in repo.requirements:
544 543 reqs.add(b'lfs')
545 544 return reqs
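
One idiom worth calling out from wrapper.py above: nearly every hook first probes repo.svfs for the blob-store attributes that reposetup() attaches, and quietly does nothing when they are absent (extension not loaded, or setup not yet run). A stripped-down sketch of that guard follows; the classes are stand-ins rather than Mercurial types, and the real checks (e.g. candownload()) additionally inspect what kind of remote store was configured.

class _FakeVfs:
    pass

class _FakeRepo:
    def __init__(self):
        self.svfs = _FakeVfs()

def lfs_is_set_up(repo):
    # mirrors guards like candownload()/_canskipupload() above: if reposetup()
    # never attached a blob store, there is nothing for the wrapper to do
    return hasattr(repo.svfs, 'lfsremoteblobstore')

repo = _FakeRepo()
assert not lfs_is_set_up(repo)

repo.svfs.lfsremoteblobstore = object()   # stand-in for what reposetup() does
assert lfs_is_set_up(repo)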