match: use '' instead of '.' for root directory (API)...
Martin von Zweigbergk
r42913:27d6956d default
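The diff below changes the matcher API's name for the repository root directory from '.' to the empty string ''. A minimal sketch of the new convention (illustrative only; it assumes the visitdir() semantics of mercurial.match at this revision, and the example matcher is hypothetical):

    from mercurial import match as matchmod

    # Hypothetical matcher over a repo that only includes foo/bar.
    m = matchmod.match(b'/repo', b'', include=[b'path:foo/bar'])

    # The root directory is now spelled '' rather than '.':
    m.visitdir(b'')       # root; expected truthy, since foo/bar lies below it
    m.visitdir(b'foo')    # ancestor of an included path; expected truthy
    m.visitdir(b'baz')    # unrelated directory; expected falsy

Callers that previously patched up the root with "tree[:-1] or '.'" can now pass tree[:-1] directly, which is what both hunks in this commit do.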
@@ -1,463 +1,463 b''
1 1 # narrowcommands.py - command modifications for narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import itertools
10 10 import os
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial import (
14 14 bundle2,
15 15 cmdutil,
16 16 commands,
17 17 discovery,
18 18 encoding,
19 19 error,
20 20 exchange,
21 21 extensions,
22 22 hg,
23 23 narrowspec,
24 24 node,
25 25 pycompat,
26 26 registrar,
27 27 repair,
28 28 repository,
29 29 repoview,
30 30 sparse,
31 31 util,
32 32 wireprototypes,
33 33 )
34 34
35 35 table = {}
36 36 command = registrar.command(table)
37 37
38 38 def setup():
39 39 """Wraps user-facing mercurial commands with narrow-aware versions."""
40 40
41 41 entry = extensions.wrapcommand(commands.table, 'clone', clonenarrowcmd)
42 42 entry[1].append(('', 'narrow', None,
43 43 _("create a narrow clone of select files")))
44 44 entry[1].append(('', 'depth', '',
45 45 _("limit the history fetched by distance from heads")))
46 46 entry[1].append(('', 'narrowspec', '',
47 47 _("read narrowspecs from file")))
48 48 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
49 49 if 'sparse' not in extensions.enabled():
50 50 entry[1].append(('', 'include', [],
51 51 _("specifically fetch this file/directory")))
52 52 entry[1].append(
53 53 ('', 'exclude', [],
54 54 _("do not fetch this file/directory, even if included")))
55 55
56 56 entry = extensions.wrapcommand(commands.table, 'pull', pullnarrowcmd)
57 57 entry[1].append(('', 'depth', '',
58 58 _("limit the history fetched by distance from heads")))
59 59
60 60 extensions.wrapcommand(commands.table, 'archive', archivenarrowcmd)
61 61
62 62 def clonenarrowcmd(orig, ui, repo, *args, **opts):
63 63 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
64 64 opts = pycompat.byteskwargs(opts)
65 65 wrappedextraprepare = util.nullcontextmanager()
66 66 narrowspecfile = opts['narrowspec']
67 67
68 68 if narrowspecfile:
69 69 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
70 70 ui.status(_("reading narrowspec from '%s'\n") % filepath)
71 71 try:
72 72 fdata = util.readfile(filepath)
73 73 except IOError as inst:
74 74 raise error.Abort(_("cannot read narrowspecs from '%s': %s") %
75 75 (filepath, encoding.strtolocal(inst.strerror)))
76 76
77 77 includes, excludes, profiles = sparse.parseconfig(ui, fdata, 'narrow')
78 78 if profiles:
79 79 raise error.Abort(_("cannot specify other files using '%include' in"
80 80 " narrowspec"))
81 81
82 82 narrowspec.validatepatterns(includes)
83 83 narrowspec.validatepatterns(excludes)
84 84
85 85 # a narrowspec was passed, so assume the user wants a narrow clone
86 86 opts['narrow'] = True
87 87 opts['include'].extend(includes)
88 88 opts['exclude'].extend(excludes)
89 89
90 90 if opts['narrow']:
91 91 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
92 92 orig(pullop, kwargs)
93 93
94 94 if opts.get('depth'):
95 95 kwargs['depth'] = opts['depth']
96 96 wrappedextraprepare = extensions.wrappedfunction(exchange,
97 97 '_pullbundle2extraprepare', pullbundle2extraprepare_widen)
98 98
99 99 with wrappedextraprepare:
100 100 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
101 101
102 102 def pullnarrowcmd(orig, ui, repo, *args, **opts):
103 103 """Wraps pull command to allow modifying narrow spec."""
104 104 wrappedextraprepare = util.nullcontextmanager()
105 105 if repository.NARROW_REQUIREMENT in repo.requirements:
106 106
107 107 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
108 108 orig(pullop, kwargs)
109 109 if opts.get(r'depth'):
110 110 kwargs['depth'] = opts[r'depth']
111 111 wrappedextraprepare = extensions.wrappedfunction(exchange,
112 112 '_pullbundle2extraprepare', pullbundle2extraprepare_widen)
113 113
114 114 with wrappedextraprepare:
115 115 return orig(ui, repo, *args, **opts)
116 116
117 117 def archivenarrowcmd(orig, ui, repo, *args, **opts):
118 118 """Wraps archive command to narrow the default includes."""
119 119 if repository.NARROW_REQUIREMENT in repo.requirements:
120 120 repo_includes, repo_excludes = repo.narrowpats
121 121 includes = set(opts.get(r'include', []))
122 122 excludes = set(opts.get(r'exclude', []))
123 123 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
124 124 includes, excludes, repo_includes, repo_excludes)
125 125 if includes:
126 126 opts[r'include'] = includes
127 127 if excludes:
128 128 opts[r'exclude'] = excludes
129 129 return orig(ui, repo, *args, **opts)
130 130
131 131 def pullbundle2extraprepare(orig, pullop, kwargs):
132 132 repo = pullop.repo
133 133 if repository.NARROW_REQUIREMENT not in repo.requirements:
134 134 return orig(pullop, kwargs)
135 135
136 136 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
137 137 raise error.Abort(_("server does not support narrow clones"))
138 138 orig(pullop, kwargs)
139 139 kwargs['narrow'] = True
140 140 include, exclude = repo.narrowpats
141 141 kwargs['oldincludepats'] = include
142 142 kwargs['oldexcludepats'] = exclude
143 143 if include:
144 144 kwargs['includepats'] = include
145 145 if exclude:
146 146 kwargs['excludepats'] = exclude
147 147 # calculate known nodes only in ellipses cases because in non-ellipses cases
148 148 # we have all the nodes
149 149 if wireprototypes.ELLIPSESCAP in pullop.remote.capabilities():
150 150 kwargs['known'] = [node.hex(ctx.node()) for ctx in
151 151 repo.set('::%ln', pullop.common)
152 152 if ctx.node() != node.nullid]
153 153 if not kwargs['known']:
154 154 # Mercurial serializes an empty list as '' and deserializes it as
155 155 # [''], so delete it instead to avoid handling the empty string on
156 156 # the server.
157 157 del kwargs['known']
158 158
159 159 extensions.wrapfunction(exchange, '_pullbundle2extraprepare',
160 160 pullbundle2extraprepare)
161 161
162 162 def _narrow(ui, repo, remote, commoninc, oldincludes, oldexcludes,
163 163 newincludes, newexcludes, force):
164 164 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
165 165 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
166 166
167 167 # This is essentially doing "hg outgoing" to find all local-only
168 168 # commits. We will then check that the local-only commits don't
169 169 # have any changes to files that will be untracked.
170 170 unfi = repo.unfiltered()
171 171 outgoing = discovery.findcommonoutgoing(unfi, remote,
172 172 commoninc=commoninc)
173 173 ui.status(_('looking for local changes to affected paths\n'))
174 174 localnodes = []
175 175 for n in itertools.chain(outgoing.missing, outgoing.excluded):
176 176 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
177 177 localnodes.append(n)
178 178 revstostrip = unfi.revs('descendants(%ln)', localnodes)
179 179 hiddenrevs = repoview.filterrevs(repo, 'visible')
180 180 visibletostrip = list(repo.changelog.node(r)
181 181 for r in (revstostrip - hiddenrevs))
182 182 if visibletostrip:
183 183 ui.status(_('The following changeset(s) or their ancestors have '
184 184 'local changes not on the remote:\n'))
185 185 maxnodes = 10
186 186 if ui.verbose or len(visibletostrip) <= maxnodes:
187 187 for n in visibletostrip:
188 188 ui.status('%s\n' % node.short(n))
189 189 else:
190 190 for n in visibletostrip[:maxnodes]:
191 191 ui.status('%s\n' % node.short(n))
192 192 ui.status(_('...and %d more, use --verbose to list all\n') %
193 193 (len(visibletostrip) - maxnodes))
194 194 if not force:
195 195 raise error.Abort(_('local changes found'),
196 196 hint=_('use --force-delete-local-changes to '
197 197 'ignore'))
198 198
199 199 with ui.uninterruptible():
200 200 if revstostrip:
201 201 tostrip = [unfi.changelog.node(r) for r in revstostrip]
202 202 if repo['.'].node() in tostrip:
203 203 # stripping working copy, so move to a different commit first
204 204 urev = max(repo.revs('(::%n) - %ln + null',
205 205 repo['.'].node(), visibletostrip))
206 206 hg.clean(repo, urev)
207 207 overrides = {('devel', 'strip-obsmarkers'): False}
208 208 with ui.configoverride(overrides, 'narrow'):
209 209 repair.strip(ui, unfi, tostrip, topic='narrow')
210 210
211 211 todelete = []
212 212 for f, f2, size in repo.store.datafiles():
213 213 if f.startswith('data/'):
214 214 file = f[5:-2]
215 215 if not newmatch(file):
216 216 todelete.append(f)
217 217 elif f.startswith('meta/'):
218 218 dir = f[5:-13]
219 dirs = ['.'] + sorted(util.dirs({dir})) + [dir]
219 dirs = [''] + sorted(util.dirs({dir})) + [dir]
220 220 include = True
221 221 for d in dirs:
222 222 visit = newmatch.visitdir(d)
223 223 if not visit:
224 224 include = False
225 225 break
226 226 if visit == 'all':
227 227 break
228 228 if not include:
229 229 todelete.append(f)
230 230
231 231 repo.destroying()
232 232
233 233 with repo.transaction('narrowing'):
234 234 # Update the narrowspec before removing revlogs, so the repo won't
235 235 # be corrupted in case of a crash
236 236 repo.setnarrowpats(newincludes, newexcludes)
237 237
238 238 for f in todelete:
239 239 ui.status(_('deleting %s\n') % f)
240 240 util.unlinkpath(repo.svfs.join(f))
241 241 repo.store.markremoved(f)
242 242
243 243 narrowspec.updateworkingcopy(repo, assumeclean=True)
244 244 narrowspec.copytoworkingcopy(repo)
245 245
246 246 repo.destroyed()
247 247
248 248 def _widen(ui, repo, remote, commoninc, oldincludes, oldexcludes,
249 249 newincludes, newexcludes):
250 250 # For now we assume that if a server has ellipses enabled, we will be
251 251 # exchanging ellipses nodes. In the future we should (maybe) add ellipses
252 252 # as a client-side requirement, to distinguish whether a client is shallow,
253 253 # and then tell the server whether we want ellipses or not.
254 254 # Theoretically a non-ellipses repo should be able to use narrow
255 255 # functionality from an ellipses-enabled server
256 256 ellipsesremote = wireprototypes.ELLIPSESCAP in remote.capabilities()
257 257
258 258 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
259 259 orig(pullop, kwargs)
260 260 # The old{in,ex}cludepats have already been set by orig()
261 261 kwargs['includepats'] = newincludes
262 262 kwargs['excludepats'] = newexcludes
263 263 wrappedextraprepare = extensions.wrappedfunction(exchange,
264 264 '_pullbundle2extraprepare', pullbundle2extraprepare_widen)
265 265
266 266 # define a function that narrowbundle2 can call after creating the
267 267 # backup bundle, but before applying the bundle from the server
268 268 def setnewnarrowpats():
269 269 repo.setnarrowpats(newincludes, newexcludes)
270 270 repo.setnewnarrowpats = setnewnarrowpats
271 271 # silence the devel-warning of applying an empty changegroup
272 272 overrides = {('devel', 'all-warnings'): False}
273 273
274 274 with ui.uninterruptible():
275 275 common = commoninc[0]
276 276 if ellipsesremote:
277 277 ds = repo.dirstate
278 278 p1, p2 = ds.p1(), ds.p2()
279 279 with ds.parentchange():
280 280 ds.setparents(node.nullid, node.nullid)
281 281 with wrappedextraprepare:
282 282 with repo.ui.configoverride(overrides, 'widen'):
283 283 exchange.pull(repo, remote, heads=common)
284 284 with ds.parentchange():
285 285 ds.setparents(p1, p2)
286 286 else:
287 287 with remote.commandexecutor() as e:
288 288 bundle = e.callcommand('narrow_widen', {
289 289 'oldincludes': oldincludes,
290 290 'oldexcludes': oldexcludes,
291 291 'newincludes': newincludes,
292 292 'newexcludes': newexcludes,
293 293 'cgversion': '03',
294 294 'commonheads': common,
295 295 'known': [],
296 296 'ellipses': False,
297 297 }).result()
298 298
299 299 with repo.transaction('widening') as tr:
300 300 with repo.ui.configoverride(overrides, 'widen'):
301 301 tgetter = lambda: tr
302 302 bundle2.processbundle(repo, bundle,
303 303 transactiongetter=tgetter)
304 304
305 305 with repo.transaction('widening'):
306 306 repo.setnewnarrowpats()
307 307 narrowspec.updateworkingcopy(repo)
308 308 narrowspec.copytoworkingcopy(repo)
309 309
310 310 # TODO(rdamazio): Make new matcher format and update description
311 311 @command('tracked',
312 312 [('', 'addinclude', [], _('new paths to include')),
313 313 ('', 'removeinclude', [], _('old paths to no longer include')),
314 314 ('', 'addexclude', [], _('new paths to exclude')),
315 315 ('', 'import-rules', '', _('import narrowspecs from a file')),
316 316 ('', 'removeexclude', [], _('old paths to no longer exclude')),
317 317 ('', 'clear', False, _('whether to replace the existing narrowspec')),
318 318 ('', 'force-delete-local-changes', False,
319 319 _('forces deletion of local changes when narrowing')),
320 320 ('', 'update-working-copy', False,
321 321 _('update working copy when the store has changed')),
322 322 ] + commands.remoteopts,
323 323 _('[OPTIONS]... [REMOTE]'),
324 324 inferrepo=True)
325 325 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
326 326 """show or change the current narrowspec
327 327
328 328 With no argument, shows the current narrowspec entries, one per line. Each
329 329 line will be prefixed with 'I' or 'X' for included or excluded patterns,
330 330 respectively.
331 331
332 332 The narrowspec consists of expressions to match remote files and/or
333 333 directories that should be pulled into your client.
334 334 The narrowspec has *include* and *exclude* expressions, with excludes always
335 335 trumping includes: that is, if a file matches an exclude expression, it will
336 336 be excluded even if it also matches an include expression.
337 337 Excluding files that were never included has no effect.
338 338
339 339 Each included or excluded entry is in the format described by
340 340 'hg help patterns'.
341 341
342 342 The options allow you to add or remove included and excluded expressions.
343 343
344 344 If --clear is specified, then all previous includes and excludes are DROPPED
345 345 and replaced by the new ones specified to --addinclude and --addexclude.
346 346 If --clear is specified without any further options, the narrowspec will be
347 347 empty and will not match any files.
348 348
349 349 --import-rules accepts a path to a file containing rules, allowing you to
350 350 add --addinclude, --addexclude rules in bulk. Like the other include and
351 351 exclude switches, the changes are applied immediately.
352 352 """
353 353 opts = pycompat.byteskwargs(opts)
354 354 if repository.NARROW_REQUIREMENT not in repo.requirements:
355 355 raise error.Abort(_('the tracked command is only supported on '
356 356 'repositories cloned with --narrow'))
357 357
358 358 # Before supporting it, decide whether "hg tracked --clear" should mean
359 359 # tracking no paths or all paths.
360 360 if opts['clear']:
361 361 raise error.Abort(_('the --clear option is not yet supported'))
362 362
363 363 # import rules from a file
364 364 newrules = opts.get('import_rules')
365 365 if newrules:
366 366 try:
367 367 filepath = os.path.join(encoding.getcwd(), newrules)
368 368 fdata = util.readfile(filepath)
369 369 except IOError as inst:
370 370 raise error.Abort(_("cannot read narrowspecs from '%s': %s") %
371 371 (filepath, encoding.strtolocal(inst.strerror)))
372 372 includepats, excludepats, profiles = sparse.parseconfig(ui, fdata,
373 373 'narrow')
374 374 if profiles:
375 375 raise error.Abort(_("including other spec files using '%include' "
376 376 "is not supported in narrowspec"))
377 377 opts['addinclude'].extend(includepats)
378 378 opts['addexclude'].extend(excludepats)
379 379
380 380 addedincludes = narrowspec.parsepatterns(opts['addinclude'])
381 381 removedincludes = narrowspec.parsepatterns(opts['removeinclude'])
382 382 addedexcludes = narrowspec.parsepatterns(opts['addexclude'])
383 383 removedexcludes = narrowspec.parsepatterns(opts['removeexclude'])
384 384
385 385 update_working_copy = opts['update_working_copy']
386 386 only_show = not (addedincludes or removedincludes or addedexcludes or
387 387 removedexcludes or newrules or update_working_copy)
388 388
389 389 oldincludes, oldexcludes = repo.narrowpats
390 390
391 391 # filter the user-passed additions and deletions into actual additions and
392 392 # deletions of excludes and includes
393 393 addedincludes -= oldincludes
394 394 removedincludes &= oldincludes
395 395 addedexcludes -= oldexcludes
396 396 removedexcludes &= oldexcludes
397 397
398 398 widening = addedincludes or removedexcludes
399 399 narrowing = removedincludes or addedexcludes
400 400
401 401 # Only print the current narrowspec.
402 402 if only_show:
403 403 ui.pager('tracked')
404 404 fm = ui.formatter('narrow', opts)
405 405 for i in sorted(oldincludes):
406 406 fm.startitem()
407 407 fm.write('status', '%s ', 'I', label='narrow.included')
408 408 fm.write('pat', '%s\n', i, label='narrow.included')
409 409 for i in sorted(oldexcludes):
410 410 fm.startitem()
411 411 fm.write('status', '%s ', 'X', label='narrow.excluded')
412 412 fm.write('pat', '%s\n', i, label='narrow.excluded')
413 413 fm.end()
414 414 return 0
415 415
416 416 if update_working_copy:
417 417 with repo.wlock(), repo.lock(), repo.transaction('narrow-wc'):
418 418 narrowspec.updateworkingcopy(repo)
419 419 narrowspec.copytoworkingcopy(repo)
420 420 return 0
421 421
422 422 if not widening and not narrowing:
423 423 ui.status(_("nothing to widen or narrow\n"))
424 424 return 0
425 425
426 426 with repo.wlock(), repo.lock():
427 427 cmdutil.bailifchanged(repo)
428 428
429 429 # Find the revisions we have in common with the remote. These will
430 430 # be used for finding local-only changes for narrowing. They will
431 431 # also define the set of revisions to update for widening.
432 432 remotepath = ui.expandpath(remotepath or 'default')
433 433 url, branches = hg.parseurl(remotepath)
434 434 ui.status(_('comparing with %s\n') % util.hidepassword(url))
435 435 remote = hg.peer(repo, opts, url)
436 436
437 437 # check narrow support before doing anything if widening needs to be
438 438 # performed. In the future we should also abort if the client uses
439 439 # ellipses and the server does not support them
440 440 if widening and wireprototypes.NARROWCAP not in remote.capabilities():
441 441 raise error.Abort(_("server does not support narrow clones"))
442 442
443 443 commoninc = discovery.findcommonincoming(repo, remote)
444 444
445 445 if narrowing:
446 446 newincludes = oldincludes - removedincludes
447 447 newexcludes = oldexcludes | addedexcludes
448 448 _narrow(ui, repo, remote, commoninc, oldincludes, oldexcludes,
449 449 newincludes, newexcludes,
450 450 opts['force_delete_local_changes'])
451 451 # _narrow() updated the narrowspec and _widen() below needs to
452 452 # use the updated values as its base (otherwise removed includes
453 453 # and added excludes will be lost in the resulting narrowspec)
454 454 oldincludes = newincludes
455 455 oldexcludes = newexcludes
456 456
457 457 if widening:
458 458 newincludes = oldincludes | addedincludes
459 459 newexcludes = oldexcludes - removedexcludes
460 460 _widen(ui, repo, remote, commoninc, oldincludes, oldexcludes,
461 461 newincludes, newexcludes)
462 462
463 463 return 0
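In the _narrow() hunk above, the visibility walk for a tree-manifest revlog now starts at '' (the root) instead of '.'. For a store path like meta/foo/bar/00manifest.i, f[5:-13] strips the meta/ prefix and the /00manifest.i suffix, leaving dir == 'foo/bar'. A standalone sketch of the resulting walk (ancestors() is a hypothetical stand-in for sorted(util.dirs({dir})), which yields the intermediate directories but not the root):

    def ancestors(path):
        # stand-in for sorted(util.dirs({path})): 'foo/bar' -> ['foo']
        parts = path.split('/')
        return ['/'.join(parts[:i]) for i in range(1, len(parts))]

    dir = 'foo/bar'
    dirs = [''] + ancestors(dir) + [dir]
    # -> ['', 'foo', 'foo/bar']

Each entry is passed to newmatch.visitdir() in order: any falsy answer means the revlog is no longer reachable under the new narrowspec and is queued for deletion, while an 'all' answer short-circuits the loop because everything below that directory is kept.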
@@ -1,1423 +1,1423 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 repository,
29 29 util,
30 30 )
31 31
32 32 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
33 33 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
34 34 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
35 35
36 36 LFS_REQUIREMENT = 'lfs'
37 37
38 38 readexactly = util.readexactly
39 39
40 40 def getchunk(stream):
41 41 """return the next chunk from stream as a string"""
42 42 d = readexactly(stream, 4)
43 43 l = struct.unpack(">l", d)[0]
44 44 if l <= 4:
45 45 if l:
46 46 raise error.Abort(_("invalid chunk length %d") % l)
47 47 return ""
48 48 return readexactly(stream, l - 4)
49 49
50 50 def chunkheader(length):
51 51 """return a changegroup chunk header (string)"""
52 52 return struct.pack(">l", length + 4)
53 53
54 54 def closechunk():
55 55 """return a changegroup chunk header (string) for a zero-length chunk"""
56 56 return struct.pack(">l", 0)
57 57
58 58 def _fileheader(path):
59 59 """Obtain a changegroup chunk header for a named path."""
60 60 return chunkheader(len(path)) + path
61 61
62 62 def writechunks(ui, chunks, filename, vfs=None):
63 63 """Write chunks to a file and return its filename.
64 64
65 65 The stream is assumed to be a bundle file.
66 66 Existing files will not be overwritten.
67 67 If no filename is specified, a temporary file is created.
68 68 """
69 69 fh = None
70 70 cleanup = None
71 71 try:
72 72 if filename:
73 73 if vfs:
74 74 fh = vfs.open(filename, "wb")
75 75 else:
76 76 # Increase default buffer size because default is usually
77 77 # small (4k is common on Linux).
78 78 fh = open(filename, "wb", 131072)
79 79 else:
80 80 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
81 81 fh = os.fdopen(fd, r"wb")
82 82 cleanup = filename
83 83 for c in chunks:
84 84 fh.write(c)
85 85 cleanup = None
86 86 return filename
87 87 finally:
88 88 if fh is not None:
89 89 fh.close()
90 90 if cleanup is not None:
91 91 if filename and vfs:
92 92 vfs.unlink(cleanup)
93 93 else:
94 94 os.unlink(cleanup)
95 95
96 96 class cg1unpacker(object):
97 97 """Unpacker for cg1 changegroup streams.
98 98
99 99 A changegroup unpacker handles the framing of the revision data in
100 100 the wire format. Most consumers will want to use the apply()
101 101 method to add the changes from the changegroup to a repository.
102 102
103 103 If you're forwarding a changegroup unmodified to another consumer,
104 104 use getchunks(), which returns an iterator of changegroup
105 105 chunks. This is mostly useful for cases where you need to know the
106 106 data stream has ended by observing the end of the changegroup.
107 107
108 108 deltachunk() is useful only if you're applying delta data. Most
109 109 consumers should prefer apply() instead.
110 110
111 111 A few other public methods exist. Those are used only for
112 112 bundlerepo and some debug commands - their use is discouraged.
113 113 """
114 114 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
115 115 deltaheadersize = deltaheader.size
116 116 version = '01'
117 117 _grouplistcount = 1 # One list of files after the manifests
118 118
119 119 def __init__(self, fh, alg, extras=None):
120 120 if alg is None:
121 121 alg = 'UN'
122 122 if alg not in util.compengines.supportedbundletypes:
123 123 raise error.Abort(_('unknown stream compression type: %s')
124 124 % alg)
125 125 if alg == 'BZ':
126 126 alg = '_truncatedBZ'
127 127
128 128 compengine = util.compengines.forbundletype(alg)
129 129 self._stream = compengine.decompressorreader(fh)
130 130 self._type = alg
131 131 self.extras = extras or {}
132 132 self.callback = None
133 133
134 134 # These methods (compressed, read, seek, tell) all appear to only
135 135 # be used by bundlerepo, but it's a little hard to tell.
136 136 def compressed(self):
137 137 return self._type is not None and self._type != 'UN'
138 138 def read(self, l):
139 139 return self._stream.read(l)
140 140 def seek(self, pos):
141 141 return self._stream.seek(pos)
142 142 def tell(self):
143 143 return self._stream.tell()
144 144 def close(self):
145 145 return self._stream.close()
146 146
147 147 def _chunklength(self):
148 148 d = readexactly(self._stream, 4)
149 149 l = struct.unpack(">l", d)[0]
150 150 if l <= 4:
151 151 if l:
152 152 raise error.Abort(_("invalid chunk length %d") % l)
153 153 return 0
154 154 if self.callback:
155 155 self.callback()
156 156 return l - 4
157 157
158 158 def changelogheader(self):
159 159 """v10 does not have a changelog header chunk"""
160 160 return {}
161 161
162 162 def manifestheader(self):
163 163 """v10 does not have a manifest header chunk"""
164 164 return {}
165 165
166 166 def filelogheader(self):
167 167 """return the header of the filelogs chunk, v10 only has the filename"""
168 168 l = self._chunklength()
169 169 if not l:
170 170 return {}
171 171 fname = readexactly(self._stream, l)
172 172 return {'filename': fname}
173 173
174 174 def _deltaheader(self, headertuple, prevnode):
175 175 node, p1, p2, cs = headertuple
176 176 if prevnode is None:
177 177 deltabase = p1
178 178 else:
179 179 deltabase = prevnode
180 180 flags = 0
181 181 return node, p1, p2, deltabase, cs, flags
182 182
183 183 def deltachunk(self, prevnode):
184 184 l = self._chunklength()
185 185 if not l:
186 186 return {}
187 187 headerdata = readexactly(self._stream, self.deltaheadersize)
188 188 header = self.deltaheader.unpack(headerdata)
189 189 delta = readexactly(self._stream, l - self.deltaheadersize)
190 190 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
191 191 return (node, p1, p2, cs, deltabase, delta, flags)
192 192
193 193 def getchunks(self):
194 194 """returns all the chunks contains in the bundle
195 195
196 196 Used when you need to forward the binary stream to a file or another
197 197 network API. To do so, it parse the changegroup data, otherwise it will
198 198 block in case of sshrepo because it don't know the end of the stream.
199 199 """
200 200 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
201 201 # and a list of filelogs. For changegroup 3, we expect 4 parts:
202 202 # changelog, manifestlog, a list of tree manifestlogs, and a list of
203 203 # filelogs.
204 204 #
205 205 # Changelog and manifestlog parts are terminated with empty chunks. The
206 206 # tree and file parts are a list of entry sections. Each entry section
207 207 # is a series of chunks terminating in an empty chunk. The list of these
208 208 # entry sections is terminated in yet another empty chunk, so we know
209 209 # we've reached the end of the tree/file list when we reach an empty
210 210 # chunk that was preceded by no non-empty chunks.
211 211
212 212 parts = 0
213 213 while parts < 2 + self._grouplistcount:
214 214 noentries = True
215 215 while True:
216 216 chunk = getchunk(self)
217 217 if not chunk:
218 218 # The first two empty chunks represent the end of the
219 219 # changelog and the manifestlog portions. The remaining
220 220 # empty chunks represent either A) the end of individual
221 221 # tree or file entries in the file list, or B) the end of
222 222 # the entire list. It's the end of the entire list if there
223 223 # were no entries (i.e. noentries is True).
224 224 if parts < 2:
225 225 parts += 1
226 226 elif noentries:
227 227 parts += 1
228 228 break
229 229 noentries = False
230 230 yield chunkheader(len(chunk))
231 231 pos = 0
232 232 while pos < len(chunk):
233 233 next = pos + 2**20
234 234 yield chunk[pos:next]
235 235 pos = next
236 236 yield closechunk()
237 237
238 238 def _unpackmanifests(self, repo, revmap, trp, prog):
239 239 self.callback = prog.increment
240 240 # no need to check for empty manifest group here:
241 241 # if the result of the merge of 1 and 2 is the same in 3 and 4,
242 242 # no new manifest will be created and the manifest group will
243 243 # be empty during the pull
244 244 self.manifestheader()
245 245 deltas = self.deltaiter()
246 246 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
247 247 prog.complete()
248 248 self.callback = None
249 249
250 250 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
251 251 expectedtotal=None):
252 252 """Add the changegroup returned by source.read() to this repo.
253 253 srctype is a string like 'push', 'pull', or 'unbundle'. url is
254 254 the URL of the repo where this changegroup is coming from.
255 255
256 256 Return an integer summarizing the change to this repo:
257 257 - nothing changed or no source: 0
258 258 - more heads than before: 1+added heads (2..n)
259 259 - fewer heads than before: -1-removed heads (-2..-n)
260 260 - number of heads stays the same: 1
261 261 """
262 262 repo = repo.unfiltered()
263 263 def csmap(x):
264 264 repo.ui.debug("add changeset %s\n" % short(x))
265 265 return len(cl)
266 266
267 267 def revmap(x):
268 268 return cl.rev(x)
269 269
270 270 changesets = files = revisions = 0
271 271
272 272 try:
273 273 # The transaction may already carry source information. In this
274 274 # case we use the top level data. We overwrite the argument
275 275 # because we need to use the top level value (if they exist)
276 276 # in this function.
277 277 srctype = tr.hookargs.setdefault('source', srctype)
278 278 tr.hookargs.setdefault('url', url)
279 279 repo.hook('prechangegroup',
280 280 throw=True, **pycompat.strkwargs(tr.hookargs))
281 281
282 282 # write changelog data to temp files so concurrent readers
283 283 # will not see an inconsistent view
284 284 cl = repo.changelog
285 285 cl.delayupdate(tr)
286 286 oldheads = set(cl.heads())
287 287
288 288 trp = weakref.proxy(tr)
289 289 # pull off the changeset group
290 290 repo.ui.status(_("adding changesets\n"))
291 291 clstart = len(cl)
292 292 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
293 293 total=expectedtotal)
294 294 self.callback = progress.increment
295 295
296 296 efiles = set()
297 297 def onchangelog(cl, node):
298 298 efiles.update(cl.readfiles(node))
299 299
300 300 self.changelogheader()
301 301 deltas = self.deltaiter()
302 302 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
303 303 efiles = len(efiles)
304 304
305 305 if not cgnodes:
306 306 repo.ui.develwarn('applied empty changelog from changegroup',
307 307 config='warn-empty-changegroup')
308 308 clend = len(cl)
309 309 changesets = clend - clstart
310 310 progress.complete()
311 311 self.callback = None
312 312
313 313 # pull off the manifest group
314 314 repo.ui.status(_("adding manifests\n"))
315 315 # We know that we'll never have more manifests than we had
316 316 # changesets.
317 317 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
318 318 total=changesets)
319 319 self._unpackmanifests(repo, revmap, trp, progress)
320 320
321 321 needfiles = {}
322 322 if repo.ui.configbool('server', 'validate'):
323 323 cl = repo.changelog
324 324 ml = repo.manifestlog
325 325 # validate incoming csets have their manifests
326 326 for cset in pycompat.xrange(clstart, clend):
327 327 mfnode = cl.changelogrevision(cset).manifest
328 328 mfest = ml[mfnode].readdelta()
329 329 # store file cgnodes we must see
330 330 for f, n in mfest.iteritems():
331 331 needfiles.setdefault(f, set()).add(n)
332 332
333 333 # process the files
334 334 repo.ui.status(_("adding file changes\n"))
335 335 newrevs, newfiles = _addchangegroupfiles(
336 336 repo, self, revmap, trp, efiles, needfiles)
337 337 revisions += newrevs
338 338 files += newfiles
339 339
340 340 deltaheads = 0
341 341 if oldheads:
342 342 heads = cl.heads()
343 343 deltaheads = len(heads) - len(oldheads)
344 344 for h in heads:
345 345 if h not in oldheads and repo[h].closesbranch():
346 346 deltaheads -= 1
347 347 htext = ""
348 348 if deltaheads:
349 349 htext = _(" (%+d heads)") % deltaheads
350 350
351 351 repo.ui.status(_("added %d changesets"
352 352 " with %d changes to %d files%s\n")
353 353 % (changesets, revisions, files, htext))
354 354 repo.invalidatevolatilesets()
355 355
356 356 if changesets > 0:
357 357 if 'node' not in tr.hookargs:
358 358 tr.hookargs['node'] = hex(cl.node(clstart))
359 359 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
360 360 hookargs = dict(tr.hookargs)
361 361 else:
362 362 hookargs = dict(tr.hookargs)
363 363 hookargs['node'] = hex(cl.node(clstart))
364 364 hookargs['node_last'] = hex(cl.node(clend - 1))
365 365 repo.hook('pretxnchangegroup',
366 366 throw=True, **pycompat.strkwargs(hookargs))
367 367
368 368 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
369 369 phaseall = None
370 370 if srctype in ('push', 'serve'):
371 371 # Old servers cannot push the boundary themselves.
372 372 # New servers won't push the boundary if the changeset already
373 373 # exists locally as secret
374 374 #
375 375 # We should not use added here but the list of all changes in
376 376 # the bundle
377 377 if repo.publishing():
378 378 targetphase = phaseall = phases.public
379 379 else:
380 380 # closer target phase computation
381 381
382 382 # Those changesets have been pushed from the
383 383 # outside, their phases are going to be pushed
384 384 # alongside. Therefore `targetphase` is
385 385 # ignored.
386 386 targetphase = phaseall = phases.draft
387 387 if added:
388 388 phases.registernew(repo, tr, targetphase, added)
389 389 if phaseall is not None:
390 390 phases.advanceboundary(repo, tr, phaseall, cgnodes)
391 391
392 392 if changesets > 0:
393 393
394 394 def runhooks():
395 395 # These hooks run when the lock releases, not when the
396 396 # transaction closes. So it's possible for the changelog
397 397 # to have changed since we last saw it.
398 398 if clstart >= len(repo):
399 399 return
400 400
401 401 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
402 402
403 403 for n in added:
404 404 args = hookargs.copy()
405 405 args['node'] = hex(n)
406 406 del args['node_last']
407 407 repo.hook("incoming", **pycompat.strkwargs(args))
408 408
409 409 newheads = [h for h in repo.heads()
410 410 if h not in oldheads]
411 411 repo.ui.log("incoming",
412 412 "%d incoming changes - new heads: %s\n",
413 413 len(added),
414 414 ', '.join([hex(c[:6]) for c in newheads]))
415 415
416 416 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
417 417 lambda tr: repo._afterlock(runhooks))
418 418 finally:
419 419 repo.ui.flush()
420 420 # never return 0 here:
421 421 if deltaheads < 0:
422 422 ret = deltaheads - 1
423 423 else:
424 424 ret = deltaheads + 1
425 425 return ret
426 426
427 427 def deltaiter(self):
428 428 """
429 429 returns an iterator of the deltas in this changegroup
430 430
431 431 Useful for passing to the underlying storage system to be stored.
432 432 """
433 433 chain = None
434 434 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
435 435 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
436 436 yield chunkdata
437 437 chain = chunkdata[0]
438 438
439 439 class cg2unpacker(cg1unpacker):
440 440 """Unpacker for cg2 streams.
441 441
442 442 cg2 streams add support for generaldelta, so the delta header
443 443 format is slightly different. All other features about the data
444 444 remain the same.
445 445 """
446 446 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
447 447 deltaheadersize = deltaheader.size
448 448 version = '02'
449 449
450 450 def _deltaheader(self, headertuple, prevnode):
451 451 node, p1, p2, deltabase, cs = headertuple
452 452 flags = 0
453 453 return node, p1, p2, deltabase, cs, flags
454 454
455 455 class cg3unpacker(cg2unpacker):
456 456 """Unpacker for cg3 streams.
457 457
458 458 cg3 streams add support for exchanging treemanifests and revlog
459 459 flags. It adds the revlog flags to the delta header and an empty chunk
460 460 separating manifests and files.
461 461 """
462 462 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
463 463 deltaheadersize = deltaheader.size
464 464 version = '03'
465 465 _grouplistcount = 2 # One list of manifests and one list of files
466 466
467 467 def _deltaheader(self, headertuple, prevnode):
468 468 node, p1, p2, deltabase, cs, flags = headertuple
469 469 return node, p1, p2, deltabase, cs, flags
470 470
471 471 def _unpackmanifests(self, repo, revmap, trp, prog):
472 472 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
473 473 for chunkdata in iter(self.filelogheader, {}):
474 474 # If we get here, there are directory manifests in the changegroup
475 475 d = chunkdata["filename"]
476 476 repo.ui.debug("adding %s revisions\n" % d)
477 477 deltas = self.deltaiter()
478 478 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
479 479 raise error.Abort(_("received dir revlog group is empty"))
480 480
481 481 class headerlessfixup(object):
482 482 def __init__(self, fh, h):
483 483 self._h = h
484 484 self._fh = fh
485 485 def read(self, n):
486 486 if self._h:
487 487 d, self._h = self._h[:n], self._h[n:]
488 488 if len(d) < n:
489 489 d += readexactly(self._fh, n - len(d))
490 490 return d
491 491 return readexactly(self._fh, n)
492 492
493 493 def _revisiondeltatochunks(delta, headerfn):
494 494 """Serialize a revisiondelta to changegroup chunks."""
495 495
496 496 # The captured revision delta may be encoded as a delta against
497 497 # a base revision or as a full revision. The changegroup format
498 498 # requires that everything on the wire be deltas. So for full
499 499 # revisions, we need to invent a header that says to rewrite
500 500 # data.
501 501
502 502 if delta.delta is not None:
503 503 prefix, data = b'', delta.delta
504 504 elif delta.basenode == nullid:
505 505 data = delta.revision
506 506 prefix = mdiff.trivialdiffheader(len(data))
507 507 else:
508 508 data = delta.revision
509 509 prefix = mdiff.replacediffheader(delta.baserevisionsize,
510 510 len(data))
511 511
512 512 meta = headerfn(delta)
513 513
514 514 yield chunkheader(len(meta) + len(prefix) + len(data))
515 515 yield meta
516 516 if prefix:
517 517 yield prefix
518 518 yield data
519 519
520 520 def _sortnodesellipsis(store, nodes, cl, lookup):
521 521 """Sort nodes for changegroup generation."""
522 522 # Ellipses serving mode.
523 523 #
524 524 # In a perfect world, we'd generate better ellipsis-ified graphs
525 525 # for non-changelog revlogs. In practice, we haven't started doing
526 526 # that yet, so the resulting DAGs for the manifestlog and filelogs
527 527 # are actually full of bogus parentage on all the ellipsis
528 528 # nodes. This has the side effect that, while the contents are
529 529 # correct, the individual DAGs might be completely out of whack in
530 530 # a case like 882681bc3166 and its ancestors (back about 10
531 531 # revisions or so) in the main hg repo.
532 532 #
533 533 # The one invariant we *know* holds is that the new (potentially
534 534 # bogus) DAG shape will be valid if we order the nodes in the
535 535 # order that they're introduced in dramatis personae by the
536 536 # changelog, so what we do is we sort the non-changelog histories
537 537 # by the order in which they are used by the changelog.
538 538 key = lambda n: cl.rev(lookup(n))
539 539 return sorted(nodes, key=key)
540 540
541 541 def _resolvenarrowrevisioninfo(cl, store, ischangelog, rev, linkrev,
542 542 linknode, clrevtolocalrev, fullclnodes,
543 543 precomputedellipsis):
544 544 linkparents = precomputedellipsis[linkrev]
545 545 def local(clrev):
546 546 """Turn a changelog revnum into a local revnum.
547 547
548 548 The ellipsis dag is stored as revnums on the changelog,
549 549 but when we're producing ellipsis entries for
550 550 non-changelog revlogs, we need to turn those numbers into
551 551 something local. This does that for us, and during the
552 552 changelog sending phase will also expand the stored
553 553 mappings as needed.
554 554 """
555 555 if clrev == nullrev:
556 556 return nullrev
557 557
558 558 if ischangelog:
559 559 return clrev
560 560
561 561 # Walk the ellipsis-ized changelog breadth-first looking for a
562 562 # change that has been linked from the current revlog.
563 563 #
564 564 # For a flat manifest revlog only a single step should be necessary
565 565 # as all relevant changelog entries are relevant to the flat
566 566 # manifest.
567 567 #
568 568 # For a filelog or tree manifest dirlog however not every changelog
569 569 # entry will have been relevant, so we need to skip some changelog
570 570 # nodes even after ellipsis-izing.
571 571 walk = [clrev]
572 572 while walk:
573 573 p = walk[0]
574 574 walk = walk[1:]
575 575 if p in clrevtolocalrev:
576 576 return clrevtolocalrev[p]
577 577 elif p in fullclnodes:
578 578 walk.extend([pp for pp in cl.parentrevs(p)
579 579 if pp != nullrev])
580 580 elif p in precomputedellipsis:
581 581 walk.extend([pp for pp in precomputedellipsis[p]
582 582 if pp != nullrev])
583 583 else:
584 584 # In this case, we've got an ellipsis with parents
585 585 # outside the current bundle (likely an
586 586 # incremental pull). We "know" that we can use the
587 587 # value of this same revlog at whatever revision
588 588 # is pointed to by linknode. "Know" is in scare
589 589 # quotes because I haven't done enough examination
590 590 # of edge cases to convince myself this is really
591 591 # a fact - it works for all the (admittedly
592 592 # thorough) cases in our testsuite, but I would be
593 593 # somewhat unsurprised to find a case in the wild
594 594 # where this breaks down a bit. That said, I don't
595 595 # know if it would hurt anything.
596 596 for i in pycompat.xrange(rev, 0, -1):
597 597 if store.linkrev(i) == clrev:
598 598 return i
599 599 # We failed to resolve a parent for this node, so
600 600 # we crash the changegroup construction.
601 601 raise error.Abort(
602 602 'unable to resolve parent while packing %r %r'
603 603 ' for changeset %r' % (store.indexfile, rev, clrev))
604 604
605 605 return nullrev
606 606
607 607 if not linkparents or (
608 608 store.parentrevs(rev) == (nullrev, nullrev)):
609 609 p1, p2 = nullrev, nullrev
610 610 elif len(linkparents) == 1:
611 611 p1, = sorted(local(p) for p in linkparents)
612 612 p2 = nullrev
613 613 else:
614 614 p1, p2 = sorted(local(p) for p in linkparents)
615 615
616 616 p1node, p2node = store.node(p1), store.node(p2)
617 617
618 618 return p1node, p2node, linknode
619 619
620 620 def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
621 621 topic=None,
622 622 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
623 623 precomputedellipsis=None):
624 624 """Calculate deltas for a set of revisions.
625 625
626 626 Is a generator of ``revisiondelta`` instances.
627 627
628 628 If topic is not None, progress detail will be generated using this
629 629 topic name (e.g. changesets, manifests, etc).
630 630 """
631 631 if not nodes:
632 632 return
633 633
634 634 cl = repo.changelog
635 635
636 636 if ischangelog:
637 637 # `hg log` shows changesets in storage order. To preserve order
638 638 # across clones, send out changesets in storage order.
639 639 nodesorder = 'storage'
640 640 elif ellipses:
641 641 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
642 642 nodesorder = 'nodes'
643 643 else:
644 644 nodesorder = None
645 645
646 646 # Perform ellipses filtering and revision massaging. We do this before
647 647 # emitrevisions() because a) filtering out revisions creates less work
648 648 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
649 649 # assumptions about delta choices and we would possibly send a delta
650 650 # referencing a missing base revision.
651 651 #
652 652 # Also, calling lookup() has side-effects with regards to populating
653 653 # data structures. If we don't call lookup() for each node or if we call
654 654 # lookup() after the first pass through each node, things can break -
655 655 # possibly intermittently depending on the python hash seed! For that
656 656 # reason, we store a mapping of all linknodes during the initial node
657 657 # pass rather than use lookup() on the output side.
658 658 if ellipses:
659 659 filtered = []
660 660 adjustedparents = {}
661 661 linknodes = {}
662 662
663 663 for node in nodes:
664 664 rev = store.rev(node)
665 665 linknode = lookup(node)
666 666 linkrev = cl.rev(linknode)
667 667 clrevtolocalrev[linkrev] = rev
668 668
669 669 # If linknode is in fullclnodes, it means the corresponding
670 670 # changeset was a full changeset and is being sent unaltered.
671 671 if linknode in fullclnodes:
672 672 linknodes[node] = linknode
673 673
674 674 # If the corresponding changeset wasn't in the set computed
675 675 # as relevant to us, it should be dropped outright.
676 676 elif linkrev not in precomputedellipsis:
677 677 continue
678 678
679 679 else:
680 680 # We could probably do this later and avoid the dict
681 681 # holding state. But it likely doesn't matter.
682 682 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
683 683 cl, store, ischangelog, rev, linkrev, linknode,
684 684 clrevtolocalrev, fullclnodes, precomputedellipsis)
685 685
686 686 adjustedparents[node] = (p1node, p2node)
687 687 linknodes[node] = linknode
688 688
689 689 filtered.append(node)
690 690
691 691 nodes = filtered
692 692
693 693 # We expect the first pass to be fast, so we only engage the progress
694 694 # meter for constructing the revision deltas.
695 695 progress = None
696 696 if topic is not None:
697 697 progress = repo.ui.makeprogress(topic, unit=_('chunks'),
698 698 total=len(nodes))
699 699
700 700 configtarget = repo.ui.config('devel', 'bundle.delta')
701 701 if configtarget not in ('', 'p1', 'full'):
702 702 msg = _("""config "devel.bundle.delta" has unknown value: %s""")
703 703 repo.ui.warn(msg % configtarget)
704 704
705 705 deltamode = repository.CG_DELTAMODE_STD
706 706 if forcedeltaparentprev:
707 707 deltamode = repository.CG_DELTAMODE_PREV
708 708 elif configtarget == 'p1':
709 709 deltamode = repository.CG_DELTAMODE_P1
710 710 elif configtarget == 'full':
711 711 deltamode = repository.CG_DELTAMODE_FULL
712 712
713 713 revisions = store.emitrevisions(
714 714 nodes,
715 715 nodesorder=nodesorder,
716 716 revisiondata=True,
717 717 assumehaveparentrevisions=not ellipses,
718 718 deltamode=deltamode)
719 719
720 720 for i, revision in enumerate(revisions):
721 721 if progress:
722 722 progress.update(i + 1)
723 723
724 724 if ellipses:
725 725 linknode = linknodes[revision.node]
726 726
727 727 if revision.node in adjustedparents:
728 728 p1node, p2node = adjustedparents[revision.node]
729 729 revision.p1node = p1node
730 730 revision.p2node = p2node
731 731 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
732 732
733 733 else:
734 734 linknode = lookup(revision.node)
735 735
736 736 revision.linknode = linknode
737 737 yield revision
738 738
739 739 if progress:
740 740 progress.complete()
741 741
742 742 class cgpacker(object):
743 743 def __init__(self, repo, oldmatcher, matcher, version,
744 744 builddeltaheader, manifestsend,
745 745 forcedeltaparentprev=False,
746 746 bundlecaps=None, ellipses=False,
747 747 shallow=False, ellipsisroots=None, fullnodes=None):
748 748 """Given a source repo, construct a bundler.
749 749
750 750 oldmatcher is a matcher that matches on files the client already has.
751 751 These will not be included in the changegroup.
752 752
753 753 matcher is a matcher that matches on files to include in the
754 754 changegroup. Used to facilitate sparse changegroups.
755 755
756 756 forcedeltaparentprev indicates whether delta parents must be against
757 757 the previous revision in a delta group. This should only be used for
758 758 compatibility with changegroup version 1.
759 759
760 760 builddeltaheader is a callable that constructs the header for a group
761 761 delta.
762 762
763 763 manifestsend is a chunk to send after manifests have been fully emitted.
764 764
765 765 ellipses indicates whether ellipsis serving mode is enabled.
766 766
767 767 bundlecaps is optional and can be used to specify the set of
768 768 capabilities which can be used to build the bundle. While bundlecaps is
769 769 unused in core Mercurial, extensions rely on this feature to communicate
770 770 capabilities to customize the changegroup packer.
771 771
772 772 shallow indicates whether shallow data might be sent. The packer may
773 773 need to pack file contents not introduced by the changes being packed.
774 774
775 775 fullnodes is the set of changelog nodes which should not be ellipsis
776 776 nodes. We store this rather than the set of nodes that should be
777 777 ellipsis because for very large histories we expect this to be
778 778 significantly smaller.
779 779 """
780 780 assert oldmatcher
781 781 assert matcher
782 782 self._oldmatcher = oldmatcher
783 783 self._matcher = matcher
784 784
785 785 self.version = version
786 786 self._forcedeltaparentprev = forcedeltaparentprev
787 787 self._builddeltaheader = builddeltaheader
788 788 self._manifestsend = manifestsend
789 789 self._ellipses = ellipses
790 790
791 791 # Set of capabilities we can use to build the bundle.
792 792 if bundlecaps is None:
793 793 bundlecaps = set()
794 794 self._bundlecaps = bundlecaps
795 795 self._isshallow = shallow
796 796 self._fullclnodes = fullnodes
797 797
798 798 # Maps ellipsis revs to their roots at the changelog level.
799 799 self._precomputedellipsis = ellipsisroots
800 800
801 801 self._repo = repo
802 802
803 803 if self._repo.ui.verbose and not self._repo.ui.debugflag:
804 804 self._verbosenote = self._repo.ui.note
805 805 else:
806 806 self._verbosenote = lambda s: None
807 807
808 808 def generate(self, commonrevs, clnodes, fastpathlinkrev, source,
809 809 changelog=True):
810 810 """Yield a sequence of changegroup byte chunks.
811 811 If changelog is False, changelog data won't be added to changegroup
812 812 """
813 813
814 814 repo = self._repo
815 815 cl = repo.changelog
816 816
817 817 self._verbosenote(_('uncompressed size of bundle content:\n'))
818 818 size = 0
819 819
820 820 clstate, deltas = self._generatechangelog(cl, clnodes,
821 821 generate=changelog)
822 822 for delta in deltas:
823 823 for chunk in _revisiondeltatochunks(delta,
824 824 self._builddeltaheader):
825 825 size += len(chunk)
826 826 yield chunk
827 827
828 828 close = closechunk()
829 829 size += len(close)
830 830 yield closechunk()
831 831
832 832 self._verbosenote(_('%8.i (changelog)\n') % size)
833 833
834 834 clrevorder = clstate['clrevorder']
835 835 manifests = clstate['manifests']
836 836 changedfiles = clstate['changedfiles']
837 837
838 838 # We need to make sure that the linkrev in the changegroup refers to
839 839 # the first changeset that introduced the manifest or file revision.
840 840 # The fastpath is usually safer than the slowpath, because the filelogs
841 841 # are walked in revlog order.
842 842 #
843 843 # When taking the slowpath when the manifest revlog uses generaldelta,
844 844 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
845 845 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
846 846 #
847 847 # When taking the fastpath, we are only vulnerable to reordering
848 848 # of the changelog itself. The changelog never uses generaldelta and is
849 849 # never reordered. To handle this case, we simply take the slowpath,
850 850 # which already has the 'clrevorder' logic. This was also fixed in
851 851 # cc0ff93d0c0c.
852 852
853 853 # Treemanifests don't work correctly with fastpathlinkrev
854 854 # either, because we don't discover which directory nodes to
855 855 # send along with files. This could probably be fixed.
856 856 fastpathlinkrev = fastpathlinkrev and (
857 857 'treemanifest' not in repo.requirements)
858 858
859 859 fnodes = {} # needed file nodes
860 860
861 861 size = 0
862 862 it = self.generatemanifests(
863 863 commonrevs, clrevorder, fastpathlinkrev, manifests, fnodes, source,
864 864 clstate['clrevtomanifestrev'])
865 865
866 866 for tree, deltas in it:
867 867 if tree:
868 868 assert self.version == b'03'
869 869 chunk = _fileheader(tree)
870 870 size += len(chunk)
871 871 yield chunk
872 872
873 873 for delta in deltas:
874 874 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
875 875 for chunk in chunks:
876 876 size += len(chunk)
877 877 yield chunk
878 878
879 879 close = closechunk()
880 880 size += len(close)
881 881 yield close
882 882
883 883 self._verbosenote(_('%8.i (manifests)\n') % size)
884 884 yield self._manifestsend
885 885
886 886 mfdicts = None
887 887 if self._ellipses and self._isshallow:
888 888 mfdicts = [(self._repo.manifestlog[n].read(), lr)
889 889 for (n, lr) in manifests.iteritems()]
890 890
891 891 manifests.clear()
892 892 clrevs = set(cl.rev(x) for x in clnodes)
893 893
894 894 it = self.generatefiles(changedfiles, commonrevs,
895 895 source, mfdicts, fastpathlinkrev,
896 896 fnodes, clrevs)
897 897
898 898 for path, deltas in it:
899 899 h = _fileheader(path)
900 900 size = len(h)
901 901 yield h
902 902
903 903 for delta in deltas:
904 904 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
905 905 for chunk in chunks:
906 906 size += len(chunk)
907 907 yield chunk
908 908
909 909 close = closechunk()
910 910 size += len(close)
911 911 yield close
912 912
913 913 self._verbosenote(_('%8.i %s\n') % (size, path))
914 914
915 915 yield closechunk()
916 916
917 917 if clnodes:
918 918 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
919 919
920 920 def _generatechangelog(self, cl, nodes, generate=True):
921 921 """Generate data for changelog chunks.
922 922
923 923 Returns a 2-tuple of a dict containing state and an iterable of
924 924 byte chunks. The state will not be fully populated until the
925 925 chunk stream has been fully consumed.
926 926
927 927 if generate is False, the state will be fully populated and no chunk
928 928 stream will be yielded
929 929 """
930 930 clrevorder = {}
931 931 manifests = {}
932 932 mfl = self._repo.manifestlog
933 933 changedfiles = set()
934 934 clrevtomanifestrev = {}
935 935
936 936 state = {
937 937 'clrevorder': clrevorder,
938 938 'manifests': manifests,
939 939 'changedfiles': changedfiles,
940 940 'clrevtomanifestrev': clrevtomanifestrev,
941 941 }
942 942
943 943 if not (generate or self._ellipses):
944 944 # sort the nodes in storage order
945 945 nodes = sorted(nodes, key=cl.rev)
946 946 for node in nodes:
947 947 c = cl.changelogrevision(node)
948 948 clrevorder[node] = len(clrevorder)
949 949 # record the first changeset introducing this manifest version
950 950 manifests.setdefault(c.manifest, node)
951 951 # Record a complete list of potentially-changed files in
952 952 # this manifest.
953 953 changedfiles.update(c.files)
954 954
955 955 return state, ()
956 956
957 957 # Callback for the changelog, used to collect changed files and
958 958 # manifest nodes.
959 959 # Returns the linkrev node (identity in the changelog case).
960 960 def lookupcl(x):
961 961 c = cl.changelogrevision(x)
962 962 clrevorder[x] = len(clrevorder)
963 963
964 964 if self._ellipses:
965 965 # Only update manifests if x is going to be sent. Otherwise we
966 966 # end up with bogus linkrevs specified for manifests and
967 967 # we skip some manifest nodes that we should otherwise
968 968 # have sent.
969 969 if (x in self._fullclnodes
970 970 or cl.rev(x) in self._precomputedellipsis):
971 971
972 972 manifestnode = c.manifest
973 973 # Record the first changeset introducing this manifest
974 974 # version.
975 975 manifests.setdefault(manifestnode, x)
976 976 # Set this narrow-specific dict so we have the lowest
977 977 # manifest revnum to look up for this cl revnum. (Part of
978 978 # mapping changelog ellipsis parents to manifest ellipsis
979 979 # parents)
980 980 clrevtomanifestrev.setdefault(
981 981 cl.rev(x), mfl.rev(manifestnode))
982 982 # We can't trust the changed files list in the changeset if the
983 983 # client requested a shallow clone.
984 984 if self._isshallow:
985 985 changedfiles.update(mfl[c.manifest].read().keys())
986 986 else:
987 987 changedfiles.update(c.files)
988 988 else:
989 989 # record the first changeset introducing this manifest version
990 990 manifests.setdefault(c.manifest, x)
991 991 # Record a complete list of potentially-changed files in
992 992 # this manifest.
993 993 changedfiles.update(c.files)
994 994
995 995 return x
996 996
997 997 gen = deltagroup(
998 998 self._repo, cl, nodes, True, lookupcl,
999 999 self._forcedeltaparentprev,
1000 1000 ellipses=self._ellipses,
1001 1001 topic=_('changesets'),
1002 1002 clrevtolocalrev={},
1003 1003 fullclnodes=self._fullclnodes,
1004 1004 precomputedellipsis=self._precomputedellipsis)
1005 1005
1006 1006 return state, gen
1007 1007
1008 1008 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev,
1009 1009 manifests, fnodes, source, clrevtolocalrev):
1010 1010 """Returns an iterator of changegroup chunks containing manifests.
1011 1011
1012 1012 `source` is unused here, but is used by extensions like remotefilelog to
1013 1013 change what is sent based on pulls vs pushes, etc.
1014 1014 """
1015 1015 repo = self._repo
1016 1016 mfl = repo.manifestlog
1017 1017 tmfnodes = {'': manifests}
1018 1018
1019 1019 # Callback for the manifest, used to collect linkrevs for filelog
1020 1020 # revisions.
1021 1021 # Returns the linkrev node (collected in lookupcl).
1022 1022 def makelookupmflinknode(tree, nodes):
1023 1023 if fastpathlinkrev:
1024 1024 assert not tree
1025 1025 return manifests.__getitem__
1026 1026
1027 1027 def lookupmflinknode(x):
1028 1028 """Callback for looking up the linknode for manifests.
1029 1029
1030 1030 Returns the linkrev node for the specified manifest.
1031 1031
1032 1032 SIDE EFFECT:
1033 1033
1034 1034 1) fclnodes gets populated with the list of relevant
1035 1035 file nodes if we're not using fastpathlinkrev
1036 1036 2) When treemanifests are in use, collects treemanifest nodes
1037 1037 to send
1038 1038
1039 1039 Note that this means manifests must be completely sent to
1040 1040 the client before you can trust the list of files and
1041 1041 treemanifests to send.
1042 1042 """
1043 1043 clnode = nodes[x]
1044 1044 mdata = mfl.get(tree, x).readfast(shallow=True)
1045 1045 for p, n, fl in mdata.iterentries():
1046 1046 if fl == 't': # subdirectory manifest
1047 1047 subtree = tree + p + '/'
1048 1048 tmfclnodes = tmfnodes.setdefault(subtree, {})
1049 1049 tmfclnode = tmfclnodes.setdefault(n, clnode)
1050 1050 if clrevorder[clnode] < clrevorder[tmfclnode]:
1051 1051 tmfclnodes[n] = clnode
1052 1052 else:
1053 1053 f = tree + p
1054 1054 fclnodes = fnodes.setdefault(f, {})
1055 1055 fclnode = fclnodes.setdefault(n, clnode)
1056 1056 if clrevorder[clnode] < clrevorder[fclnode]:
1057 1057 fclnodes[n] = clnode
1058 1058 return clnode
1059 1059 return lookupmflinknode
1060 1060
1061 1061 while tmfnodes:
1062 1062 tree, nodes = tmfnodes.popitem()
1063 1063
1064 should_visit = self._matcher.visitdir(tree[:-1] or '.')
1064 should_visit = self._matcher.visitdir(tree[:-1])
1065 1065 if tree and not should_visit:
1066 1066 continue
1067 1067
1068 1068 store = mfl.getstorage(tree)
1069 1069
1070 1070 if not should_visit:
1071 1071 # No nodes to send because this directory is out of
1072 1072 # the client's view of the repository (probably
1073 1073 # because of narrow clones). Do this even for the root
1074 1074 # directory (tree=='')
1075 1075 prunednodes = []
1076 1076 else:
1077 1077 # Avoid sending any manifest nodes we can prove the
1078 1078 # client already has by checking linkrevs. See the
1079 1079 # related comment in generatefiles().
1080 1080 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1081 1081
1082 1082 if tree and not prunednodes:
1083 1083 continue
1084 1084
1085 1085 lookupfn = makelookupmflinknode(tree, nodes)
1086 1086
1087 1087 deltas = deltagroup(
1088 1088 self._repo, store, prunednodes, False, lookupfn,
1089 1089 self._forcedeltaparentprev,
1090 1090 ellipses=self._ellipses,
1091 1091 topic=_('manifests'),
1092 1092 clrevtolocalrev=clrevtolocalrev,
1093 1093 fullclnodes=self._fullclnodes,
1094 1094 precomputedellipsis=self._precomputedellipsis)
1095 1095
1096 if not self._oldmatcher.visitdir(store.tree[:-1] or '.'):
1096 if not self._oldmatcher.visitdir(store.tree[:-1]):
1097 1097 yield tree, deltas
1098 1098 else:
1099 1099 # 'deltas' is a generator and we need to consume it even if
1100 1100 # we are not going to send it because a side-effect is that
1101 1101 # it updates tmfnodes (via lookupfn)
1102 1102 for d in deltas:
1103 1103 pass
1104 1104 if not tree:
1105 1105 yield tree, []
1106 1106
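    # Sketch of the traversal above for a treemanifest repo (paths are
    # hypothetical): tmfnodes starts as {'': manifests}; sending the root
    # manifest discovers 't' (subdirectory) entries via lookupmflinknode,
    # so tmfnodes grows entries like {'dir/': {node: clnode}}, which the
    # while-loop pops and sends in turn.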
1107 1107 def _prunemanifests(self, store, nodes, commonrevs):
1108 1108 if not self._ellipses:
1109 1109 # In the non-ellipses case, and in large repositories, it is
1110 1110 # better to avoid calling store.rev and store.linkrev on a lot
1111 1111 # of nodes, even at the cost of sending some extra data
1112 1112 return nodes.copy()
1113 1113 # This is split out as a separate method to allow filtering
1114 1114 # commonrevs in extension code.
1115 1115 #
1116 1116 # TODO(augie): this shouldn't be required, instead we should
1117 1117 # make filtering of revisions to send delegated to the store
1118 1118 # layer.
1119 1119 frev, flr = store.rev, store.linkrev
1120 1120 return [n for n in nodes if flr(frev(n)) not in commonrevs]
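    # A hedged restatement of the predicate above: node n is kept only if
    # the changeset that introduced it (its linkrev) is not already common,
    # i.e.:
    #
    #   frev, flr = store.rev, store.linkrev
    #   keep = flr(frev(n)) not in commonrevs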
1121 1121
1122 1122 # The 'source' parameter is useful for extensions
1123 1123 def generatefiles(self, changedfiles, commonrevs, source,
1124 1124 mfdicts, fastpathlinkrev, fnodes, clrevs):
1125 1125 changedfiles = [f for f in changedfiles
1126 1126 if self._matcher(f) and not self._oldmatcher(f)]
1127 1127
1128 1128 if not fastpathlinkrev:
1129 1129 def normallinknodes(unused, fname):
1130 1130 return fnodes.get(fname, {})
1131 1131 else:
1132 1132 cln = self._repo.changelog.node
1133 1133
1134 1134 def normallinknodes(store, fname):
1135 1135 flinkrev = store.linkrev
1136 1136 fnode = store.node
1137 1137 revs = ((r, flinkrev(r)) for r in store)
1138 1138 return dict((fnode(r), cln(lr))
1139 1139 for r, lr in revs if lr in clrevs)
1140 1140
1141 1141 clrevtolocalrev = {}
1142 1142
1143 1143 if self._isshallow:
1144 1144 # In a shallow clone, the linknodes callback needs to also include
1145 1145 # those file nodes that are in the manifests we sent but weren't
1146 1146 # introduced by those manifests.
1147 1147 commonctxs = [self._repo[c] for c in commonrevs]
1148 1148 clrev = self._repo.changelog.rev
1149 1149
1150 1150 def linknodes(flog, fname):
1151 1151 for c in commonctxs:
1152 1152 try:
1153 1153 fnode = c.filenode(fname)
1154 1154 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1155 1155 except error.ManifestLookupError:
1156 1156 pass
1157 1157 links = normallinknodes(flog, fname)
1158 1158 if len(links) != len(mfdicts):
1159 1159 for mf, lr in mfdicts:
1160 1160 fnode = mf.get(fname, None)
1161 1161 if fnode in links:
1162 1162 links[fnode] = min(links[fnode], lr, key=clrev)
1163 1163 elif fnode:
1164 1164 links[fnode] = lr
1165 1165 return links
1166 1166 else:
1167 1167 linknodes = normallinknodes
1168 1168
1169 1169 repo = self._repo
1170 1170 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1171 1171 total=len(changedfiles))
1172 1172 for i, fname in enumerate(sorted(changedfiles)):
1173 1173 filerevlog = repo.file(fname)
1174 1174 if not filerevlog:
1175 1175 raise error.Abort(_("empty or missing file data for %s") %
1176 1176 fname)
1177 1177
1178 1178 clrevtolocalrev.clear()
1179 1179
1180 1180 linkrevnodes = linknodes(filerevlog, fname)
1181 1181 # Look up filenodes; we collected the linkrev nodes above in the
1182 1182 # fastpath case and with lookupmflinknode in the slowpath case.
1183 1183 def lookupfilelog(x):
1184 1184 return linkrevnodes[x]
1185 1185
1186 1186 frev, flr = filerevlog.rev, filerevlog.linkrev
1187 1187 # Skip sending any filenode we know the client already
1188 1188 # has. This avoids over-sending files relatively
1189 1189 # inexpensively, so it's not a problem if we under-filter
1190 1190 # here.
1191 1191 filenodes = [n for n in linkrevnodes
1192 1192 if flr(frev(n)) not in commonrevs]
1193 1193
1194 1194 if not filenodes:
1195 1195 continue
1196 1196
1197 1197 progress.update(i + 1, item=fname)
1198 1198
1199 1199 deltas = deltagroup(
1200 1200 self._repo, filerevlog, filenodes, False, lookupfilelog,
1201 1201 self._forcedeltaparentprev,
1202 1202 ellipses=self._ellipses,
1203 1203 clrevtolocalrev=clrevtolocalrev,
1204 1204 fullclnodes=self._fullclnodes,
1205 1205 precomputedellipsis=self._precomputedellipsis)
1206 1206
1207 1207 yield fname, deltas
1208 1208
1209 1209 progress.complete()
1210 1210
1211 1211 def _makecg1packer(repo, oldmatcher, matcher, bundlecaps,
1212 1212 ellipses=False, shallow=False, ellipsisroots=None,
1213 1213 fullnodes=None):
1214 1214 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1215 1215 d.node, d.p1node, d.p2node, d.linknode)
1216 1216
1217 1217 return cgpacker(repo, oldmatcher, matcher, b'01',
1218 1218 builddeltaheader=builddeltaheader,
1219 1219 manifestsend=b'',
1220 1220 forcedeltaparentprev=True,
1221 1221 bundlecaps=bundlecaps,
1222 1222 ellipses=ellipses,
1223 1223 shallow=shallow,
1224 1224 ellipsisroots=ellipsisroots,
1225 1225 fullnodes=fullnodes)
1226 1226
1227 1227 def _makecg2packer(repo, oldmatcher, matcher, bundlecaps,
1228 1228 ellipses=False, shallow=False, ellipsisroots=None,
1229 1229 fullnodes=None):
1230 1230 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1231 1231 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1232 1232
1233 1233 return cgpacker(repo, oldmatcher, matcher, b'02',
1234 1234 builddeltaheader=builddeltaheader,
1235 1235 manifestsend=b'',
1236 1236 bundlecaps=bundlecaps,
1237 1237 ellipses=ellipses,
1238 1238 shallow=shallow,
1239 1239 ellipsisroots=ellipsisroots,
1240 1240 fullnodes=fullnodes)
1241 1241
1242 1242 def _makecg3packer(repo, oldmatcher, matcher, bundlecaps,
1243 1243 ellipses=False, shallow=False, ellipsisroots=None,
1244 1244 fullnodes=None):
1245 1245 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1246 1246 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1247 1247
1248 1248 return cgpacker(repo, oldmatcher, matcher, b'03',
1249 1249 builddeltaheader=builddeltaheader,
1250 1250 manifestsend=closechunk(),
1251 1251 bundlecaps=bundlecaps,
1252 1252 ellipses=ellipses,
1253 1253 shallow=shallow,
1254 1254 ellipsisroots=ellipsisroots,
1255 1255 fullnodes=fullnodes)
1256 1256
1257 1257 _packermap = {'01': (_makecg1packer, cg1unpacker),
1258 1258 # cg2 adds support for exchanging generaldelta
1259 1259 '02': (_makecg2packer, cg2unpacker),
1260 1260 # cg3 adds support for exchanging revlog flags and treemanifests
1261 1261 '03': (_makecg3packer, cg3unpacker),
1262 1262 }
1263 1263
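# Illustrative use of the table above (the open bundle file handle 'fh' is
# an assumption of this sketch; 'UN' names the uncompressed engine):
#
#   makepacker, unpackercls = _packermap['02']
#   unbundler = unpackercls(fh, 'UN')  # same as getunbundler('02', fh, 'UN')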
1264 1264 def allsupportedversions(repo):
1265 1265 versions = set(_packermap.keys())
1266 1266 if not (repo.ui.configbool('experimental', 'changegroup3') or
1267 1267 repo.ui.configbool('experimental', 'treemanifest') or
1268 1268 'treemanifest' in repo.requirements):
1269 1269 versions.discard('03')
1270 1270 return versions
1271 1271
1272 1272 # Changegroup versions that can be applied to the repo
1273 1273 def supportedincomingversions(repo):
1274 1274 return allsupportedversions(repo)
1275 1275
1276 1276 # Changegroup versions that can be created from the repo
1277 1277 def supportedoutgoingversions(repo):
1278 1278 versions = allsupportedversions(repo)
1279 1279 if 'treemanifest' in repo.requirements:
1280 1280 # Versions 01 and 02 support only flat manifests and it's just too
1281 1281 # expensive to convert between the flat manifest and tree manifest on
1282 1282 # the fly. Since tree manifests are hashed differently, all of history
1283 1283 # would have to be converted. Instead, we simply don't even pretend to
1284 1284 # support versions 01 and 02.
1285 1285 versions.discard('01')
1286 1286 versions.discard('02')
1287 1287 if repository.NARROW_REQUIREMENT in repo.requirements:
1288 1288 # Versions 01 and 02 don't support revlog flags, and we need to
1289 1289 # support that for stripping and unbundling to work.
1290 1290 versions.discard('01')
1291 1291 versions.discard('02')
1292 1292 if LFS_REQUIREMENT in repo.requirements:
1293 1293 # Versions 01 and 02 don't support revlog flags, and we need to
1294 1294 # mark LFS entries with REVIDX_EXTSTORED.
1295 1295 versions.discard('01')
1296 1296 versions.discard('02')
1297 1297
1298 1298 return versions
1299 1299
1300 1300 def localversion(repo):
1301 1301 # Finds the best version to use for bundles that are meant to be used
1302 1302 # locally, such as those from strip and shelve, and temporary bundles.
1303 1303 return max(supportedoutgoingversions(repo))
1304 1304
1305 1305 def safeversion(repo):
1306 1306 # Finds the smallest version that it's safe to assume clients of the repo
1307 1307 # will support. For example, all hg versions that support generaldelta also
1308 1308 # support changegroup 02.
1309 1309 versions = supportedoutgoingversions(repo)
1310 1310 if 'generaldelta' in repo.requirements:
1311 1311 versions.discard('01')
1312 1312 assert versions
1313 1313 return min(versions)
1314 1314
1315 1315 def getbundler(version, repo, bundlecaps=None, oldmatcher=None,
1316 1316 matcher=None, ellipses=False, shallow=False,
1317 1317 ellipsisroots=None, fullnodes=None):
1318 1318 assert version in supportedoutgoingversions(repo)
1319 1319
1320 1320 if matcher is None:
1321 1321 matcher = matchmod.always()
1322 1322 if oldmatcher is None:
1323 1323 oldmatcher = matchmod.never()
1324 1324
1325 1325 if version == '01' and not matcher.always():
1326 1326 raise error.ProgrammingError('version 01 changegroups do not support '
1327 1327 'sparse file matchers')
1328 1328
1329 1329 if ellipses and version in (b'01', b'02'):
1330 1330 raise error.Abort(
1331 1331 _('ellipsis nodes require at least cg3 on client and server, '
1332 1332 'but negotiated version %s') % version)
1333 1333
1334 1334 # Requested files could include files not in the local store. So
1335 1335 # filter those out.
1336 1336 matcher = repo.narrowmatch(matcher)
1337 1337
1338 1338 fn = _packermap[version][0]
1339 1339 return fn(repo, oldmatcher, matcher, bundlecaps, ellipses=ellipses,
1340 1340 shallow=shallow, ellipsisroots=ellipsisroots,
1341 1341 fullnodes=fullnodes)
1342 1342
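# Minimal usage sketch for getbundler(); makestream() below is the real
# call site, and commonrevs/csets here are assumed revision/node sets:
#
#   bundler = getbundler('02', repo)
#   chunks = bundler.generate(commonrevs, csets, False, 'pull')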
1343 1343 def getunbundler(version, fh, alg, extras=None):
1344 1344 return _packermap[version][1](fh, alg, extras=extras)
1345 1345
1346 1346 def _changegroupinfo(repo, nodes, source):
1347 1347 if repo.ui.verbose or source == 'bundle':
1348 1348 repo.ui.status(_("%d changesets found\n") % len(nodes))
1349 1349 if repo.ui.debugflag:
1350 1350 repo.ui.debug("list of changesets:\n")
1351 1351 for node in nodes:
1352 1352 repo.ui.debug("%s\n" % hex(node))
1353 1353
1354 1354 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1355 1355 bundlecaps=None):
1356 1356 cgstream = makestream(repo, outgoing, version, source,
1357 1357 fastpath=fastpath, bundlecaps=bundlecaps)
1358 1358 return getunbundler(version, util.chunkbuffer(cgstream), None,
1359 1359 {'clcount': len(outgoing.missing) })
1360 1360
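# Sketch of a typical caller (the outgoing object comes from discovery;
# 'strip' is just an example source name):
#
#   cg = makechangegroup(repo, outgoing, '02', 'strip')
#   cg.apply(repo, tr, 'strip', url)  # cg is an unbundler instance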
1361 1361 def makestream(repo, outgoing, version, source, fastpath=False,
1362 1362 bundlecaps=None, matcher=None):
1363 1363 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1364 1364 matcher=matcher)
1365 1365
1366 1366 repo = repo.unfiltered()
1367 1367 commonrevs = outgoing.common
1368 1368 csets = outgoing.missing
1369 1369 heads = outgoing.missingheads
1370 1370 # We go through the fast path if we get told to, or if all (unfiltered)
1371 1371 # heads have been requested (since we then know that all linkrevs will
1372 1372 # be pulled by the client).
1373 1373 heads.sort()
1374 1374 fastpathlinkrev = fastpath or (
1375 1375 repo.filtername is None and heads == sorted(repo.heads()))
1376 1376
1377 1377 repo.hook('preoutgoing', throw=True, source=source)
1378 1378 _changegroupinfo(repo, csets, source)
1379 1379 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1380 1380
1381 1381 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1382 1382 revisions = 0
1383 1383 files = 0
1384 1384 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1385 1385 total=expectedfiles)
1386 1386 for chunkdata in iter(source.filelogheader, {}):
1387 1387 files += 1
1388 1388 f = chunkdata["filename"]
1389 1389 repo.ui.debug("adding %s revisions\n" % f)
1390 1390 progress.increment()
1391 1391 fl = repo.file(f)
1392 1392 o = len(fl)
1393 1393 try:
1394 1394 deltas = source.deltaiter()
1395 1395 if not fl.addgroup(deltas, revmap, trp):
1396 1396 raise error.Abort(_("received file revlog group is empty"))
1397 1397 except error.CensoredBaseError as e:
1398 1398 raise error.Abort(_("received delta base is censored: %s") % e)
1399 1399 revisions += len(fl) - o
1400 1400 if f in needfiles:
1401 1401 needs = needfiles[f]
1402 1402 for new in pycompat.xrange(o, len(fl)):
1403 1403 n = fl.node(new)
1404 1404 if n in needs:
1405 1405 needs.remove(n)
1406 1406 else:
1407 1407 raise error.Abort(
1408 1408 _("received spurious file revlog entry"))
1409 1409 if not needs:
1410 1410 del needfiles[f]
1411 1411 progress.complete()
1412 1412
1413 1413 for f, needs in needfiles.iteritems():
1414 1414 fl = repo.file(f)
1415 1415 for n in needs:
1416 1416 try:
1417 1417 fl.rev(n)
1418 1418 except error.LookupError:
1419 1419 raise error.Abort(
1420 1420 _('missing file data for %s:%s - run hg verify') %
1421 1421 (f, hex(n)))
1422 1422
1423 1423 return revisions, files
@@ -1,1525 +1,1523 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import errno
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .node import nullid
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 scmutil,
26 26 txnutil,
27 27 util,
28 28 )
29 29
30 30 try:
31 31 from . import rustext
32 32 rustext.__name__ # force actual import (see hgdemandimport)
33 33 except ImportError:
34 34 rustext = None
35 35
36 36 parsers = policy.importmod(r'parsers')
37 37
38 38 propertycache = util.propertycache
39 39 filecache = scmutil.filecache
40 40 _rangemask = 0x7fffffff
41 41
42 42 dirstatetuple = parsers.dirstatetuple
43 43
44 44 class repocache(filecache):
45 45 """filecache for files in .hg/"""
46 46 def join(self, obj, fname):
47 47 return obj._opener.join(fname)
48 48
49 49 class rootcache(filecache):
50 50 """filecache for files in the repository root"""
51 51 def join(self, obj, fname):
52 52 return obj._join(fname)
53 53
54 54 def _getfsnow(vfs):
55 55 '''Get "now" timestamp on filesystem'''
56 56 tmpfd, tmpname = vfs.mkstemp()
57 57 try:
58 58 return os.fstat(tmpfd)[stat.ST_MTIME]
59 59 finally:
60 60 os.close(tmpfd)
61 61 vfs.unlink(tmpname)
62 62
63 63 class dirstate(object):
64 64
65 65 def __init__(self, opener, ui, root, validate, sparsematchfn):
66 66 '''Create a new dirstate object.
67 67
68 68 opener is an open()-like callable that can be used to open the
69 69 dirstate file; root is the root of the directory tracked by
70 70 the dirstate.
71 71 '''
72 72 self._opener = opener
73 73 self._validate = validate
74 74 self._root = root
75 75 self._sparsematchfn = sparsematchfn
76 76 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
77 77 # UNC path pointing to root share (issue4557)
78 78 self._rootdir = pathutil.normasprefix(root)
79 79 self._dirty = False
80 80 self._lastnormaltime = 0
81 81 self._ui = ui
82 82 self._filecache = {}
83 83 self._parentwriters = 0
84 84 self._filename = 'dirstate'
85 85 self._pendingfilename = '%s.pending' % self._filename
86 86 self._plchangecallbacks = {}
87 87 self._origpl = None
88 88 self._updatedfiles = set()
89 89 self._mapcls = dirstatemap
90 90 # Access and cache cwd early, so we don't access it for the first time
91 91 # after a working-copy update caused it to not exist (accessing it then
92 92 # raises an exception).
93 93 self._cwd
94 94
95 95 @contextlib.contextmanager
96 96 def parentchange(self):
97 97 '''Context manager for handling dirstate parents.
98 98
99 99 If an exception occurs in the scope of the context manager,
100 100 the incoherent dirstate won't be written when wlock is
101 101 released.
102 102 '''
103 103 self._parentwriters += 1
104 104 yield
105 105 # Typically we want the "undo" step of a context manager in a
106 106 # finally block so it happens even when an exception
107 107 # occurs. In this case, however, we only want to decrement
108 108 # parentwriters if the code in the with statement exits
109 109 # normally, so we don't have a try/finally here on purpose.
110 110 self._parentwriters -= 1
111 111
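    # Sketch of the intended usage, matching the check in setparents()
    # below (which refuses to run outside this context manager):
    #
    #   with dirstate.parentchange():
    #       dirstate.setparents(newp1)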
112 112 def pendingparentchange(self):
113 113 '''Returns true if the dirstate is in the middle of a set of changes
114 114 that modify the dirstate parent.
115 115 '''
116 116 return self._parentwriters > 0
117 117
118 118 @propertycache
119 119 def _map(self):
120 120 """Return the dirstate contents (see documentation for dirstatemap)."""
121 121 self._map = self._mapcls(self._ui, self._opener, self._root)
122 122 return self._map
123 123
124 124 @property
125 125 def _sparsematcher(self):
126 126 """The matcher for the sparse checkout.
127 127
128 128 The working directory may not include every file from a manifest. The
129 129 matcher obtained by this property will match a path if it is to be
130 130 included in the working directory.
131 131 """
132 132 # TODO there is potential to cache this property. For now, the matcher
133 133 # is resolved on every access. (But the called function does use a
134 134 # cache to keep the lookup fast.)
135 135 return self._sparsematchfn()
136 136
137 137 @repocache('branch')
138 138 def _branch(self):
139 139 try:
140 140 return self._opener.read("branch").strip() or "default"
141 141 except IOError as inst:
142 142 if inst.errno != errno.ENOENT:
143 143 raise
144 144 return "default"
145 145
146 146 @property
147 147 def _pl(self):
148 148 return self._map.parents()
149 149
150 150 def hasdir(self, d):
151 151 return self._map.hastrackeddir(d)
152 152
153 153 @rootcache('.hgignore')
154 154 def _ignore(self):
155 155 files = self._ignorefiles()
156 156 if not files:
157 157 return matchmod.never()
158 158
159 159 pats = ['include:%s' % f for f in files]
160 160 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
161 161
162 162 @propertycache
163 163 def _slash(self):
164 164 return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
165 165
166 166 @propertycache
167 167 def _checklink(self):
168 168 return util.checklink(self._root)
169 169
170 170 @propertycache
171 171 def _checkexec(self):
172 172 return util.checkexec(self._root)
173 173
174 174 @propertycache
175 175 def _checkcase(self):
176 176 return not util.fscasesensitive(self._join('.hg'))
177 177
178 178 def _join(self, f):
179 179 # much faster than os.path.join()
180 180 # it's safe because f is always a relative path
181 181 return self._rootdir + f
182 182
183 183 def flagfunc(self, buildfallback):
184 184 if self._checklink and self._checkexec:
185 185 def f(x):
186 186 try:
187 187 st = os.lstat(self._join(x))
188 188 if util.statislink(st):
189 189 return 'l'
190 190 if util.statisexec(st):
191 191 return 'x'
192 192 except OSError:
193 193 pass
194 194 return ''
195 195 return f
196 196
197 197 fallback = buildfallback()
198 198 if self._checklink:
199 199 def f(x):
200 200 if os.path.islink(self._join(x)):
201 201 return 'l'
202 202 if 'x' in fallback(x):
203 203 return 'x'
204 204 return ''
205 205 return f
206 206 if self._checkexec:
207 207 def f(x):
208 208 if 'l' in fallback(x):
209 209 return 'l'
210 210 if util.isexec(self._join(x)):
211 211 return 'x'
212 212 return ''
213 213 return f
214 214 else:
215 215 return fallback
216 216
217 217 @propertycache
218 218 def _cwd(self):
219 219 # internal config: ui.forcecwd
220 220 forcecwd = self._ui.config('ui', 'forcecwd')
221 221 if forcecwd:
222 222 return forcecwd
223 223 return encoding.getcwd()
224 224
225 225 def getcwd(self):
226 226 '''Return the path from which a canonical path is calculated.
227 227
228 228 This path should be used to resolve file patterns or to convert
229 229 canonical paths back to file paths for display. It shouldn't be
230 230 used to get real file paths. Use vfs functions instead.
231 231 '''
232 232 cwd = self._cwd
233 233 if cwd == self._root:
234 234 return ''
235 235 # self._root ends with a path separator if self._root is '/' or 'C:\'
236 236 rootsep = self._root
237 237 if not util.endswithsep(rootsep):
238 238 rootsep += pycompat.ossep
239 239 if cwd.startswith(rootsep):
240 240 return cwd[len(rootsep):]
241 241 else:
242 242 # we're outside the repo. return an absolute path.
243 243 return cwd
244 244
245 245 def pathto(self, f, cwd=None):
246 246 if cwd is None:
247 247 cwd = self.getcwd()
248 248 path = util.pathto(self._root, cwd, f)
249 249 if self._slash:
250 250 return util.pconvert(path)
251 251 return path
252 252
253 253 def __getitem__(self, key):
254 254 '''Return the current state of key (a filename) in the dirstate.
255 255
256 256 States are:
257 257 n normal
258 258 m needs merging
259 259 r marked for removal
260 260 a marked for addition
261 261 ? not tracked
262 262 '''
263 263 return self._map.get(key, ("?",))[0]
264 264
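    # e.g. (illustrative, with a hypothetical path 'foo'):
    #
    #   dirstate.add('foo')
    #   dirstate['foo']   # -> 'a'
    #   dirstate['nope']  # -> '?' (never tracked)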
265 265 def __contains__(self, key):
266 266 return key in self._map
267 267
268 268 def __iter__(self):
269 269 return iter(sorted(self._map))
270 270
271 271 def items(self):
272 272 return self._map.iteritems()
273 273
274 274 iteritems = items
275 275
276 276 def parents(self):
277 277 return [self._validate(p) for p in self._pl]
278 278
279 279 def p1(self):
280 280 return self._validate(self._pl[0])
281 281
282 282 def p2(self):
283 283 return self._validate(self._pl[1])
284 284
285 285 def branch(self):
286 286 return encoding.tolocal(self._branch)
287 287
288 288 def setparents(self, p1, p2=nullid):
289 289 """Set dirstate parents to p1 and p2.
290 290
291 291 When moving from two parents to one, 'm' merged entries are
292 292 adjusted to normal, and previous copy records are discarded and
293 293 returned by the call.
294 294
295 295 See localrepo.setparents()
296 296 """
297 297 if self._parentwriters == 0:
298 298 raise ValueError("cannot set dirstate parent outside of "
299 299 "dirstate.parentchange context manager")
300 300
301 301 self._dirty = True
302 302 oldp2 = self._pl[1]
303 303 if self._origpl is None:
304 304 self._origpl = self._pl
305 305 self._map.setparents(p1, p2)
306 306 copies = {}
307 307 if oldp2 != nullid and p2 == nullid:
308 308 candidatefiles = self._map.nonnormalset.union(
309 309 self._map.otherparentset)
310 310 for f in candidatefiles:
311 311 s = self._map.get(f)
312 312 if s is None:
313 313 continue
314 314
315 315 # Discard 'm' markers when moving away from a merge state
316 316 if s[0] == 'm':
317 317 source = self._map.copymap.get(f)
318 318 if source:
319 319 copies[f] = source
320 320 self.normallookup(f)
321 321 # Also fix up otherparent markers
322 322 elif s[0] == 'n' and s[2] == -2:
323 323 source = self._map.copymap.get(f)
324 324 if source:
325 325 copies[f] = source
326 326 self.add(f)
327 327 return copies
328 328
329 329 def setbranch(self, branch):
330 330 self.__class__._branch.set(self, encoding.fromlocal(branch))
331 331 f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
332 332 try:
333 333 f.write(self._branch + '\n')
334 334 f.close()
335 335
336 336 # make sure filecache has the correct stat info for _branch after
337 337 # replacing the underlying file
338 338 ce = self._filecache['_branch']
339 339 if ce:
340 340 ce.refresh()
341 341 except: # re-raises
342 342 f.discard()
343 343 raise
344 344
345 345 def invalidate(self):
346 346 '''Causes the next access to reread the dirstate.
347 347
348 348 This is different from localrepo.invalidatedirstate() because it always
349 349 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
350 350 check whether the dirstate has changed before rereading it.'''
351 351
352 352 for a in (r"_map", r"_branch", r"_ignore"):
353 353 if a in self.__dict__:
354 354 delattr(self, a)
355 355 self._lastnormaltime = 0
356 356 self._dirty = False
357 357 self._updatedfiles.clear()
358 358 self._parentwriters = 0
359 359 self._origpl = None
360 360
361 361 def copy(self, source, dest):
362 362 """Mark dest as a copy of source. Unmark dest if source is None."""
363 363 if source == dest:
364 364 return
365 365 self._dirty = True
366 366 if source is not None:
367 367 self._map.copymap[dest] = source
368 368 self._updatedfiles.add(source)
369 369 self._updatedfiles.add(dest)
370 370 elif self._map.copymap.pop(dest, None):
371 371 self._updatedfiles.add(dest)
372 372
373 373 def copied(self, file):
374 374 return self._map.copymap.get(file, None)
375 375
376 376 def copies(self):
377 377 return self._map.copymap
378 378
379 379 def _addpath(self, f, state, mode, size, mtime):
380 380 oldstate = self[f]
381 381 if state == 'a' or oldstate == 'r':
382 382 scmutil.checkfilename(f)
383 383 if self._map.hastrackeddir(f):
384 384 raise error.Abort(_('directory %r already in dirstate') %
385 385 pycompat.bytestr(f))
386 386 # shadows
387 387 for d in util.finddirs(f):
388 388 if self._map.hastrackeddir(d):
389 389 break
390 390 entry = self._map.get(d)
391 391 if entry is not None and entry[0] != 'r':
392 392 raise error.Abort(
393 393 _('file %r in dirstate clashes with %r') %
394 394 (pycompat.bytestr(d), pycompat.bytestr(f)))
395 395 self._dirty = True
396 396 self._updatedfiles.add(f)
397 397 self._map.addfile(f, oldstate, state, mode, size, mtime)
398 398
399 399 def normal(self, f):
400 400 '''Mark a file normal and clean.'''
401 401 s = os.lstat(self._join(f))
402 402 mtime = s[stat.ST_MTIME]
403 403 self._addpath(f, 'n', s.st_mode,
404 404 s.st_size & _rangemask, mtime & _rangemask)
405 405 self._map.copymap.pop(f, None)
406 406 if f in self._map.nonnormalset:
407 407 self._map.nonnormalset.remove(f)
408 408 if mtime > self._lastnormaltime:
409 409 # Remember the most recent modification timeslot for status(),
410 410 # to make sure we won't miss future size-preserving file content
411 411 # modifications that happen within the same timeslot.
412 412 self._lastnormaltime = mtime
413 413
414 414 def normallookup(self, f):
415 415 '''Mark a file normal, but possibly dirty.'''
416 416 if self._pl[1] != nullid:
417 417 # if there is a merge going on and the file was either
418 418 # in state 'm' (-1) or coming from other parent (-2) before
419 419 # being removed, restore that state.
420 420 entry = self._map.get(f)
421 421 if entry is not None:
422 422 if entry[0] == 'r' and entry[2] in (-1, -2):
423 423 source = self._map.copymap.get(f)
424 424 if entry[2] == -1:
425 425 self.merge(f)
426 426 elif entry[2] == -2:
427 427 self.otherparent(f)
428 428 if source:
429 429 self.copy(source, f)
430 430 return
431 431 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
432 432 return
433 433 self._addpath(f, 'n', 0, -1, -1)
434 434 self._map.copymap.pop(f, None)
435 435
436 436 def otherparent(self, f):
437 437 '''Mark as coming from the other parent, always dirty.'''
438 438 if self._pl[1] == nullid:
439 439 raise error.Abort(_("setting %r to other parent "
440 440 "only allowed in merges") % f)
441 441 if f in self and self[f] == 'n':
442 442 # merge-like
443 443 self._addpath(f, 'm', 0, -2, -1)
444 444 else:
445 445 # add-like
446 446 self._addpath(f, 'n', 0, -2, -1)
447 447 self._map.copymap.pop(f, None)
448 448
449 449 def add(self, f):
450 450 '''Mark a file added.'''
451 451 self._addpath(f, 'a', 0, -1, -1)
452 452 self._map.copymap.pop(f, None)
453 453
454 454 def remove(self, f):
455 455 '''Mark a file removed.'''
456 456 self._dirty = True
457 457 oldstate = self[f]
458 458 size = 0
459 459 if self._pl[1] != nullid:
460 460 entry = self._map.get(f)
461 461 if entry is not None:
462 462 # backup the previous state
463 463 if entry[0] == 'm': # merge
464 464 size = -1
465 465 elif entry[0] == 'n' and entry[2] == -2: # other parent
466 466 size = -2
467 467 self._map.otherparentset.add(f)
468 468 self._updatedfiles.add(f)
469 469 self._map.removefile(f, oldstate, size)
470 470 if size == 0:
471 471 self._map.copymap.pop(f, None)
472 472
473 473 def merge(self, f):
474 474 '''Mark a file merged.'''
475 475 if self._pl[1] == nullid:
476 476 return self.normallookup(f)
477 477 return self.otherparent(f)
478 478
479 479 def drop(self, f):
480 480 '''Drop a file from the dirstate'''
481 481 oldstate = self[f]
482 482 if self._map.dropfile(f, oldstate):
483 483 self._dirty = True
484 484 self._updatedfiles.add(f)
485 485 self._map.copymap.pop(f, None)
486 486
487 487 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
488 488 if exists is None:
489 489 exists = os.path.lexists(os.path.join(self._root, path))
490 490 if not exists:
491 491 # Maybe a path component exists
492 492 if not ignoremissing and '/' in path:
493 493 d, f = path.rsplit('/', 1)
494 494 d = self._normalize(d, False, ignoremissing, None)
495 495 folded = d + "/" + f
496 496 else:
497 497 # No path components, preserve original case
498 498 folded = path
499 499 else:
500 500 # recursively normalize leading directory components
501 501 # against dirstate
502 502 if '/' in normed:
503 503 d, f = normed.rsplit('/', 1)
504 504 d = self._normalize(d, False, ignoremissing, True)
505 505 r = self._root + "/" + d
506 506 folded = d + "/" + util.fspath(f, r)
507 507 else:
508 508 folded = util.fspath(normed, self._root)
509 509 storemap[normed] = folded
510 510
511 511 return folded
512 512
513 513 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
514 514 normed = util.normcase(path)
515 515 folded = self._map.filefoldmap.get(normed, None)
516 516 if folded is None:
517 517 if isknown:
518 518 folded = path
519 519 else:
520 520 folded = self._discoverpath(path, normed, ignoremissing, exists,
521 521 self._map.filefoldmap)
522 522 return folded
523 523
524 524 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
525 525 normed = util.normcase(path)
526 526 folded = self._map.filefoldmap.get(normed, None)
527 527 if folded is None:
528 528 folded = self._map.dirfoldmap.get(normed, None)
529 529 if folded is None:
530 530 if isknown:
531 531 folded = path
532 532 else:
533 533 # store discovered result in dirfoldmap so that future
534 534 # normalizefile calls don't start matching directories
535 535 folded = self._discoverpath(path, normed, ignoremissing, exists,
536 536 self._map.dirfoldmap)
537 537 return folded
538 538
539 539 def normalize(self, path, isknown=False, ignoremissing=False):
540 540 '''
541 541 normalize the case of a pathname when on a casefolding filesystem
542 542
543 543 isknown specifies whether the filename came from walking the
544 544 disk, to avoid extra filesystem access.
545 545
546 546 If ignoremissing is True, missing paths are returned
547 547 unchanged. Otherwise, we try harder to normalize possibly
548 548 existing path components.
549 549
550 550 The normalized case is determined based on the following precedence:
551 551
552 552 - version of name already stored in the dirstate
553 553 - version of name stored on disk
554 554 - version provided via command arguments
555 555 '''
556 556
557 557 if self._checkcase:
558 558 return self._normalize(path, isknown, ignoremissing)
559 559 return path
560 560
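    # Hedged example on a case-folding filesystem, assuming 'Foo.TXT' is
    # already tracked in the dirstate:
    #
    #   dirstate.normalize('foo.txt')  # -> 'Foo.TXT'
    #   # on a case-sensitive filesystem the path comes back unchanged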
561 561 def clear(self):
562 562 self._map.clear()
563 563 self._lastnormaltime = 0
564 564 self._updatedfiles.clear()
565 565 self._dirty = True
566 566
567 567 def rebuild(self, parent, allfiles, changedfiles=None):
568 568 if changedfiles is None:
569 569 # Rebuild entire dirstate
570 570 changedfiles = allfiles
571 571 lastnormaltime = self._lastnormaltime
572 572 self.clear()
573 573 self._lastnormaltime = lastnormaltime
574 574
575 575 if self._origpl is None:
576 576 self._origpl = self._pl
577 577 self._map.setparents(parent, nullid)
578 578 for f in changedfiles:
579 579 if f in allfiles:
580 580 self.normallookup(f)
581 581 else:
582 582 self.drop(f)
583 583
584 584 self._dirty = True
585 585
586 586 def identity(self):
587 587 '''Return identity of dirstate itself to detect changing in storage
588 588
589 589 If identity of previous dirstate is equal to this, writing
590 590 changes based on the former dirstate out can keep consistency.
591 591 '''
592 592 return self._map.identity
593 593
594 594 def write(self, tr):
595 595 if not self._dirty:
596 596 return
597 597
598 598 filename = self._filename
599 599 if tr:
600 600 # 'dirstate.write()' is not only for writing in-memory
601 601 # changes out, but also for dropping ambiguous timestamps.
602 602 # Delayed writing would re-introduce the "ambiguous timestamp"
603 603 # issue. See also the wiki page below for details:
604 604 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
605 605
606 606 # emulate dropping timestamp in 'parsers.pack_dirstate'
607 607 now = _getfsnow(self._opener)
608 608 self._map.clearambiguoustimes(self._updatedfiles, now)
609 609
610 610 # emulate that all 'dirstate.normal' results are written out
611 611 self._lastnormaltime = 0
612 612 self._updatedfiles.clear()
613 613
614 614 # delay writing in-memory changes out
615 615 tr.addfilegenerator('dirstate', (self._filename,),
616 616 self._writedirstate, location='plain')
617 617 return
618 618
619 619 st = self._opener(filename, "w", atomictemp=True, checkambig=True)
620 620 self._writedirstate(st)
621 621
622 622 def addparentchangecallback(self, category, callback):
623 623 """add a callback to be called when the wd parents are changed
624 624
625 625 Callback will be called with the following arguments:
626 626 dirstate, (oldp1, oldp2), (newp1, newp2)
627 627
628 628 Category is a unique identifier to allow overwriting an old callback
629 629 with a newer callback.
630 630 """
631 631 self._plchangecallbacks[category] = callback
632 632
633 633 def _writedirstate(self, st):
634 634 # notify callbacks about parents change
635 635 if self._origpl is not None and self._origpl != self._pl:
636 636 for c, callback in sorted(self._plchangecallbacks.iteritems()):
637 637 callback(self, self._origpl, self._pl)
638 638 self._origpl = None
639 639 # use the modification time of the newly created temporary file as the
640 640 # filesystem's notion of 'now'
641 641 now = util.fstat(st)[stat.ST_MTIME] & _rangemask
642 642
643 643 # a large enough 'delaywrite' prevents 'pack_dirstate' from dropping
644 644 # the timestamp of each entry in the dirstate, because of 'now > mtime'
645 645 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
646 646 if delaywrite > 0:
647 647 # do we have any files to delay for?
648 648 for f, e in self._map.iteritems():
649 649 if e[0] == 'n' and e[3] == now:
650 650 import time # to avoid useless import
651 651 # rather than sleep n seconds, sleep until the next
652 652 # multiple of n seconds
653 653 clock = time.time()
654 654 start = int(clock) - (int(clock) % delaywrite)
655 655 end = start + delaywrite
656 656 time.sleep(end - clock)
657 657 now = end # trust our estimate that the end is near now
658 658 break
659 659
660 660 self._map.write(st, now)
661 661 self._lastnormaltime = 0
662 662 self._dirty = False
663 663
664 664 def _dirignore(self, f):
665 665 if f == '.':
666 666 return False
667 667 if self._ignore(f):
668 668 return True
669 669 for p in util.finddirs(f):
670 670 if self._ignore(p):
671 671 return True
672 672 return False
673 673
674 674 def _ignorefiles(self):
675 675 files = []
676 676 if os.path.exists(self._join('.hgignore')):
677 677 files.append(self._join('.hgignore'))
678 678 for name, path in self._ui.configitems("ui"):
679 679 if name == 'ignore' or name.startswith('ignore.'):
680 680 # we need to use os.path.join here rather than self._join
681 681 # because path is arbitrary and user-specified
682 682 files.append(os.path.join(self._rootdir, util.expandpath(path)))
683 683 return files
684 684
685 685 def _ignorefileandline(self, f):
686 686 files = collections.deque(self._ignorefiles())
687 687 visited = set()
688 688 while files:
689 689 i = files.popleft()
690 690 patterns = matchmod.readpatternfile(i, self._ui.warn,
691 691 sourceinfo=True)
692 692 for pattern, lineno, line in patterns:
693 693 kind, p = matchmod._patsplit(pattern, 'glob')
694 694 if kind == "subinclude":
695 695 if p not in visited:
696 696 files.append(p)
697 697 continue
698 698 m = matchmod.match(self._root, '', [], [pattern],
699 699 warn=self._ui.warn)
700 700 if m(f):
701 701 return (i, lineno, line)
702 702 visited.add(i)
703 703 return (None, -1, "")
704 704
705 705 def _walkexplicit(self, match, subrepos):
706 706 '''Get stat data about the files explicitly specified by match.
707 707
708 708 Return a triple (results, dirsfound, dirsnotfound).
709 709 - results is a mapping from filename to stat result. It also contains
710 710 listings mapping subrepos and .hg to None.
711 711 - dirsfound is a list of files found to be directories.
712 712 - dirsnotfound is a list of files that the dirstate thinks are
713 713 directories and that were not found.'''
714 714
715 715 def badtype(mode):
716 716 kind = _('unknown')
717 717 if stat.S_ISCHR(mode):
718 718 kind = _('character device')
719 719 elif stat.S_ISBLK(mode):
720 720 kind = _('block device')
721 721 elif stat.S_ISFIFO(mode):
722 722 kind = _('fifo')
723 723 elif stat.S_ISSOCK(mode):
724 724 kind = _('socket')
725 725 elif stat.S_ISDIR(mode):
726 726 kind = _('directory')
727 727 return _('unsupported file type (type is %s)') % kind
728 728
729 729 matchedir = match.explicitdir
730 730 badfn = match.bad
731 731 dmap = self._map
732 732 lstat = os.lstat
733 733 getkind = stat.S_IFMT
734 734 dirkind = stat.S_IFDIR
735 735 regkind = stat.S_IFREG
736 736 lnkkind = stat.S_IFLNK
737 737 join = self._join
738 738 dirsfound = []
739 739 foundadd = dirsfound.append
740 740 dirsnotfound = []
741 741 notfoundadd = dirsnotfound.append
742 742
743 743 if not match.isexact() and self._checkcase:
744 744 normalize = self._normalize
745 745 else:
746 746 normalize = None
747 747
748 748 files = sorted(match.files())
749 749 subrepos.sort()
750 750 i, j = 0, 0
751 751 while i < len(files) and j < len(subrepos):
752 752 subpath = subrepos[j] + "/"
753 753 if files[i] < subpath:
754 754 i += 1
755 755 continue
756 756 while i < len(files) and files[i].startswith(subpath):
757 757 del files[i]
758 758 j += 1
759 759
760 if not files or '.' in files:
761 files = ['.']
760 if not files or '' in files:
761 files = ['']
762 762 # constructing the foldmap is expensive, so don't do it for the
763 # common case where files is ['.']
763 # common case where files is ['']
764 764 normalize = None
765 765 results = dict.fromkeys(subrepos)
766 766 results['.hg'] = None
767 767
768 768 for ff in files:
769 769 if normalize:
770 770 nf = normalize(ff, False, True)
771 771 else:
772 772 nf = ff
773 773 if nf in results:
774 774 continue
775 775
776 776 try:
777 777 st = lstat(join(nf))
778 778 kind = getkind(st.st_mode)
779 779 if kind == dirkind:
780 780 if nf in dmap:
781 781 # file replaced by dir on disk but still in dirstate
782 782 results[nf] = None
783 783 if matchedir:
784 784 matchedir(nf)
785 785 foundadd((nf, ff))
786 786 elif kind == regkind or kind == lnkkind:
787 787 results[nf] = st
788 788 else:
789 789 badfn(ff, badtype(kind))
790 790 if nf in dmap:
791 791 results[nf] = None
792 792 except OSError as inst: # nf not found on disk - it is dirstate only
793 793 if nf in dmap: # does it exactly match a missing file?
794 794 results[nf] = None
795 795 else: # does it match a missing directory?
796 796 if self._map.hasdir(nf):
797 797 if matchedir:
798 798 matchedir(nf)
799 799 notfoundadd(nf)
800 800 else:
801 801 badfn(ff, encoding.strtolocal(inst.strerror))
802 802
803 803 # match.files() may contain explicitly-specified paths that shouldn't
804 804 # be taken; drop them from the list of files found. dirsfound/notfound
805 805 # aren't filtered here because they will be tested later.
806 806 if match.anypats():
807 807 for f in list(results):
808 808 if f == '.hg' or f in subrepos:
809 809 # keep sentinel to disable further out-of-repo walks
810 810 continue
811 811 if not match(f):
812 812 del results[f]
813 813
814 814 # Case insensitive filesystems cannot rely on lstat() failing to detect
815 815 # a case-only rename. Prune the stat object for any file that does not
816 816 # match the case in the filesystem, if there are multiple files that
817 817 # normalize to the same path.
818 818 if match.isexact() and self._checkcase:
819 819 normed = {}
820 820
821 821 for f, st in results.iteritems():
822 822 if st is None:
823 823 continue
824 824
825 825 nc = util.normcase(f)
826 826 paths = normed.get(nc)
827 827
828 828 if paths is None:
829 829 paths = set()
830 830 normed[nc] = paths
831 831
832 832 paths.add(f)
833 833
834 834 for norm, paths in normed.iteritems():
835 835 if len(paths) > 1:
836 836 for path in paths:
837 837 folded = self._discoverpath(path, norm, True, None,
838 838 self._map.dirfoldmap)
839 839 if path != folded:
840 840 results[path] = None
841 841
842 842 return results, dirsfound, dirsnotfound
843 843
844 844 def walk(self, match, subrepos, unknown, ignored, full=True):
845 845 '''
846 846 Walk recursively through the directory tree, finding all files
847 847 matched by match.
848 848
849 849 If full is False, maybe skip some known-clean files.
850 850
851 851 Return a dict mapping filename to stat-like object (either
852 852 mercurial.osutil.stat instance or return value of os.stat()).
853 853
854 854 '''
855 855 # full is a flag that extensions that hook into walk can use -- this
856 856 # implementation doesn't use it at all. This satisfies the contract
857 857 # because we only guarantee a "maybe".
858 858
859 859 if ignored:
860 860 ignore = util.never
861 861 dirignore = util.never
862 862 elif unknown:
863 863 ignore = self._ignore
864 864 dirignore = self._dirignore
865 865 else:
866 866 # if not unknown and not ignored, drop dir recursion and step 2
867 867 ignore = util.always
868 868 dirignore = util.always
869 869
870 870 matchfn = match.matchfn
871 871 matchalways = match.always()
872 872 matchtdir = match.traversedir
873 873 dmap = self._map
874 874 listdir = util.listdir
875 875 lstat = os.lstat
876 876 dirkind = stat.S_IFDIR
877 877 regkind = stat.S_IFREG
878 878 lnkkind = stat.S_IFLNK
879 879 join = self._join
880 880
881 881 exact = skipstep3 = False
882 882 if match.isexact(): # match.exact
883 883 exact = True
884 884 dirignore = util.always # skip step 2
885 885 elif match.prefix(): # match.match, no patterns
886 886 skipstep3 = True
887 887
888 888 if not exact and self._checkcase:
889 889 normalize = self._normalize
890 890 normalizefile = self._normalizefile
891 891 skipstep3 = False
892 892 else:
893 893 normalize = self._normalize
894 894 normalizefile = None
895 895
896 896 # step 1: find all explicit files
897 897 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
898 898
899 899 skipstep3 = skipstep3 and not (work or dirsnotfound)
900 900 work = [d for d in work if not dirignore(d[0])]
901 901
902 902 # step 2: visit subdirectories
903 903 def traverse(work, alreadynormed):
904 904 wadd = work.append
905 905 while work:
906 906 nd = work.pop()
907 907 visitentries = match.visitchildrenset(nd)
908 908 if not visitentries:
909 909 continue
910 910 if visitentries == 'this' or visitentries == 'all':
911 911 visitentries = None
912 912 skip = None
913 if nd == '.':
914 nd = ''
915 else:
913 if nd != '':
916 914 skip = '.hg'
917 915 try:
918 916 entries = listdir(join(nd), stat=True, skip=skip)
919 917 except OSError as inst:
920 918 if inst.errno in (errno.EACCES, errno.ENOENT):
921 919 match.bad(self.pathto(nd),
922 920 encoding.strtolocal(inst.strerror))
923 921 continue
924 922 raise
925 923 for f, kind, st in entries:
926 924 # Some matchers may return files in the visitentries set,
927 925 # instead of 'this', if the matcher explicitly mentions them
928 926 # and is not an exactmatcher. This is acceptable; we do not
929 927 # make any hard assumptions about file-or-directory below
930 928 # based on the presence of `f` in visitentries. If
931 929 # visitchildrenset returned a set, we can always skip the
932 930 # entries *not* in the set it provided regardless of whether
933 931 # they're actually a file or a directory.
934 932 if visitentries and f not in visitentries:
935 933 continue
936 934 if normalizefile:
937 935 # even though f might be a directory, we're only
938 936 # interested in comparing it to files currently in the
939 937 # dmap -- therefore normalizefile is enough
940 938 nf = normalizefile(nd and (nd + "/" + f) or f, True,
941 939 True)
942 940 else:
943 941 nf = nd and (nd + "/" + f) or f
944 942 if nf not in results:
945 943 if kind == dirkind:
946 944 if not ignore(nf):
947 945 if matchtdir:
948 946 matchtdir(nf)
949 947 wadd(nf)
950 948 if nf in dmap and (matchalways or matchfn(nf)):
951 949 results[nf] = None
952 950 elif kind == regkind or kind == lnkkind:
953 951 if nf in dmap:
954 952 if matchalways or matchfn(nf):
955 953 results[nf] = st
956 954 elif ((matchalways or matchfn(nf))
957 955 and not ignore(nf)):
958 956 # unknown file -- normalize if necessary
959 957 if not alreadynormed:
960 958 nf = normalize(nf, False, True)
961 959 results[nf] = st
962 960 elif nf in dmap and (matchalways or matchfn(nf)):
963 961 results[nf] = None
964 962
965 963 for nd, d in work:
966 964 # alreadynormed means that traverse() doesn't have to do any
967 965 # expensive directory normalization
968 966 alreadynormed = not normalize or nd == d
969 967 traverse([d], alreadynormed)
970 968
971 969 for s in subrepos:
972 970 del results[s]
973 971 del results['.hg']
974 972
975 973 # step 3: visit remaining files from dmap
976 974 if not skipstep3 and not exact:
977 975 # If a dmap file is not in results yet, it was either
978 976 # a) not matching matchfn b) ignored, c) missing, or d) under a
979 977 # symlink directory.
980 978 if not results and matchalways:
981 979 visit = [f for f in dmap]
982 980 else:
983 981 visit = [f for f in dmap if f not in results and matchfn(f)]
984 982 visit.sort()
985 983
986 984 if unknown:
987 985 # unknown == True means we walked all dirs under the roots
988 986 # that wasn't ignored, and everything that matched was stat'ed
989 987 # and is already in results.
990 988 # The rest must thus be ignored or under a symlink.
991 989 audit_path = pathutil.pathauditor(self._root, cached=True)
992 990
993 991 for nf in iter(visit):
994 992 # If a stat for the same file was already added with a
995 993 # different case, don't add one for this, since that would
996 994 # make it appear as if the file exists under both names
997 995 # on disk.
998 996 if (normalizefile and
999 997 normalizefile(nf, True, True) in results):
1000 998 results[nf] = None
1001 999 # Report ignored items in the dmap as long as they are not
1002 1000 # under a symlink directory.
1003 1001 elif audit_path.check(nf):
1004 1002 try:
1005 1003 results[nf] = lstat(join(nf))
1006 1004 # file was just ignored, no links, and exists
1007 1005 except OSError:
1008 1006 # file doesn't exist
1009 1007 results[nf] = None
1010 1008 else:
1011 1009 # It's either missing or under a symlink directory
1012 1010 # which we in this case report as missing
1013 1011 results[nf] = None
1014 1012 else:
1015 1013 # We may not have walked the full directory tree above,
1016 1014 # so stat and check everything we missed.
1017 1015 iv = iter(visit)
1018 1016 for st in util.statfiles([join(i) for i in visit]):
1019 1017 results[next(iv)] = st
1020 1018 return results
1021 1019
1022 1020 def status(self, match, subrepos, ignored, clean, unknown):
1023 1021 '''Determine the status of the working copy relative to the
1024 1022 dirstate and return a pair of (unsure, status), where status is of type
1025 1023 scmutil.status and:
1026 1024
1027 1025 unsure:
1028 1026 files that might have been modified since the dirstate was
1029 1027 written, but need to be read to be sure (size is the same
1030 1028 but mtime differs)
1031 1029 status.modified:
1032 1030 files that have definitely been modified since the dirstate
1033 1031 was written (different size or mode)
1034 1032 status.clean:
1035 1033 files that have definitely not been modified since the
1036 1034 dirstate was written
1037 1035 '''
1038 1036 listignored, listclean, listunknown = ignored, clean, unknown
1039 1037 lookup, modified, added, unknown, ignored = [], [], [], [], []
1040 1038 removed, deleted, clean = [], [], []
1041 1039
1042 1040 dmap = self._map
1043 1041 dmap.preload()
1044 1042 dcontains = dmap.__contains__
1045 1043 dget = dmap.__getitem__
1046 1044 ladd = lookup.append # aka "unsure"
1047 1045 madd = modified.append
1048 1046 aadd = added.append
1049 1047 uadd = unknown.append
1050 1048 iadd = ignored.append
1051 1049 radd = removed.append
1052 1050 dadd = deleted.append
1053 1051 cadd = clean.append
1054 1052 mexact = match.exact
1055 1053 dirignore = self._dirignore
1056 1054 checkexec = self._checkexec
1057 1055 copymap = self._map.copymap
1058 1056 lastnormaltime = self._lastnormaltime
1059 1057
1060 1058 # We need to do full walks when either
1061 1059 # - we're listing all clean files, or
1062 1060 # - match.traversedir does something, because match.traversedir should
1063 1061 # be called for every dir in the working dir
1064 1062 full = listclean or match.traversedir is not None
1065 1063 for fn, st in self.walk(match, subrepos, listunknown, listignored,
1066 1064 full=full).iteritems():
1067 1065 if not dcontains(fn):
1068 1066 if (listignored or mexact(fn)) and dirignore(fn):
1069 1067 if listignored:
1070 1068 iadd(fn)
1071 1069 else:
1072 1070 uadd(fn)
1073 1071 continue
1074 1072
1075 1073 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
1076 1074 # written like that for performance reasons. dmap[fn] is not a
1077 1075 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
1078 1076 # opcode has fast paths when the value to be unpacked is a tuple or
1079 1077 # a list, but falls back to creating a full-fledged iterator in
1080 1078 # general. That is much slower than simply accessing and storing the
1081 1079 # tuple members one by one.
1082 1080 t = dget(fn)
1083 1081 state = t[0]
1084 1082 mode = t[1]
1085 1083 size = t[2]
1086 1084 time = t[3]
1087 1085
1088 1086 if not st and state in "nma":
1089 1087 dadd(fn)
1090 1088 elif state == 'n':
1091 1089 if (size >= 0 and
1092 1090 ((size != st.st_size and size != st.st_size & _rangemask)
1093 1091 or ((mode ^ st.st_mode) & 0o100 and checkexec))
1094 1092 or size == -2 # other parent
1095 1093 or fn in copymap):
1096 1094 madd(fn)
1097 1095 elif (time != st[stat.ST_MTIME]
1098 1096 and time != st[stat.ST_MTIME] & _rangemask):
1099 1097 ladd(fn)
1100 1098 elif st[stat.ST_MTIME] == lastnormaltime:
1101 1099 # fn may have just been marked as normal and it may have
1102 1100 # changed in the same second without changing its size.
1103 1101 # This can happen if we quickly do multiple commits.
1104 1102 # Force lookup, so we don't miss such a racy file change.
1105 1103 ladd(fn)
1106 1104 elif listclean:
1107 1105 cadd(fn)
1108 1106 elif state == 'm':
1109 1107 madd(fn)
1110 1108 elif state == 'a':
1111 1109 aadd(fn)
1112 1110 elif state == 'r':
1113 1111 radd(fn)
1114 1112
1115 1113 return (lookup, scmutil.status(modified, added, removed, deleted,
1116 1114 unknown, ignored, clean))
1117 1115
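    # Illustrative call (no subrepos; list unknown files, but neither
    # ignored nor clean ones):
    #
    #   unsure, st = dirstate.status(matchmod.always(), [],
    #                                ignored=False, clean=False,
    #                                unknown=True)
    #   # st.modified, st.added, st.removed, st.deleted, st.unknown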
1118 1116 def matches(self, match):
1119 1117 '''
1120 1118 return files in the dirstate (in whatever state) filtered by match
1121 1119 '''
1122 1120 dmap = self._map
1123 1121 if match.always():
1124 1122 return dmap.keys()
1125 1123 files = match.files()
1126 1124 if match.isexact():
1127 1125 # fast path -- filter the other way around, since typically files is
1128 1126 # much smaller than dmap
1129 1127 return [f for f in files if f in dmap]
1130 1128 if match.prefix() and all(fn in dmap for fn in files):
1131 1129 # fast path -- all the values are known to be files, so just return
1132 1130 # that
1133 1131 return list(files)
1134 1132 return [f for f in dmap if match(f)]
1135 1133
1136 1134 def _actualfilename(self, tr):
1137 1135 if tr:
1138 1136 return self._pendingfilename
1139 1137 else:
1140 1138 return self._filename
1141 1139
1142 1140 def savebackup(self, tr, backupname):
1143 1141 '''Save current dirstate into backup file'''
1144 1142 filename = self._actualfilename(tr)
1145 1143 assert backupname != filename
1146 1144
1147 1145 # use '_writedirstate' instead of 'write' to ensure changes are
1148 1146 # written out, because the latter skips writing while a transaction
1149 1147 # is running. The output file is used to back up the dirstate here.
1150 1148 if self._dirty or not self._opener.exists(filename):
1151 1149 self._writedirstate(self._opener(filename, "w", atomictemp=True,
1152 1150 checkambig=True))
1153 1151
1154 1152 if tr:
1155 1153 # ensure that subsequent tr.writepending returns True for
1156 1154 # changes written out above, even if dirstate is never
1157 1155 # changed after this
1158 1156 tr.addfilegenerator('dirstate', (self._filename,),
1159 1157 self._writedirstate, location='plain')
1160 1158
1161 1159 # ensure that pending file written above is unlinked at
1162 1160 # failure, even if tr.writepending isn't invoked until the
1163 1161 # end of this transaction
1164 1162 tr.registertmp(filename, location='plain')
1165 1163
1166 1164 self._opener.tryunlink(backupname)
1167 1165 # hardlink backup is okay because _writedirstate is always called
1168 1166 # with an "atomictemp=True" file.
1169 1167 util.copyfile(self._opener.join(filename),
1170 1168 self._opener.join(backupname), hardlink=True)
1171 1169
1172 1170 def restorebackup(self, tr, backupname):
1173 1171 '''Restore dirstate by backup file'''
1174 1172 # this "invalidate()" prevents "wlock.release()" from writing
1175 1173 # changes of dirstate out after restoring from backup file
1176 1174 self.invalidate()
1177 1175 filename = self._actualfilename(tr)
1178 1176 o = self._opener
1179 1177 if util.samefile(o.join(backupname), o.join(filename)):
1180 1178 o.unlink(backupname)
1181 1179 else:
1182 1180 o.rename(backupname, filename, checkambig=True)
1183 1181
1184 1182 def clearbackup(self, tr, backupname):
1185 1183 '''Clear backup file'''
1186 1184 self._opener.unlink(backupname)
1187 1185
1188 1186 class dirstatemap(object):
1189 1187 """Map encapsulating the dirstate's contents.
1190 1188
1191 1189 The dirstate contains the following state:
1192 1190
1193 1191 - `identity` is the identity of the dirstate file, which can be used to
1194 1192 detect when changes have occurred to the dirstate file.
1195 1193
1196 1194 - `parents` is a pair containing the parents of the working copy. The
1197 1195 parents are updated by calling `setparents`.
1198 1196
1199 1197 - the state map maps filenames to tuples of (state, mode, size, mtime),
1200 1198 where state is a single character representing 'normal', 'added',
1201 1199 'removed', or 'merged'. It is read by treating the dirstate as a
1202 1200 dict. File state is updated by calling the `addfile`, `removefile` and
1203 1201 `dropfile` methods.
1204 1202
1205 1203 - `copymap` maps destination filenames to their source filename.
1206 1204
1207 1205 The dirstate also provides the following views onto the state:
1208 1206
1209 1207 - `nonnormalset` is a set of the filenames that have state other
1210 1208 than 'normal', or are normal but have an mtime of -1 ('normallookup').
1211 1209
1212 1210 - `otherparentset` is a set of the filenames that are marked as coming
1213 1211 from the second parent when the dirstate is currently being merged.
1214 1212
1215 1213 - `filefoldmap` is a dict mapping normalized filenames to the denormalized
1216 1214 form that they appear as in the dirstate.
1217 1215
1218 1216 - `dirfoldmap` is a dict mapping normalized directory names to the
1219 1217 denormalized form that they appear as in the dirstate.
1220 1218 """
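# A minimal usage sketch (hypothetical; a real 'ui', 'opener' and 'root'
# come from a repository object):
#
#   dmap = dirstatemap(ui, opener, root)
#   e = dmap[b'some/file']          # a (state, mode, size, mtime) entry
#   needslookup = e[0] == 'n' and e[3] == -1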
1221 1219
1222 1220 def __init__(self, ui, opener, root):
1223 1221 self._ui = ui
1224 1222 self._opener = opener
1225 1223 self._root = root
1226 1224 self._filename = 'dirstate'
1227 1225
1228 1226 self._parents = None
1229 1227 self._dirtyparents = False
1230 1228
1231 1229 # for consistent view between _pl() and _read() invocations
1232 1230 self._pendingmode = None
1233 1231
1234 1232 @propertycache
1235 1233 def _map(self):
1236 1234 self._map = {}
1237 1235 self.read()
1238 1236 return self._map
1239 1237
1240 1238 @propertycache
1241 1239 def copymap(self):
1242 1240 self.copymap = {}
1243 1241 self._map
1244 1242 return self.copymap
1245 1243
1246 1244 def clear(self):
1247 1245 self._map.clear()
1248 1246 self.copymap.clear()
1249 1247 self.setparents(nullid, nullid)
1250 1248 util.clearcachedproperty(self, "_dirs")
1251 1249 util.clearcachedproperty(self, "_alldirs")
1252 1250 util.clearcachedproperty(self, "filefoldmap")
1253 1251 util.clearcachedproperty(self, "dirfoldmap")
1254 1252 util.clearcachedproperty(self, "nonnormalset")
1255 1253 util.clearcachedproperty(self, "otherparentset")
1256 1254
1257 1255 def items(self):
1258 1256 return self._map.iteritems()
1259 1257
1260 1258 # forward for python2,3 compat
1261 1259 iteritems = items
1262 1260
1263 1261 def __len__(self):
1264 1262 return len(self._map)
1265 1263
1266 1264 def __iter__(self):
1267 1265 return iter(self._map)
1268 1266
1269 1267 def get(self, key, default=None):
1270 1268 return self._map.get(key, default)
1271 1269
1272 1270 def __contains__(self, key):
1273 1271 return key in self._map
1274 1272
1275 1273 def __getitem__(self, key):
1276 1274 return self._map[key]
1277 1275
1278 1276 def keys(self):
1279 1277 return self._map.keys()
1280 1278
1281 1279 def preload(self):
1282 1280 """Loads the underlying data, if it's not already loaded"""
1283 1281 self._map
1284 1282
1285 1283 def addfile(self, f, oldstate, state, mode, size, mtime):
1286 1284 """Add a tracked file to the dirstate."""
1287 1285 if oldstate in "?r" and r"_dirs" in self.__dict__:
1288 1286 self._dirs.addpath(f)
1289 1287 if oldstate == "?" and r"_alldirs" in self.__dict__:
1290 1288 self._alldirs.addpath(f)
1291 1289 self._map[f] = dirstatetuple(state, mode, size, mtime)
1292 1290 if state != 'n' or mtime == -1:
1293 1291 self.nonnormalset.add(f)
1294 1292 if size == -2:
1295 1293 self.otherparentset.add(f)
1296 1294
1297 1295 def removefile(self, f, oldstate, size):
1298 1296 """
1299 1297 Mark a file as removed in the dirstate.
1300 1298
1301 1299 The `size` parameter is used to store sentinel values that indicate
1302 1300 the file's previous state. In the future, we should refactor this
1303 1301 to be more explicit about what that state is.
1304 1302 """
1305 1303 if oldstate not in "?r" and r"_dirs" in self.__dict__:
1306 1304 self._dirs.delpath(f)
1307 1305 if oldstate == "?" and r"_alldirs" in self.__dict__:
1308 1306 self._alldirs.addpath(f)
1309 1307 if r"filefoldmap" in self.__dict__:
1310 1308 normed = util.normcase(f)
1311 1309 self.filefoldmap.pop(normed, None)
1312 1310 self._map[f] = dirstatetuple('r', 0, size, 0)
1313 1311 self.nonnormalset.add(f)
1314 1312
1315 1313 def dropfile(self, f, oldstate):
1316 1314 """
1317 1315 Remove a file from the dirstate. Returns True if the file was
1318 1316 previously recorded.
1319 1317 """
1320 1318 exists = self._map.pop(f, None) is not None
1321 1319 if exists:
1322 1320 if oldstate != "r" and r"_dirs" in self.__dict__:
1323 1321 self._dirs.delpath(f)
1324 1322 if r"_alldirs" in self.__dict__:
1325 1323 self._alldirs.delpath(f)
1326 1324 if r"filefoldmap" in self.__dict__:
1327 1325 normed = util.normcase(f)
1328 1326 self.filefoldmap.pop(normed, None)
1329 1327 self.nonnormalset.discard(f)
1330 1328 return exists
1331 1329
1332 1330 def clearambiguoustimes(self, files, now):
1333 1331 for f in files:
1334 1332 e = self.get(f)
1335 1333 if e is not None and e[0] == 'n' and e[3] == now:
1336 1334 self._map[f] = dirstatetuple(e[0], e[1], e[2], -1)
1337 1335 self.nonnormalset.add(f)
1338 1336
1339 1337 def nonnormalentries(self):
1340 1338 '''Compute the nonnormal dirstate entries from the dmap'''
1341 1339 try:
1342 1340 return parsers.nonnormalotherparententries(self._map)
1343 1341 except AttributeError:
1344 1342 nonnorm = set()
1345 1343 otherparent = set()
1346 1344 for fname, e in self._map.iteritems():
1347 1345 if e[0] != 'n' or e[3] == -1:
1348 1346 nonnorm.add(fname)
1349 1347 if e[0] == 'n' and e[2] == -2:
1350 1348 otherparent.add(fname)
1351 1349 return nonnorm, otherparent
1352 1350
1353 1351 @propertycache
1354 1352 def filefoldmap(self):
1355 1353 """Returns a dictionary mapping normalized case paths to their
1356 1354 non-normalized versions.
1357 1355 """
1358 1356 try:
1359 1357 makefilefoldmap = parsers.make_file_foldmap
1360 1358 except AttributeError:
1361 1359 pass
1362 1360 else:
1363 1361 return makefilefoldmap(self._map, util.normcasespec,
1364 1362 util.normcasefallback)
1365 1363
1366 1364 f = {}
1367 1365 normcase = util.normcase
1368 1366 for name, s in self._map.iteritems():
1369 1367 if s[0] != 'r':
1370 1368 f[normcase(name)] = name
1371 1369 f['.'] = '.' # prevents useless util.fspath() invocation
1372 1370 return f
1373 1371
1374 1372 def hastrackeddir(self, d):
1375 1373 """
1376 1374 Returns True if the dirstate contains a tracked (not removed) file
1377 1375 in this directory.
1378 1376 """
1379 1377 return d in self._dirs
1380 1378
1381 1379 def hasdir(self, d):
1382 1380 """
1383 1381 Returns True if the dirstate contains a file (tracked or removed)
1384 1382 in this directory.
1385 1383 """
1386 1384 return d in self._alldirs
1387 1385
1388 1386 @propertycache
1389 1387 def _dirs(self):
1390 1388 return util.dirs(self._map, 'r')
1391 1389
1392 1390 @propertycache
1393 1391 def _alldirs(self):
1394 1392 return util.dirs(self._map)
1395 1393
1396 1394 def _opendirstatefile(self):
1397 1395 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
1398 1396 if self._pendingmode is not None and self._pendingmode != mode:
1399 1397 fp.close()
1400 1398 raise error.Abort(_('working directory state may be '
1401 1399 'changed in parallel'))
1402 1400 self._pendingmode = mode
1403 1401 return fp
1404 1402
1405 1403 def parents(self):
1406 1404 if not self._parents:
1407 1405 try:
1408 1406 fp = self._opendirstatefile()
1409 1407 st = fp.read(40)
1410 1408 fp.close()
1411 1409 except IOError as err:
1412 1410 if err.errno != errno.ENOENT:
1413 1411 raise
1414 1412 # File doesn't exist, so the current state is empty
1415 1413 st = ''
1416 1414
1417 1415 l = len(st)
1418 1416 if l == 40:
1419 1417 self._parents = (st[:20], st[20:40])
1420 1418 elif l == 0:
1421 1419 self._parents = (nullid, nullid)
1422 1420 else:
1423 1421 raise error.Abort(_('working directory state appears '
1424 1422 'damaged!'))
1425 1423
1426 1424 return self._parents
1427 1425
1428 1426 def setparents(self, p1, p2):
1429 1427 self._parents = (p1, p2)
1430 1428 self._dirtyparents = True
1431 1429
1432 1430 def read(self):
1433 1431 # ignore HG_PENDING because identity is used only for writing
1434 1432 self.identity = util.filestat.frompath(
1435 1433 self._opener.join(self._filename))
1436 1434
1437 1435 try:
1438 1436 fp = self._opendirstatefile()
1439 1437 try:
1440 1438 st = fp.read()
1441 1439 finally:
1442 1440 fp.close()
1443 1441 except IOError as err:
1444 1442 if err.errno != errno.ENOENT:
1445 1443 raise
1446 1444 return
1447 1445 if not st:
1448 1446 return
1449 1447
1450 1448 if util.safehasattr(parsers, 'dict_new_presized'):
1451 1449 # Make an estimate of the number of files in the dirstate based on
1452 1450 # its size. From a linear regression on a set of real-world repos,
1453 1451 # all over 10,000 files, the size of a dirstate entry is 85
1454 1452 # bytes. The cost of resizing is significantly higher than the cost
1455 1453 # of filling in a larger presized dict, so aim roughly 20% high:
1456 1454 # dividing by 71 (about 85 / 1.2) presizes for ~20% more entries.
1457 1455 #
1458 1456 # This heuristic is imperfect in many ways, so in a future dirstate
1459 1457 # format update it makes sense to just record the number of entries
1460 1458 # on write.
1461 1459 self._map = parsers.dict_new_presized(len(st) // 71)
1462 1460
1463 1461 # Python's garbage collector triggers a GC each time a certain number
1464 1462 # of container objects (the number being defined by
1465 1463 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
1466 1464 # for each file in the dirstate. The C version then immediately marks
1467 1465 # them as not to be tracked by the collector. However, this has no
1468 1466 # effect on when GCs are triggered, only on what objects the GC looks
1469 1467 # into. This means that O(number of files) GCs are unavoidable.
1470 1468 # Depending on when in the process's lifetime the dirstate is parsed,
1471 1469 # this can get very expensive. As a workaround, disable GC while
1472 1470 # parsing the dirstate.
1473 1471 #
1474 1472 # (we cannot decorate the function directly since it is in a C module)
1475 1473 if rustext is not None:
1476 1474 parse_dirstate = rustext.dirstate.parse_dirstate
1477 1475 else:
1478 1476 parse_dirstate = parsers.parse_dirstate
1479 1477
1480 1478 parse_dirstate = util.nogc(parse_dirstate)
1481 1479 p = parse_dirstate(self._map, self.copymap, st)
1482 1480 if not self._dirtyparents:
1483 1481 self.setparents(*p)
1484 1482
1485 1483 # Avoid excess attribute lookups by fast pathing certain checks
1486 1484 self.__contains__ = self._map.__contains__
1487 1485 self.__getitem__ = self._map.__getitem__
1488 1486 self.get = self._map.get
1489 1487
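# A minimal sketch of a nogc-style decorator (hypothetical, standalone
# rendition of the util.nogc wrapper applied above):
#
#   import gc
#   def nogc(func):
#       def wrapper(*args, **kwargs):
#           wasenabled = gc.isenabled()
#           gc.disable()
#           try:
#               return func(*args, **kwargs)
#           finally:
#               if wasenabled:
#                   gc.enable()
#       return wrapper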
1490 1488 def write(self, st, now):
1491 1489 if rustext is not None:
1492 1490 pack_dirstate = rustext.dirstate.pack_dirstate
1493 1491 else:
1494 1492 pack_dirstate = parsers.pack_dirstate
1495 1493
1496 1494 st.write(pack_dirstate(self._map, self.copymap,
1497 1495 self.parents(), now))
1498 1496 st.close()
1499 1497 self._dirtyparents = False
1500 1498 self.nonnormalset, self.otherparentset = self.nonnormalentries()
1501 1499
1502 1500 @propertycache
1503 1501 def nonnormalset(self):
1504 1502 nonnorm, otherparents = self.nonnormalentries()
1505 1503 self.otherparentset = otherparents
1506 1504 return nonnorm
1507 1505
1508 1506 @propertycache
1509 1507 def otherparentset(self):
1510 1508 nonnorm, otherparents = self.nonnormalentries()
1511 1509 self.nonnormalset = nonnorm
1512 1510 return otherparents
1513 1511
1514 1512 @propertycache
1515 1513 def identity(self):
1516 1514 self._map
1517 1515 return self.identity
1518 1516
1519 1517 @propertycache
1520 1518 def dirfoldmap(self):
1521 1519 f = {}
1522 1520 normcase = util.normcase
1523 1521 for name in self._dirs:
1524 1522 f[normcase(name)] = name
1525 1523 return f
@@ -1,2055 +1,2055 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from . import (
23 23 error,
24 24 mdiff,
25 25 policy,
26 26 pycompat,
27 27 repository,
28 28 revlog,
29 29 util,
30 30 )
31 31 from .utils import (
32 32 interfaceutil,
33 33 )
34 34
35 35 parsers = policy.importmod(r'parsers')
36 36 propertycache = util.propertycache
37 37
38 38 def _parse(data):
39 39 # This method does a little bit of excessive-looking
40 40 # precondition checking. This is so that the behavior of this
41 41 # class exactly matches its C counterpart to try and help
42 42 # prevent surprise breakage for anyone that develops against
43 43 # the pure version.
44 44 if data and data[-1:] != '\n':
45 45 raise ValueError('Manifest did not end in a newline.')
46 46 prev = None
47 47 for l in data.splitlines():
48 48 if prev is not None and prev > l:
49 49 raise ValueError('Manifest lines not in sorted order.')
50 50 prev = l
51 51 f, n = l.split('\0')
52 52 if len(n) > 40:
53 53 yield f, bin(n[:40]), n[40:]
54 54 else:
55 55 yield f, bin(n), ''
56 56
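# A minimal sketch of the entry format handled above (hypothetical data;
# each line is "<file>\0<40 hex chars of nodeid><flags>\n", sorted by
# filename, and the text must end with a newline):
#
#   data = (b'bar/baz.py\x00' + b'a' * 40 + b'\n'
#           + b'foo.py\x00' + b'b' * 40 + b'x\n')
#   for f, n, fl in _parse(data):
#       pass   # yields (b'bar/baz.py', <20-byte node>, b'') and then
#              # (b'foo.py', <20-byte node>, b'x')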
57 57 def _text(it):
58 58 files = []
59 59 lines = []
60 60 for f, n, fl in it:
61 61 files.append(f)
62 62 # if this is changed to support newlines in filenames,
63 63 # be sure to check the templates/ dir again (especially *-raw.tmpl)
64 64 lines.append("%s\0%s%s\n" % (f, hex(n), fl))
65 65
66 66 _checkforbidden(files)
67 67 return ''.join(lines)
68 68
69 69 class lazymanifestiter(object):
70 70 def __init__(self, lm):
71 71 self.pos = 0
72 72 self.lm = lm
73 73
74 74 def __iter__(self):
75 75 return self
76 76
77 77 def next(self):
78 78 try:
79 79 data, pos = self.lm._get(self.pos)
80 80 except IndexError:
81 81 raise StopIteration
82 82 if pos == -1:
83 83 self.pos += 1
84 84 return data[0]
85 85 self.pos += 1
86 86 zeropos = data.find('\x00', pos)
87 87 return data[pos:zeropos]
88 88
89 89 __next__ = next
90 90
91 91 class lazymanifestiterentries(object):
92 92 def __init__(self, lm):
93 93 self.lm = lm
94 94 self.pos = 0
95 95
96 96 def __iter__(self):
97 97 return self
98 98
99 99 def next(self):
100 100 try:
101 101 data, pos = self.lm._get(self.pos)
102 102 except IndexError:
103 103 raise StopIteration
104 104 if pos == -1:
105 105 self.pos += 1
106 106 return data
107 107 zeropos = data.find('\x00', pos)
108 108 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
109 109 zeropos + 1, 40)
110 110 flags = self.lm._getflags(data, self.pos, zeropos)
111 111 self.pos += 1
112 112 return (data[pos:zeropos], hashval, flags)
113 113
114 114 __next__ = next
115 115
116 116 def unhexlify(data, extra, pos, length):
117 117 s = bin(data[pos:pos + length])
118 118 if extra:
119 119 s += chr(extra & 0xff)
120 120 return s
121 121
122 122 def _cmp(a, b):
123 123 return (a > b) - (a < b)
124 124
125 125 class _lazymanifest(object):
126 126 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
127 127 if positions is None:
128 128 self.positions = self.findlines(data)
129 129 self.extrainfo = [0] * len(self.positions)
130 130 self.data = data
131 131 self.extradata = []
132 132 else:
133 133 self.positions = positions[:]
134 134 self.extrainfo = extrainfo[:]
135 135 self.extradata = extradata[:]
136 136 self.data = data
137 137
138 138 def findlines(self, data):
139 139 if not data:
140 140 return []
141 141 pos = data.find("\n")
142 142 if pos == -1 or data[-1:] != '\n':
143 143 raise ValueError("Manifest did not end in a newline.")
144 144 positions = [0]
145 145 prev = data[:data.find('\x00')]
146 146 while pos < len(data) - 1 and pos != -1:
147 147 positions.append(pos + 1)
148 148 nexts = data[pos + 1:data.find('\x00', pos + 1)]
149 149 if nexts < prev:
150 150 raise ValueError("Manifest lines not in sorted order.")
151 151 prev = nexts
152 152 pos = data.find("\n", pos + 1)
153 153 return positions
154 154
155 155 def _get(self, index):
156 156 # get the position encoded in pos:
157 157 # positive number is an index in 'data'
158 158 # negative number is in extrapieces
159 159 pos = self.positions[index]
160 160 if pos >= 0:
161 161 return self.data, pos
162 162 return self.extradata[-pos - 1], -1
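# E.g. (hypothetical): positions == [0, -1, 43] means entry 0 starts at
# data[0], entry 1 lives in extradata[0] (pos -1 maps to index
# -(-1) - 1 == 0), and entry 2 starts at data[43].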
163 163
164 164 def _getkey(self, pos):
165 165 if pos >= 0:
166 166 return self.data[pos:self.data.find('\x00', pos + 1)]
167 167 return self.extradata[-pos - 1][0]
168 168
169 169 def bsearch(self, key):
170 170 first = 0
171 171 last = len(self.positions) - 1
172 172
173 173 while first <= last:
174 174 midpoint = (first + last)//2
175 175 nextpos = self.positions[midpoint]
176 176 candidate = self._getkey(nextpos)
177 177 r = _cmp(key, candidate)
178 178 if r == 0:
179 179 return midpoint
180 180 else:
181 181 if r < 0:
182 182 last = midpoint - 1
183 183 else:
184 184 first = midpoint + 1
185 185 return -1
186 186
187 187 def bsearch2(self, key):
188 188 # same as the above, but always returns a (position, found) pair;
189 189 # kept as a separate copy for performance reasons
190 190 first = 0
191 191 last = len(self.positions) - 1
192 192
193 193 while first <= last:
194 194 midpoint = (first + last)//2
195 195 nextpos = self.positions[midpoint]
196 196 candidate = self._getkey(nextpos)
197 197 r = _cmp(key, candidate)
198 198 if r == 0:
199 199 return (midpoint, True)
200 200 else:
201 201 if r < 0:
202 202 last = midpoint - 1
203 203 else:
204 204 first = midpoint + 1
205 205 return (first, False)
206 206
207 207 def __contains__(self, key):
208 208 return self.bsearch(key) != -1
209 209
210 210 def _getflags(self, data, needle, pos):
211 211 start = pos + 41
212 212 end = data.find("\n", start)
213 213 if end == -1:
214 214 end = len(data) - 1
215 215 if start == end:
216 216 return ''
217 217 return self.data[start:end]
218 218
219 219 def __getitem__(self, key):
220 220 if not isinstance(key, bytes):
221 221 raise TypeError("getitem: manifest keys must be a byte string.")
222 222 needle = self.bsearch(key)
223 223 if needle == -1:
224 224 raise KeyError
225 225 data, pos = self._get(needle)
226 226 if pos == -1:
227 227 return (data[1], data[2])
228 228 zeropos = data.find('\x00', pos)
229 229 assert 0 <= needle <= len(self.positions)
230 230 assert len(self.extrainfo) == len(self.positions)
231 231 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
232 232 flags = self._getflags(data, needle, zeropos)
233 233 return (hashval, flags)
234 234
235 235 def __delitem__(self, key):
236 236 needle, found = self.bsearch2(key)
237 237 if not found:
238 238 raise KeyError
239 239 cur = self.positions[needle]
240 240 self.positions = self.positions[:needle] + self.positions[needle + 1:]
241 241 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
242 242 if cur >= 0:
243 243 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
244 244
245 245 def __setitem__(self, key, value):
246 246 if not isinstance(key, bytes):
247 247 raise TypeError("setitem: manifest keys must be a byte string.")
248 248 if not isinstance(value, tuple) or len(value) != 2:
249 249 raise TypeError("Manifest values must be a tuple of (node, flags).")
250 250 hashval = value[0]
251 251 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
252 252 raise TypeError("node must be a 20- to 22-byte string")
253 253 flags = value[1]
254 254 if len(hashval) == 22:
255 255 hashval = hashval[:-1]
256 256 if not isinstance(flags, bytes) or len(flags) > 1:
257 257 raise TypeError("flags must be a 0 or 1 byte string, got %r" % (flags,))
258 258 needle, found = self.bsearch2(key)
259 259 if found:
260 260 # put the item
261 261 pos = self.positions[needle]
262 262 if pos < 0:
263 263 self.extradata[-pos - 1] = (key, hashval, value[1])
264 264 else:
265 265 # just don't bother
266 266 self.extradata.append((key, hashval, value[1]))
267 267 self.positions[needle] = -len(self.extradata)
268 268 else:
269 269 # not found, put it in with extra positions
270 270 self.extradata.append((key, hashval, value[1]))
271 271 self.positions = (self.positions[:needle] + [-len(self.extradata)]
272 272 + self.positions[needle:])
273 273 self.extrainfo = (self.extrainfo[:needle] + [0] +
274 274 self.extrainfo[needle:])
275 275
276 276 def copy(self):
277 277 # XXX call _compact like in C?
278 278 return _lazymanifest(self.data, self.positions, self.extrainfo,
279 279 self.extradata)
280 280
281 281 def _compact(self):
282 282 # hopefully not called TOO often
283 283 if len(self.extradata) == 0:
284 284 return
285 285 l = []
286 286 i = 0
287 287 offset = 0
288 288 self.extrainfo = [0] * len(self.positions)
289 289 while i < len(self.positions):
290 290 if self.positions[i] >= 0:
291 291 cur = self.positions[i]
292 292 last_cut = cur
293 293 while True:
294 294 self.positions[i] = offset
295 295 i += 1
296 296 if i == len(self.positions) or self.positions[i] < 0:
297 297 break
298 298 offset += self.positions[i] - cur
299 299 cur = self.positions[i]
300 300 end_cut = self.data.find('\n', cur)
301 301 if end_cut != -1:
302 302 end_cut += 1
303 303 offset += end_cut - cur
304 304 l.append(self.data[last_cut:end_cut])
305 305 else:
306 306 while i < len(self.positions) and self.positions[i] < 0:
307 307 cur = self.positions[i]
308 308 t = self.extradata[-cur - 1]
309 309 l.append(self._pack(t))
310 310 self.positions[i] = offset
311 311 if len(t[1]) > 20:
312 312 self.extrainfo[i] = ord(t[1][21])
313 313 offset += len(l[-1])
314 314 i += 1
315 315 self.data = ''.join(l)
316 316 self.extradata = []
317 317
318 318 def _pack(self, d):
319 319 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
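# E.g. (hypothetical): _pack((b'foo.py', b'\xaa' * 20, b'x')) yields
# b'foo.py\x00' + b'aa' * 20 + b'x\n', the same line layout that
# _parse() and findlines() consume.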
320 320
321 321 def text(self):
322 322 self._compact()
323 323 return self.data
324 324
325 325 def diff(self, m2, clean=False):
326 326 '''Finds changes between the current manifest and m2.'''
327 327 # XXX think whether efficiency matters here
328 328 diff = {}
329 329
330 330 for fn, e1, flags in self.iterentries():
331 331 if fn not in m2:
332 332 diff[fn] = (e1, flags), (None, '')
333 333 else:
334 334 e2 = m2[fn]
335 335 if (e1, flags) != e2:
336 336 diff[fn] = (e1, flags), e2
337 337 elif clean:
338 338 diff[fn] = None
339 339
340 340 for fn, e2, flags in m2.iterentries():
341 341 if fn not in self:
342 342 diff[fn] = (None, ''), (e2, flags)
343 343
344 344 return diff
345 345
346 346 def iterentries(self):
347 347 return lazymanifestiterentries(self)
348 348
349 349 def iterkeys(self):
350 350 return lazymanifestiter(self)
351 351
352 352 def __iter__(self):
353 353 return lazymanifestiter(self)
354 354
355 355 def __len__(self):
356 356 return len(self.positions)
357 357
358 358 def filtercopy(self, filterfn):
359 359 # XXX should be optimized
360 360 c = _lazymanifest('')
361 361 for f, n, fl in self.iterentries():
362 362 if filterfn(f):
363 363 c[f] = n, fl
364 364 return c
365 365
366 366 try:
367 367 _lazymanifest = parsers.lazymanifest
368 368 except AttributeError:
369 369 pass
370 370
371 371 @interfaceutil.implementer(repository.imanifestdict)
372 372 class manifestdict(object):
373 373 def __init__(self, data=''):
374 374 self._lm = _lazymanifest(data)
375 375
376 376 def __getitem__(self, key):
377 377 return self._lm[key][0]
378 378
379 379 def find(self, key):
380 380 return self._lm[key]
381 381
382 382 def __len__(self):
383 383 return len(self._lm)
384 384
385 385 def __nonzero__(self):
386 386 # nonzero is covered by the __len__ function, but implementing it here
387 387 # makes it easier for extensions to override.
388 388 return len(self._lm) != 0
389 389
390 390 __bool__ = __nonzero__
391 391
392 392 def __setitem__(self, key, node):
393 393 self._lm[key] = node, self.flags(key, '')
394 394
395 395 def __contains__(self, key):
396 396 if key is None:
397 397 return False
398 398 return key in self._lm
399 399
400 400 def __delitem__(self, key):
401 401 del self._lm[key]
402 402
403 403 def __iter__(self):
404 404 return self._lm.__iter__()
405 405
406 406 def iterkeys(self):
407 407 return self._lm.iterkeys()
408 408
409 409 def keys(self):
410 410 return list(self.iterkeys())
411 411
412 412 def filesnotin(self, m2, match=None):
413 413 '''Set of files in this manifest that are not in the other'''
414 414 if match:
415 415 m1 = self.matches(match)
416 416 m2 = m2.matches(match)
417 417 return m1.filesnotin(m2)
418 418 diff = self.diff(m2)
419 419 files = set(filepath
420 420 for filepath, hashflags in diff.iteritems()
421 421 if hashflags[1][0] is None)
422 422 return files
423 423
424 424 @propertycache
425 425 def _dirs(self):
426 426 return util.dirs(self)
427 427
428 428 def dirs(self):
429 429 return self._dirs
430 430
431 431 def hasdir(self, dir):
432 432 return dir in self._dirs
433 433
434 434 def _filesfastpath(self, match):
435 435 '''Checks whether we can correctly and quickly iterate over matcher
436 436 files instead of over manifest files.'''
437 437 files = match.files()
438 438 return (len(files) < 100 and (match.isexact() or
439 439 (match.prefix() and all(fn in self for fn in files))))
440 440
441 441 def walk(self, match):
442 442 '''Generates matching file names.
443 443
444 444 Equivalent to manifest.matches(match).iterkeys(), but without creating
445 445 an entirely new manifest.
446 446
447 447 It also reports nonexistent files by marking them bad with match.bad().
448 448 '''
449 449 if match.always():
450 450 for f in iter(self):
451 451 yield f
452 452 return
453 453
454 454 fset = set(match.files())
455 455
456 456 # avoid the entire walk if we're only looking for specific files
457 457 if self._filesfastpath(match):
458 458 for fn in sorted(fset):
459 459 yield fn
460 460 return
461 461
462 462 for fn in self:
463 463 if fn in fset:
464 464 # specified pattern is the exact name
465 465 fset.remove(fn)
466 466 if match(fn):
467 467 yield fn
468 468
469 # for dirstate.walk, files=['.'] means "walk the whole tree".
469 # for dirstate.walk, files=[''] means "walk the whole tree".
470 470 # follow that here, too
471 fset.discard('.')
471 fset.discard('')
472 472
473 473 for fn in sorted(fset):
474 474 if not self.hasdir(fn):
475 475 match.bad(fn, None)
476 476
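# With this change, matcher APIs spell the repository root as '' (the
# empty string) rather than '.'. A hedged sketch (hypothetical values;
# see mercurial/match.py for the real signatures):
#
#   m = match.match(root, cwd, [b'path:foo'])
#   m.visitchildrenset(b'')   # query the root as b'', not b'.'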
477 477 def matches(self, match):
478 478 '''generate a new manifest filtered by the match argument'''
479 479 if match.always():
480 480 return self.copy()
481 481
482 482 if self._filesfastpath(match):
483 483 m = manifestdict()
484 484 lm = self._lm
485 485 for fn in match.files():
486 486 if fn in lm:
487 487 m._lm[fn] = lm[fn]
488 488 return m
489 489
490 490 m = manifestdict()
491 491 m._lm = self._lm.filtercopy(match)
492 492 return m
493 493
494 494 def diff(self, m2, match=None, clean=False):
495 495 '''Finds changes between the current manifest and m2.
496 496
497 497 Args:
498 498 m2: the manifest to which this manifest should be compared.
499 499 clean: if true, include files unchanged between these manifests
500 500 with a None value in the returned dictionary.
501 501
502 502 The result is returned as a dict with filename as key and
503 503 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
504 504 nodeid in the current/other manifest and fl1/fl2 is the flag
505 505 in the current/other manifest. Where the file does not exist,
506 506 the nodeid will be None and the flags will be the empty
507 507 string.
508 508 '''
509 509 if match:
510 510 m1 = self.matches(match)
511 511 m2 = m2.matches(match)
512 512 return m1.diff(m2, clean=clean)
513 513 return self._lm.diff(m2._lm, clean)
514 514
515 515 def setflag(self, key, flag):
516 516 self._lm[key] = self[key], flag
517 517
518 518 def get(self, key, default=None):
519 519 try:
520 520 return self._lm[key][0]
521 521 except KeyError:
522 522 return default
523 523
524 524 def flags(self, key, default=''):
525 525 try:
526 526 return self._lm[key][1]
527 527 except KeyError:
528 528 return default
529 529
530 530 def copy(self):
531 531 c = manifestdict()
532 532 c._lm = self._lm.copy()
533 533 return c
534 534
535 535 def items(self):
536 536 return (x[:2] for x in self._lm.iterentries())
537 537
538 538 def iteritems(self):
539 539 return (x[:2] for x in self._lm.iterentries())
540 540
541 541 def iterentries(self):
542 542 return self._lm.iterentries()
543 543
544 544 def text(self):
545 545 # most likely uses native version
546 546 return self._lm.text()
547 547
548 548 def fastdelta(self, base, changes):
549 549 """Given a base manifest text as a bytearray and a list of changes
550 550 relative to that text, compute a delta that can be used by revlog.
551 551 """
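# A minimal usage sketch (hypothetical names): 'changes' is a sorted
# list of (filename, todelete) pairs and 'basetext' the current full text.
#
#   arraytext, deltatext = m.fastdelta(bytearray(basetext),
#                                      [(b'foo.py', False)])
#
# deltatext is a concatenation of struct '>lll' (start, end,
# len(content)) headers, each followed by its content.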
552 552 delta = []
553 553 dstart = None
554 554 dend = None
555 555 dline = [""]
556 556 start = 0
557 557 # zero copy representation of base as a buffer
558 558 addbuf = util.buffer(base)
559 559
560 560 changes = list(changes)
561 561 if len(changes) < 1000:
562 562 # start with a readonly loop that finds the offset of
563 563 # each line and creates the deltas
564 564 for f, todelete in changes:
565 565 # bs will either be the index of the item or the insert point
566 566 start, end = _msearch(addbuf, f, start)
567 567 if not todelete:
568 568 h, fl = self._lm[f]
569 569 l = "%s\0%s%s\n" % (f, hex(h), fl)
570 570 else:
571 571 if start == end:
572 572 # item we want to delete was not found, error out
573 573 raise AssertionError(
574 574 _("failed to remove %s from manifest") % f)
575 575 l = ""
576 576 if dstart is not None and dstart <= start and dend >= start:
577 577 if dend < end:
578 578 dend = end
579 579 if l:
580 580 dline.append(l)
581 581 else:
582 582 if dstart is not None:
583 583 delta.append([dstart, dend, "".join(dline)])
584 584 dstart = start
585 585 dend = end
586 586 dline = [l]
587 587
588 588 if dstart is not None:
589 589 delta.append([dstart, dend, "".join(dline)])
590 590 # apply the delta to the base, and get a delta for addrevision
591 591 deltatext, arraytext = _addlistdelta(base, delta)
592 592 else:
593 593 # For large changes, it's much cheaper to just build the text and
594 594 # diff it.
595 595 arraytext = bytearray(self.text())
596 596 deltatext = mdiff.textdiff(
597 597 util.buffer(base), util.buffer(arraytext))
598 598
599 599 return arraytext, deltatext
600 600
601 601 def _msearch(m, s, lo=0, hi=None):
602 602 '''return a tuple (start, end) that says where to find s within m.
603 603
604 604 If the string is found m[start:end] are the line containing
605 605 that string. If start == end the string was not found and
606 606 they indicate the proper sorted insertion point.
607 607
608 608 m should be a buffer, a memoryview or a byte string.
609 609 s is a byte string'''
610 610 def advance(i, c):
611 611 while i < lenm and m[i:i + 1] != c:
612 612 i += 1
613 613 return i
614 614 if not s:
615 615 return (lo, lo)
616 616 lenm = len(m)
617 617 if not hi:
618 618 hi = lenm
619 619 while lo < hi:
620 620 mid = (lo + hi) // 2
621 621 start = mid
622 622 while start > 0 and m[start - 1:start] != '\n':
623 623 start -= 1
624 624 end = advance(start, '\0')
625 625 if bytes(m[start:end]) < s:
626 626 # we know that after the null there are 40 bytes of sha1
627 627 # this translates to the bisect lo = mid + 1
628 628 lo = advance(end + 40, '\n') + 1
629 629 else:
630 630 # this translates to the bisect hi = mid
631 631 hi = start
632 632 end = advance(lo, '\0')
633 633 found = m[lo:end]
634 634 if s == found:
635 635 # we know that after the null there are 40 bytes of sha1
636 636 end = advance(end + 40, '\n')
637 637 return (lo, end + 1)
638 638 else:
639 639 return (lo, lo)
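# A minimal usage sketch (hypothetical data; two sorted manifest lines):
#
#   m = b'a\x00' + b'0' * 40 + b'\n' + b'c\x00' + b'1' * 40 + b'\n'
#   _msearch(m, b'a')   # -> (0, 43): m[0:43] is the line for 'a'
#   _msearch(m, b'b')   # -> (43, 43): absent; 43 is the insertion point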
640 640
641 641 def _checkforbidden(l):
642 642 """Check filenames for illegal characters."""
643 643 for f in l:
644 644 if '\n' in f or '\r' in f:
645 645 raise error.StorageError(
646 646 _("'\\n' and '\\r' disallowed in filenames: %r")
647 647 % pycompat.bytestr(f))
648 648
649 649
650 650 # apply the changes collected during the bisect loop to our addlist
651 651 # return a delta suitable for addrevision
652 652 def _addlistdelta(addlist, x):
653 653 # for large addlist arrays, building a new array is cheaper
654 654 # than repeatedly modifying the existing one
655 655 currentposition = 0
656 656 newaddlist = bytearray()
657 657
658 658 for start, end, content in x:
659 659 newaddlist += addlist[currentposition:start]
660 660 if content:
661 661 newaddlist += bytearray(content)
662 662
663 663 currentposition = end
664 664
665 665 newaddlist += addlist[currentposition:]
666 666
667 667 deltatext = "".join(struct.pack(">lll", start, end, len(content))
668 668 + content for start, end, content in x)
669 669 return deltatext, newaddlist
670 670
671 671 def _splittopdir(f):
672 672 if '/' in f:
673 673 dir, subpath = f.split('/', 1)
674 674 return dir + '/', subpath
675 675 else:
676 676 return '', f
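# E.g.: _splittopdir(b'a/b/c.txt') -> (b'a/', b'b/c.txt'), while
# _splittopdir(b'top.txt') -> (b'', b'top.txt'); note the trailing '/'
# kept on directory keys throughout treemanifest.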
677 677
678 678 _noop = lambda s: None
679 679
680 680 class treemanifest(object):
681 681 def __init__(self, dir='', text=''):
682 682 self._dir = dir
683 683 self._node = nullid
684 684 self._loadfunc = _noop
685 685 self._copyfunc = _noop
686 686 self._dirty = False
687 687 self._dirs = {}
688 688 self._lazydirs = {}
689 689 # Using _lazymanifest here is a little slower than plain old dicts
690 690 self._files = {}
691 691 self._flags = {}
692 692 if text:
693 693 def readsubtree(subdir, subm):
694 694 raise AssertionError('treemanifest constructor only accepts '
695 695 'flat manifests')
696 696 self.parse(text, readsubtree)
697 697 self._dirty = True # Mark flat manifest dirty after parsing
698 698
699 699 def _subpath(self, path):
700 700 return self._dir + path
701 701
702 702 def _loadalllazy(self):
703 703 selfdirs = self._dirs
704 704 for d, (path, node, readsubtree, docopy) in self._lazydirs.iteritems():
705 705 if docopy:
706 706 selfdirs[d] = readsubtree(path, node).copy()
707 707 else:
708 708 selfdirs[d] = readsubtree(path, node)
709 709 self._lazydirs = {}
710 710
711 711 def _loadlazy(self, d):
712 712 v = self._lazydirs.get(d)
713 713 if v:
714 714 path, node, readsubtree, docopy = v
715 715 if docopy:
716 716 self._dirs[d] = readsubtree(path, node).copy()
717 717 else:
718 718 self._dirs[d] = readsubtree(path, node)
719 719 del self._lazydirs[d]
720 720
721 721 def _loadchildrensetlazy(self, visit):
722 722 if not visit:
723 723 return None
724 724 if visit == 'all' or visit == 'this':
725 725 self._loadalllazy()
726 726 return None
727 727
728 728 loadlazy = self._loadlazy
729 729 for k in visit:
730 730 loadlazy(k + '/')
731 731 return visit
732 732
733 733 def _loaddifflazy(self, t1, t2):
734 734 """load items in t1 and t2 if they're needed for diffing.
735 735
736 736 The current criteria are:
737 737 - if it's not present in _lazydirs in either t1 or t2, load it in the
738 738 other (it may already be loaded or it may not exist, doesn't matter)
739 739 - if it's present in _lazydirs in both, compare the nodeid; if it
740 740 differs, load it in both
741 741 """
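# E.g. (hypothetical): with t1._lazydirs == {b'a/': (p, n1, rst, c)} and
# t2._lazydirs == {b'a/': (p, n2, rst, c)} where n1 != n2, b'a/' is
# loaded on both sides so the differing subtrees get compared.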
742 742 toloadlazy = []
743 743 for d, v1 in t1._lazydirs.iteritems():
744 744 v2 = t2._lazydirs.get(d)
745 745 if not v2 or v2[1] != v1[1]:
746 746 toloadlazy.append(d)
747 747 for d, v1 in t2._lazydirs.iteritems():
748 748 if d not in t1._lazydirs:
749 749 toloadlazy.append(d)
750 750
751 751 for d in toloadlazy:
752 752 t1._loadlazy(d)
753 753 t2._loadlazy(d)
754 754
755 755 def __len__(self):
756 756 self._load()
757 757 size = len(self._files)
758 758 self._loadalllazy()
759 759 for m in self._dirs.values():
760 760 size += m.__len__()
761 761 return size
762 762
763 763 def __nonzero__(self):
764 764 # Faster than "__len__() != 0" since it avoids loading sub-manifests
765 765 return not self._isempty()
766 766
767 767 __bool__ = __nonzero__
768 768
769 769 def _isempty(self):
770 770 self._load() # for consistency; already loaded by all callers
771 771 # See if we can skip loading everything.
772 772 if self._files or (self._dirs and
773 773 any(not m._isempty() for m in self._dirs.values())):
774 774 return False
775 775 self._loadalllazy()
776 776 return (not self._dirs or
777 777 all(m._isempty() for m in self._dirs.values()))
778 778
779 779 def __repr__(self):
780 780 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
781 781 (self._dir, hex(self._node),
782 782 bool(self._loadfunc is _noop),
783 783 self._dirty, id(self)))
784 784
785 785 def dir(self):
786 786 '''The directory that this tree manifest represents, including a
787 787 trailing '/'. Empty string for the repo root directory.'''
788 788 return self._dir
789 789
790 790 def node(self):
791 791 '''The node of this instance. nullid for unsaved instances. Should
792 792 be updated when the instance is read from or written to a revlog.
793 793 '''
794 794 assert not self._dirty
795 795 return self._node
796 796
797 797 def setnode(self, node):
798 798 self._node = node
799 799 self._dirty = False
800 800
801 801 def iterentries(self):
802 802 self._load()
803 803 self._loadalllazy()
804 804 for p, n in sorted(itertools.chain(self._dirs.items(),
805 805 self._files.items())):
806 806 if p in self._files:
807 807 yield self._subpath(p), n, self._flags.get(p, '')
808 808 else:
809 809 for x in n.iterentries():
810 810 yield x
811 811
812 812 def items(self):
813 813 self._load()
814 814 self._loadalllazy()
815 815 for p, n in sorted(itertools.chain(self._dirs.items(),
816 816 self._files.items())):
817 817 if p in self._files:
818 818 yield self._subpath(p), n
819 819 else:
820 820 for f, sn in n.iteritems():
821 821 yield f, sn
822 822
823 823 iteritems = items
824 824
825 825 def iterkeys(self):
826 826 self._load()
827 827 self._loadalllazy()
828 828 for p in sorted(itertools.chain(self._dirs, self._files)):
829 829 if p in self._files:
830 830 yield self._subpath(p)
831 831 else:
832 832 for f in self._dirs[p]:
833 833 yield f
834 834
835 835 def keys(self):
836 836 return list(self.iterkeys())
837 837
838 838 def __iter__(self):
839 839 return self.iterkeys()
840 840
841 841 def __contains__(self, f):
842 842 if f is None:
843 843 return False
844 844 self._load()
845 845 dir, subpath = _splittopdir(f)
846 846 if dir:
847 847 self._loadlazy(dir)
848 848
849 849 if dir not in self._dirs:
850 850 return False
851 851
852 852 return self._dirs[dir].__contains__(subpath)
853 853 else:
854 854 return f in self._files
855 855
856 856 def get(self, f, default=None):
857 857 self._load()
858 858 dir, subpath = _splittopdir(f)
859 859 if dir:
860 860 self._loadlazy(dir)
861 861
862 862 if dir not in self._dirs:
863 863 return default
864 864 return self._dirs[dir].get(subpath, default)
865 865 else:
866 866 return self._files.get(f, default)
867 867
868 868 def __getitem__(self, f):
869 869 self._load()
870 870 dir, subpath = _splittopdir(f)
871 871 if dir:
872 872 self._loadlazy(dir)
873 873
874 874 return self._dirs[dir].__getitem__(subpath)
875 875 else:
876 876 return self._files[f]
877 877
878 878 def flags(self, f):
879 879 self._load()
880 880 dir, subpath = _splittopdir(f)
881 881 if dir:
882 882 self._loadlazy(dir)
883 883
884 884 if dir not in self._dirs:
885 885 return ''
886 886 return self._dirs[dir].flags(subpath)
887 887 else:
888 888 if f in self._lazydirs or f in self._dirs:
889 889 return ''
890 890 return self._flags.get(f, '')
891 891
892 892 def find(self, f):
893 893 self._load()
894 894 dir, subpath = _splittopdir(f)
895 895 if dir:
896 896 self._loadlazy(dir)
897 897
898 898 return self._dirs[dir].find(subpath)
899 899 else:
900 900 return self._files[f], self._flags.get(f, '')
901 901
902 902 def __delitem__(self, f):
903 903 self._load()
904 904 dir, subpath = _splittopdir(f)
905 905 if dir:
906 906 self._loadlazy(dir)
907 907
908 908 self._dirs[dir].__delitem__(subpath)
909 909 # If the directory is now empty, remove it
910 910 if self._dirs[dir]._isempty():
911 911 del self._dirs[dir]
912 912 else:
913 913 del self._files[f]
914 914 if f in self._flags:
915 915 del self._flags[f]
916 916 self._dirty = True
917 917
918 918 def __setitem__(self, f, n):
919 919 assert n is not None
920 920 self._load()
921 921 dir, subpath = _splittopdir(f)
922 922 if dir:
923 923 self._loadlazy(dir)
924 924 if dir not in self._dirs:
925 925 self._dirs[dir] = treemanifest(self._subpath(dir))
926 926 self._dirs[dir].__setitem__(subpath, n)
927 927 else:
928 928 self._files[f] = n[:21] # to match manifestdict's behavior
929 929 self._dirty = True
930 930
931 931 def _load(self):
932 932 if self._loadfunc is not _noop:
933 933 lf, self._loadfunc = self._loadfunc, _noop
934 934 lf(self)
935 935 elif self._copyfunc is not _noop:
936 936 cf, self._copyfunc = self._copyfunc, _noop
937 937 cf(self)
938 938
939 939 def setflag(self, f, flags):
940 940 """Set the flags (symlink, executable) for path f."""
941 941 self._load()
942 942 dir, subpath = _splittopdir(f)
943 943 if dir:
944 944 self._loadlazy(dir)
945 945 if dir not in self._dirs:
946 946 self._dirs[dir] = treemanifest(self._subpath(dir))
947 947 self._dirs[dir].setflag(subpath, flags)
948 948 else:
949 949 self._flags[f] = flags
950 950 self._dirty = True
951 951
952 952 def copy(self):
953 953 copy = treemanifest(self._dir)
954 954 copy._node = self._node
955 955 copy._dirty = self._dirty
956 956 if self._copyfunc is _noop:
957 957 def _copyfunc(s):
958 958 self._load()
959 959 s._lazydirs = {d: (p, n, r, True) for
960 960 d, (p, n, r, c) in self._lazydirs.iteritems()}
961 961 sdirs = s._dirs
962 962 for d, v in self._dirs.iteritems():
963 963 sdirs[d] = v.copy()
964 964 s._files = dict.copy(self._files)
965 965 s._flags = dict.copy(self._flags)
966 966 if self._loadfunc is _noop:
967 967 _copyfunc(copy)
968 968 else:
969 969 copy._copyfunc = _copyfunc
970 970 else:
971 971 copy._copyfunc = self._copyfunc
972 972 return copy
973 973
974 974 def filesnotin(self, m2, match=None):
975 975 '''Set of files in this manifest that are not in the other'''
976 976 if match and not match.always():
977 977 m1 = self.matches(match)
978 978 m2 = m2.matches(match)
979 979 return m1.filesnotin(m2)
980 980
981 981 files = set()
982 982 def _filesnotin(t1, t2):
983 983 if t1._node == t2._node and not t1._dirty and not t2._dirty:
984 984 return
985 985 t1._load()
986 986 t2._load()
987 987 self._loaddifflazy(t1, t2)
988 988 for d, m1 in t1._dirs.iteritems():
989 989 if d in t2._dirs:
990 990 m2 = t2._dirs[d]
991 991 _filesnotin(m1, m2)
992 992 else:
993 993 files.update(m1.iterkeys())
994 994
995 995 for fn in t1._files:
996 996 if fn not in t2._files:
997 997 files.add(t1._subpath(fn))
998 998
999 999 _filesnotin(self, m2)
1000 1000 return files
1001 1001
1002 1002 @propertycache
1003 1003 def _alldirs(self):
1004 1004 return util.dirs(self)
1005 1005
1006 1006 def dirs(self):
1007 1007 return self._alldirs
1008 1008
1009 1009 def hasdir(self, dir):
1010 1010 self._load()
1011 1011 topdir, subdir = _splittopdir(dir)
1012 1012 if topdir:
1013 1013 self._loadlazy(topdir)
1014 1014 if topdir in self._dirs:
1015 1015 return self._dirs[topdir].hasdir(subdir)
1016 1016 return False
1017 1017 dirslash = dir + '/'
1018 1018 return dirslash in self._dirs or dirslash in self._lazydirs
1019 1019
1020 1020 def walk(self, match):
1021 1021 '''Generates matching file names.
1022 1022
1023 1023 Equivalent to manifest.matches(match).iterkeys(), but without creating
1024 1024 an entirely new manifest.
1025 1025
1026 1026 It also reports nonexistent files by marking them bad with match.bad().
1027 1027 '''
1028 1028 if match.always():
1029 1029 for f in iter(self):
1030 1030 yield f
1031 1031 return
1032 1032
1033 1033 fset = set(match.files())
1034 1034
1035 1035 for fn in self._walk(match):
1036 1036 if fn in fset:
1037 1037 # specified pattern is the exact name
1038 1038 fset.remove(fn)
1039 1039 yield fn
1040 1040
1041 # for dirstate.walk, files=['.'] means "walk the whole tree".
1041 # for dirstate.walk, files=[''] means "walk the whole tree".
1042 1042 # follow that here, too
1043 fset.discard('.')
1043 fset.discard('')
1044 1044
1045 1045 for fn in sorted(fset):
1046 1046 if not self.hasdir(fn):
1047 1047 match.bad(fn, None)
1048 1048
1049 1049 def _walk(self, match):
1050 1050 '''Recursively generates matching file names for walk().'''
1051 visit = match.visitchildrenset(self._dir[:-1] or '.')
1051 visit = match.visitchildrenset(self._dir[:-1])
1052 1052 if not visit:
1053 1053 return
1054 1054
1055 1055 # yield this dir's files and walk its submanifests
1056 1056 self._load()
1057 1057 visit = self._loadchildrensetlazy(visit)
1058 1058 for p in sorted(list(self._dirs) + list(self._files)):
1059 1059 if p in self._files:
1060 1060 fullp = self._subpath(p)
1061 1061 if match(fullp):
1062 1062 yield fullp
1063 1063 else:
1064 1064 if not visit or p[:-1] in visit:
1065 1065 for f in self._dirs[p]._walk(match):
1066 1066 yield f
1067 1067
1068 1068 def matches(self, match):
1069 1069 '''generate a new manifest filtered by the match argument'''
1070 1070 if match.always():
1071 1071 return self.copy()
1072 1072
1073 1073 return self._matches(match)
1074 1074
1075 1075 def _matches(self, match):
1076 1076 '''recursively generate a new manifest filtered by the match argument.
1077 1077 '''
1078 1078
1079 visit = match.visitchildrenset(self._dir[:-1] or '.')
1079 visit = match.visitchildrenset(self._dir[:-1])
1080 1080 if visit == 'all':
1081 1081 return self.copy()
1082 1082 ret = treemanifest(self._dir)
1083 1083 if not visit:
1084 1084 return ret
1085 1085
1086 1086 self._load()
1087 1087 for fn in self._files:
1088 1088 # While visitchildrenset *usually* lists only subdirs, this is
1089 1089 # actually up to the matcher and may have some files in the set().
1090 1090 # If visit == 'this', we should obviously look at the files in this
1091 1091 # directory; if visit is a set, and fn is in it, we should inspect
1092 1092 # fn (but no need to inspect things not in the set).
1093 1093 if visit != 'this' and fn not in visit:
1094 1094 continue
1095 1095 fullp = self._subpath(fn)
1096 1096 # visitchildrenset isn't perfect, we still need to call the regular
1097 1097 # matcher code to further filter results.
1098 1098 if not match(fullp):
1099 1099 continue
1100 1100 ret._files[fn] = self._files[fn]
1101 1101 if fn in self._flags:
1102 1102 ret._flags[fn] = self._flags[fn]
1103 1103
1104 1104 visit = self._loadchildrensetlazy(visit)
1105 1105 for dir, subm in self._dirs.iteritems():
1106 1106 if visit and dir[:-1] not in visit:
1107 1107 continue
1108 1108 m = subm._matches(match)
1109 1109 if not m._isempty():
1110 1110 ret._dirs[dir] = m
1111 1111
1112 1112 if not ret._isempty():
1113 1113 ret._dirty = True
1114 1114 return ret
1115 1115
1116 1116 def diff(self, m2, match=None, clean=False):
1117 1117 '''Finds changes between the current manifest and m2.
1118 1118
1119 1119 Args:
1120 1120 m2: the manifest to which this manifest should be compared.
1121 1121 clean: if true, include files unchanged between these manifests
1122 1122 with a None value in the returned dictionary.
1123 1123
1124 1124 The result is returned as a dict with filename as key and
1125 1125 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1126 1126 nodeid in the current/other manifest and fl1/fl2 is the flag
1127 1127 in the current/other manifest. Where the file does not exist,
1128 1128 the nodeid will be None and the flags will be the empty
1129 1129 string.
1130 1130 '''
1131 1131 if match and not match.always():
1132 1132 m1 = self.matches(match)
1133 1133 m2 = m2.matches(match)
1134 1134 return m1.diff(m2, clean=clean)
1135 1135 result = {}
1136 1136 emptytree = treemanifest()
1137 1137
1138 1138 def _iterativediff(t1, t2, stack):
1139 1139 """compares two tree manifests and append new tree-manifests which
1140 1140 needs to be compared to stack"""
1141 1141 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1142 1142 return
1143 1143 t1._load()
1144 1144 t2._load()
1145 1145 self._loaddifflazy(t1, t2)
1146 1146
1147 1147 for d, m1 in t1._dirs.iteritems():
1148 1148 m2 = t2._dirs.get(d, emptytree)
1149 1149 stack.append((m1, m2))
1150 1150
1151 1151 for d, m2 in t2._dirs.iteritems():
1152 1152 if d not in t1._dirs:
1153 1153 stack.append((emptytree, m2))
1154 1154
1155 1155 for fn, n1 in t1._files.iteritems():
1156 1156 fl1 = t1._flags.get(fn, '')
1157 1157 n2 = t2._files.get(fn, None)
1158 1158 fl2 = t2._flags.get(fn, '')
1159 1159 if n1 != n2 or fl1 != fl2:
1160 1160 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1161 1161 elif clean:
1162 1162 result[t1._subpath(fn)] = None
1163 1163
1164 1164 for fn, n2 in t2._files.iteritems():
1165 1165 if fn not in t1._files:
1166 1166 fl2 = t2._flags.get(fn, '')
1167 1167 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1168 1168
1169 1169 stackls = []
1170 1170 _iterativediff(self, m2, stackls)
1171 1171 while stackls:
1172 1172 t1, t2 = stackls.pop()
1173 1173 # stackls is populated in the function call
1174 1174 _iterativediff(t1, t2, stackls)
1175 1175 return result
1176 1176
1177 1177 def unmodifiedsince(self, m2):
1178 1178 return not self._dirty and not m2._dirty and self._node == m2._node
1179 1179
1180 1180 def parse(self, text, readsubtree):
1181 1181 selflazy = self._lazydirs
1182 1182 subpath = self._subpath
1183 1183 for f, n, fl in _parse(text):
1184 1184 if fl == 't':
1185 1185 f = f + '/'
1186 1186 # False below means "doesn't need to be copied" and can use the
1187 1187 # cached value from readsubtree directly.
1188 1188 selflazy[f] = (subpath(f), n, readsubtree, False)
1189 1189 elif '/' in f:
1190 1190 # This is a flat manifest, so use __setitem__ and setflag rather
1191 1191 # than assigning directly to _files and _flags, so we can
1192 1192 # assign a path in a subdirectory, and to mark dirty (compared
1193 1193 # to nullid).
1194 1194 self[f] = n
1195 1195 if fl:
1196 1196 self.setflag(f, fl)
1197 1197 else:
1198 1198 # Assigning to _files and _flags avoids marking as dirty,
1199 1199 # and should be a little faster.
1200 1200 self._files[f] = n
1201 1201 if fl:
1202 1202 self._flags[f] = fl
1203 1203
1204 1204 def text(self):
1205 1205 """Get the full data of this manifest as a bytestring."""
1206 1206 self._load()
1207 1207 return _text(self.iterentries())
1208 1208
1209 1209 def dirtext(self):
1210 1210 """Get the full data of this directory as a bytestring. Make sure that
1211 1211 any submanifests have been written first, so their nodeids are correct.
1212 1212 """
1213 1213 self._load()
1214 1214 flags = self.flags
1215 1215 lazydirs = [(d[:-1], v[1], 't') for d, v in self._lazydirs.iteritems()]
1216 1216 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1217 1217 files = [(f, self._files[f], flags(f)) for f in self._files]
1218 1218 return _text(sorted(dirs + files + lazydirs))
1219 1219
1220 1220 def read(self, gettext, readsubtree):
1221 1221 def _load_for_read(s):
1222 1222 s.parse(gettext(), readsubtree)
1223 1223 s._dirty = False
1224 1224 self._loadfunc = _load_for_read
1225 1225
1226 1226 def writesubtrees(self, m1, m2, writesubtree, match):
1227 1227 self._load() # for consistency; should never have any effect here
1228 1228 m1._load()
1229 1229 m2._load()
1230 1230 emptytree = treemanifest()
1231 1231 def getnode(m, d):
1232 1232 ld = m._lazydirs.get(d)
1233 1233 if ld:
1234 1234 return ld[1]
1235 1235 return m._dirs.get(d, emptytree)._node
1236 1236
1237 1237 # let's skip investigating things that `match` says we do not need.
1238 visit = match.visitchildrenset(self._dir[:-1] or '.')
1238 visit = match.visitchildrenset(self._dir[:-1])
1239 1239 visit = self._loadchildrensetlazy(visit)
1240 1240 if visit == 'this' or visit == 'all':
1241 1241 visit = None
1242 1242 for d, subm in self._dirs.iteritems():
1243 1243 if visit and d[:-1] not in visit:
1244 1244 continue
1245 1245 subp1 = getnode(m1, d)
1246 1246 subp2 = getnode(m2, d)
1247 1247 if subp1 == nullid:
1248 1248 subp1, subp2 = subp2, subp1
1249 1249 writesubtree(subm, subp1, subp2, match)
1250 1250
1251 1251 def walksubtrees(self, matcher=None):
1252 1252 """Returns an iterator of the subtrees of this manifest, including this
1253 1253 manifest itself.
1254 1254
1255 1255 If `matcher` is provided, it only returns subtrees that match.
1256 1256 """
1257 if matcher and not matcher.visitdir(self._dir[:-1] or '.'):
1257 if matcher and not matcher.visitdir(self._dir[:-1]):
1258 1258 return
1259 1259 if not matcher or matcher(self._dir[:-1]):
1260 1260 yield self
1261 1261
1262 1262 self._load()
1263 1263 # OPT: use visitchildrenset to avoid loading everything.
1264 1264 self._loadalllazy()
1265 1265 for d, subm in self._dirs.iteritems():
1266 1266 for subtree in subm.walksubtrees(matcher=matcher):
1267 1267 yield subtree
1268 1268
1269 1269 class manifestfulltextcache(util.lrucachedict):
1270 1270 """File-backed LRU cache for the manifest cache
1271 1271
1272 1272 File consists of entries, up to EOF:
1273 1273
1274 1274 - 20 bytes node, 4 bytes length, <length> manifest data
1275 1275
1276 1276 These are written in reverse cache order (oldest to newest).
1277 1277
1278 1278 """
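# One cache entry, laid out as read() and write() below handle it
# (hypothetical node and text values):
#
#   import struct
#   node = b'\x00' * 20                          # 20-byte binary nodeid
#   text = b'foo.py\x00' + b'a' * 40 + b'\n'     # raw manifest text
#   entry = node + struct.pack('>L', len(text)) + text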
1279 1279
1280 1280 _file = 'manifestfulltextcache'
1281 1281
1282 1282 def __init__(self, max):
1283 1283 super(manifestfulltextcache, self).__init__(max)
1284 1284 self._dirty = False
1285 1285 self._read = False
1286 1286 self._opener = None
1287 1287
1288 1288 def read(self):
1289 1289 if self._read or self._opener is None:
1290 1290 return
1291 1291
1292 1292 try:
1293 1293 with self._opener(self._file) as fp:
1294 1294 set = super(manifestfulltextcache, self).__setitem__
1295 1295 # ignore trailing data, this is a cache, corruption is skipped
1296 1296 while True:
1297 1297 node = fp.read(20)
1298 1298 if len(node) < 20:
1299 1299 break
1300 1300 try:
1301 1301 size = struct.unpack('>L', fp.read(4))[0]
1302 1302 except struct.error:
1303 1303 break
1304 1304 value = bytearray(fp.read(size))
1305 1305 if len(value) != size:
1306 1306 break
1307 1307 set(node, value)
1308 1308 except IOError:
1309 1309 # the file is allowed to be missing
1310 1310 pass
1311 1311
1312 1312 self._read = True
1313 1313 self._dirty = False
1314 1314
1315 1315 def write(self):
1316 1316 if not self._dirty or self._opener is None:
1317 1317 return
1318 1318 # rotate backwards to the first used node
1319 1319 with self._opener(self._file, 'w', atomictemp=True, checkambig=True
1320 1320 ) as fp:
1321 1321 node = self._head.prev
1322 1322 while True:
1323 1323 if node.key in self._cache:
1324 1324 fp.write(node.key)
1325 1325 fp.write(struct.pack('>L', len(node.value)))
1326 1326 fp.write(node.value)
1327 1327 if node is self._head:
1328 1328 break
1329 1329 node = node.prev
1330 1330
1331 1331 def __len__(self):
1332 1332 if not self._read:
1333 1333 self.read()
1334 1334 return super(manifestfulltextcache, self).__len__()
1335 1335
1336 1336 def __contains__(self, k):
1337 1337 if not self._read:
1338 1338 self.read()
1339 1339 return super(manifestfulltextcache, self).__contains__(k)
1340 1340
1341 1341 def __iter__(self):
1342 1342 if not self._read:
1343 1343 self.read()
1344 1344 return super(manifestfulltextcache, self).__iter__()
1345 1345
1346 1346 def __getitem__(self, k):
1347 1347 if not self._read:
1348 1348 self.read()
1349 1349 # the cache lru order can change on read
1350 1350 setdirty = self._cache.get(k) is not self._head
1351 1351 value = super(manifestfulltextcache, self).__getitem__(k)
1352 1352 if setdirty:
1353 1353 self._dirty = True
1354 1354 return value
1355 1355
1356 1356 def __setitem__(self, k, v):
1357 1357 if not self._read:
1358 1358 self.read()
1359 1359 super(manifestfulltextcache, self).__setitem__(k, v)
1360 1360 self._dirty = True
1361 1361
1362 1362 def __delitem__(self, k):
1363 1363 if not self._read:
1364 1364 self.read()
1365 1365 super(manifestfulltextcache, self).__delitem__(k)
1366 1366 self._dirty = True
1367 1367
1368 1368 def get(self, k, default=None):
1369 1369 if not self._read:
1370 1370 self.read()
1371 1371 return super(manifestfulltextcache, self).get(k, default=default)
1372 1372
1373 1373 def clear(self, clear_persisted_data=False):
1374 1374 super(manifestfulltextcache, self).clear()
1375 1375 if clear_persisted_data:
1376 1376 self._dirty = True
1377 1377 self.write()
1378 1378 self._read = False
1379 1379
1380 1380 @interfaceutil.implementer(repository.imanifeststorage)
1381 1381 class manifestrevlog(object):
1382 1382 '''A revlog that stores manifest texts. This is responsible for caching the
1383 1383 full-text manifest contents.
1384 1384 '''
1385 1385 def __init__(self, opener, tree='', dirlogcache=None, indexfile=None,
1386 1386 treemanifest=False):
1387 1387 """Constructs a new manifest revlog
1388 1388
1389 1389 `indexfile` - used by extensions to have two manifests at once, like
1390 1390 when transitioning between flat manifests and tree manifests.
1391 1391
1392 1392 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1393 1393 options can also be used to make this a tree manifest revlog. The opener
1394 1394 option takes precedence, so if it is set to True, we ignore whatever
1395 1395 value is passed in to the constructor.
1396 1396 """
1397 1397 # During normal operations, we expect to deal with not more than four
1398 1398 # revs at a time (such as during commit --amend). When rebasing large
1399 1399 # stacks of commits, the number can go up, hence the config knob below.
1400 1400 cachesize = 4
1401 1401 optiontreemanifest = False
1402 1402 opts = getattr(opener, 'options', None)
1403 1403 if opts is not None:
1404 1404 cachesize = opts.get('manifestcachesize', cachesize)
1405 1405 optiontreemanifest = opts.get('treemanifest', False)
1406 1406
1407 1407 self._treeondisk = optiontreemanifest or treemanifest
1408 1408
1409 1409 self._fulltextcache = manifestfulltextcache(cachesize)
1410 1410
1411 1411 if tree:
1412 1412 assert self._treeondisk, 'opts is %r' % opts
1413 1413
1414 1414 if indexfile is None:
1415 1415 indexfile = '00manifest.i'
1416 1416 if tree:
1417 1417 indexfile = "meta/" + tree + indexfile
1418 1418
1419 1419 self.tree = tree
1420 1420
1421 1421 # The dirlogcache is kept on the root manifest log
1422 1422 if tree:
1423 1423 self._dirlogcache = dirlogcache
1424 1424 else:
1425 1425 self._dirlogcache = {'': self}
1426 1426
1427 1427 self._revlog = revlog.revlog(opener, indexfile,
1428 1428 # only root indexfile is cached
1429 1429 checkambig=not bool(tree),
1430 1430 mmaplargeindex=True)
1431 1431
1432 1432 self.index = self._revlog.index
1433 1433 self.version = self._revlog.version
1434 1434 self._generaldelta = self._revlog._generaldelta
1435 1435
1436 1436 def _setupmanifestcachehooks(self, repo):
1437 1437 """Persist the manifestfulltextcache on lock release"""
1438 1438 if not util.safehasattr(repo, '_wlockref'):
1439 1439 return
1440 1440
1441 1441 self._fulltextcache._opener = repo.wcachevfs
1442 1442 if repo._currentlock(repo._wlockref) is None:
1443 1443 return
1444 1444
1445 1445 reporef = weakref.ref(repo)
1446 1446 manifestrevlogref = weakref.ref(self)
1447 1447
1448 1448 def persistmanifestcache():
1449 1449 repo = reporef()
1450 1450 self = manifestrevlogref()
1451 1451 if repo is None or self is None:
1452 1452 return
1453 1453 if repo.manifestlog.getstorage(b'') is not self:
1454 1454 # there's a different manifest in play now, abort
1455 1455 return
1456 1456 self._fulltextcache.write()
1457 1457
1458 1458 repo._afterlock(persistmanifestcache)
1459 1459
1460 1460 @property
1461 1461 def fulltextcache(self):
1462 1462 return self._fulltextcache
1463 1463
1464 1464 def clearcaches(self, clear_persisted_data=False):
1465 1465 self._revlog.clearcaches()
1466 1466 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1467 1467 self._dirlogcache = {self.tree: self}
1468 1468
1469 1469 def dirlog(self, d):
1470 1470 if d:
1471 1471 assert self._treeondisk
1472 1472 if d not in self._dirlogcache:
1473 1473 mfrevlog = manifestrevlog(self.opener, d,
1474 1474 self._dirlogcache,
1475 1475 treemanifest=self._treeondisk)
1476 1476 self._dirlogcache[d] = mfrevlog
1477 1477 return self._dirlogcache[d]
1478 1478
1479 1479 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None,
1480 1480 match=None):
1481 1481 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1482 1482 # If our first parent is in the manifest cache, we can
1483 1483 # compute a delta here using properties we know about the
1484 1484 # manifest up-front, which may save time later for the
1485 1485 # revlog layer.
1486 1486
1487 1487 _checkforbidden(added)
1488 1488 # combine the changed lists into one sorted iterator
1489 1489 work = heapq.merge([(x, False) for x in added],
1490 1490 [(x, True) for x in removed])
1491 1491
1492 1492 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1493 1493 cachedelta = self._revlog.rev(p1), deltatext
1494 1494 text = util.buffer(arraytext)
1495 1495 n = self._revlog.addrevision(text, transaction, link, p1, p2,
1496 1496 cachedelta)
1497 1497 else:
1498 1498 # The first parent manifest isn't already loaded, so we'll
1499 1499 # just encode a fulltext of the manifest and pass that
1500 1500 # through to the revlog layer, and let it handle the delta
1501 1501 # process.
1502 1502 if self._treeondisk:
1503 1503 assert readtree, "readtree must be set for treemanifest writes"
1504 1504 assert match, "match must be specified for treemanifest writes"
1505 1505 m1 = readtree(self.tree, p1)
1506 1506 m2 = readtree(self.tree, p2)
1507 1507 n = self._addtree(m, transaction, link, m1, m2, readtree,
1508 1508 match=match)
1509 1509 arraytext = None
1510 1510 else:
1511 1511 text = m.text()
1512 1512 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1513 1513 arraytext = bytearray(text)
1514 1514
1515 1515 if arraytext is not None:
1516 1516 self.fulltextcache[n] = arraytext
1517 1517
1518 1518 return n
1519 1519
1520 1520 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1521 1521 # If the manifest is unchanged compared to one parent,
1522 1522 # don't write a new revision
1523 1523 if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(
1524 1524 m2)):
1525 1525 return m.node()
1526 1526 def writesubtree(subm, subp1, subp2, match):
1527 1527 sublog = self.dirlog(subm.dir())
1528 1528 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1529 1529 readtree=readtree, match=match)
1530 1530 m.writesubtrees(m1, m2, writesubtree, match)
1531 1531 text = m.dirtext()
1532 1532 n = None
1533 1533 if self.tree != '':
1534 1534 # Double-check whether contents are unchanged to one parent
1535 1535 if text == m1.dirtext():
1536 1536 n = m1.node()
1537 1537 elif text == m2.dirtext():
1538 1538 n = m2.node()
1539 1539
1540 1540 if not n:
1541 1541 n = self._revlog.addrevision(text, transaction, link, m1.node(),
1542 1542 m2.node())
1543 1543
1544 1544 # Save nodeid so parent manifest can calculate its nodeid
1545 1545 m.setnode(n)
1546 1546 return n
1547 1547
1548 1548 def __len__(self):
1549 1549 return len(self._revlog)
1550 1550
1551 1551 def __iter__(self):
1552 1552 return self._revlog.__iter__()
1553 1553
1554 1554 def rev(self, node):
1555 1555 return self._revlog.rev(node)
1556 1556
1557 1557 def node(self, rev):
1558 1558 return self._revlog.node(rev)
1559 1559
1560 1560 def lookup(self, value):
1561 1561 return self._revlog.lookup(value)
1562 1562
1563 1563 def parentrevs(self, rev):
1564 1564 return self._revlog.parentrevs(rev)
1565 1565
1566 1566 def parents(self, node):
1567 1567 return self._revlog.parents(node)
1568 1568
1569 1569 def linkrev(self, rev):
1570 1570 return self._revlog.linkrev(rev)
1571 1571
1572 1572 def checksize(self):
1573 1573 return self._revlog.checksize()
1574 1574
1575 1575 def revision(self, node, _df=None, raw=False):
1576 1576 return self._revlog.revision(node, _df=_df, raw=raw)
1577 1577
1578 1578 def revdiff(self, rev1, rev2):
1579 1579 return self._revlog.revdiff(rev1, rev2)
1580 1580
1581 1581 def cmp(self, node, text):
1582 1582 return self._revlog.cmp(node, text)
1583 1583
1584 1584 def deltaparent(self, rev):
1585 1585 return self._revlog.deltaparent(rev)
1586 1586
1587 1587 def emitrevisions(self, nodes, nodesorder=None,
1588 1588 revisiondata=False, assumehaveparentrevisions=False,
1589 1589 deltamode=repository.CG_DELTAMODE_STD):
1590 1590 return self._revlog.emitrevisions(
1591 1591 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
1592 1592 assumehaveparentrevisions=assumehaveparentrevisions,
1593 1593 deltamode=deltamode)
1594 1594
1595 1595 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1596 1596 return self._revlog.addgroup(deltas, linkmapper, transaction,
1597 1597 addrevisioncb=addrevisioncb)
1598 1598
1599 1599 def rawsize(self, rev):
1600 1600 return self._revlog.rawsize(rev)
1601 1601
1602 1602 def getstrippoint(self, minlink):
1603 1603 return self._revlog.getstrippoint(minlink)
1604 1604
1605 1605 def strip(self, minlink, transaction):
1606 1606 return self._revlog.strip(minlink, transaction)
1607 1607
1608 1608 def files(self):
1609 1609 return self._revlog.files()
1610 1610
1611 1611 def clone(self, tr, destrevlog, **kwargs):
1612 1612 if not isinstance(destrevlog, manifestrevlog):
1613 1613 raise error.ProgrammingError('expected manifestrevlog to clone()')
1614 1614
1615 1615 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1616 1616
1617 1617 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
1618 1618 revisionscount=False, trackedsize=False,
1619 1619 storedsize=False):
1620 1620 return self._revlog.storageinfo(
1621 1621 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
1622 1622 revisionscount=revisionscount, trackedsize=trackedsize,
1623 1623 storedsize=storedsize)
1624 1624
1625 1625 @property
1626 1626 def indexfile(self):
1627 1627 return self._revlog.indexfile
1628 1628
1629 1629 @indexfile.setter
1630 1630 def indexfile(self, value):
1631 1631 self._revlog.indexfile = value
1632 1632
1633 1633 @property
1634 1634 def opener(self):
1635 1635 return self._revlog.opener
1636 1636
1637 1637 @opener.setter
1638 1638 def opener(self, value):
1639 1639 self._revlog.opener = value
1640 1640
1641 1641 @interfaceutil.implementer(repository.imanifestlog)
1642 1642 class manifestlog(object):
1643 1643 """A collection class representing the collection of manifest snapshots
1644 1644 referenced by commits in the repository.
1645 1645
1646 1646 In this situation, 'manifest' refers to the abstract concept of a snapshot
1647 1647 of the list of files in the given commit. Consumers of the output of this
1648 1648 class do not care about the implementation details of the actual manifests
1649 1649 they receive (i.e. tree or flat or lazily loaded, etc)."""
1650 1650 def __init__(self, opener, repo, rootstore, narrowmatch):
1651 1651 usetreemanifest = False
1652 1652 cachesize = 4
1653 1653
1654 1654 opts = getattr(opener, 'options', None)
1655 1655 if opts is not None:
1656 1656 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1657 1657 cachesize = opts.get('manifestcachesize', cachesize)
1658 1658
1659 1659 self._treemanifests = usetreemanifest
1660 1660
1661 1661 self._rootstore = rootstore
1662 1662 self._rootstore._setupmanifestcachehooks(repo)
1663 1663 self._narrowmatch = narrowmatch
1664 1664
1665 1665 # A cache of the manifestctx or treemanifestctx for each directory
1666 1666 self._dirmancache = {}
1667 1667 self._dirmancache[''] = util.lrucachedict(cachesize)
1668 1668
1669 1669 self._cachesize = cachesize
1670 1670
1671 1671 def __getitem__(self, node):
1672 1672 """Retrieves the manifest instance for the given node. Throws a
1673 1673 LookupError if not found.
1674 1674 """
1675 1675 return self.get('', node)
1676 1676
1677 1677 def get(self, tree, node, verify=True):
1678 1678 """Retrieves the manifest instance for the given node. Throws a
1679 1679 LookupError if not found.
1680 1680
1681 1681 `verify` - if True an exception will be thrown if the node is not in
1682 1682 the revlog
1683 1683 """
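        # Usage sketch (illustrative): __getitem__ above delegates to
        # get('', node), and tree manifests are addressed by a directory
        # path with a trailing slash (e.g. 'foo/'); 'mfl' and 'node' are
        # assumed names:
        #
        #   mfl[node]              # root manifest, same as mfl.get('', node)
        #   mfl.get('foo/', node)  # tree manifest for directory 'foo/'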
1684 1684 if node in self._dirmancache.get(tree, ()):
1685 1685 return self._dirmancache[tree][node]
1686 1686
1687 1687 if not self._narrowmatch.always():
1688 if not self._narrowmatch.visitdir(tree[:-1] or '.'):
1688 if not self._narrowmatch.visitdir(tree[:-1]):
1689 1689 return excludeddirmanifestctx(tree, node)
1690 1690 if tree:
1691 1691 if self._rootstore._treeondisk:
1692 1692 if verify:
1693 1693 # Side-effect is LookupError is raised if node doesn't
1694 1694 # exist.
1695 1695 self.getstorage(tree).rev(node)
1696 1696
1697 1697 m = treemanifestctx(self, tree, node)
1698 1698 else:
1699 1699 raise error.Abort(
1700 1700 _("cannot ask for manifest directory '%s' in a flat "
1701 1701 "manifest") % tree)
1702 1702 else:
1703 1703 if verify:
1704 1704 # Side-effect is LookupError is raised if node doesn't exist.
1705 1705 self._rootstore.rev(node)
1706 1706
1707 1707 if self._treemanifests:
1708 1708 m = treemanifestctx(self, '', node)
1709 1709 else:
1710 1710 m = manifestctx(self, node)
1711 1711
1712 1712 if node != nullid:
1713 1713 mancache = self._dirmancache.get(tree)
1714 1714 if not mancache:
1715 1715 mancache = util.lrucachedict(self._cachesize)
1716 1716 self._dirmancache[tree] = mancache
1717 1717 mancache[node] = m
1718 1718 return m
1719 1719
1720 1720 def getstorage(self, tree):
1721 1721 return self._rootstore.dirlog(tree)
1722 1722
1723 1723 def clearcaches(self, clear_persisted_data=False):
1724 1724 self._dirmancache.clear()
1725 1725 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1726 1726
1727 1727 def rev(self, node):
1728 1728 return self._rootstore.rev(node)
1729 1729
1730 1730 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1731 1731 class memmanifestctx(object):
1732 1732 def __init__(self, manifestlog):
1733 1733 self._manifestlog = manifestlog
1734 1734 self._manifestdict = manifestdict()
1735 1735
1736 1736 def _storage(self):
1737 1737 return self._manifestlog.getstorage(b'')
1738 1738
1739 1739 def new(self):
1740 1740 return memmanifestctx(self._manifestlog)
1741 1741
1742 1742 def copy(self):
1743 1743 memmf = memmanifestctx(self._manifestlog)
1744 1744 memmf._manifestdict = self.read().copy()
1745 1745 return memmf
1746 1746
1747 1747 def read(self):
1748 1748 return self._manifestdict
1749 1749
1750 1750 def write(self, transaction, link, p1, p2, added, removed, match=None):
1751 1751 return self._storage().add(self._manifestdict, transaction, link,
1752 1752 p1, p2, added, removed, match=match)
1753 1753
1754 1754 @interfaceutil.implementer(repository.imanifestrevisionstored)
1755 1755 class manifestctx(object):
1756 1756 """A class representing a single revision of a manifest, including its
1757 1757 contents, its parent revs, and its linkrev.
1758 1758 """
1759 1759 def __init__(self, manifestlog, node):
1760 1760 self._manifestlog = manifestlog
1761 1761 self._data = None
1762 1762
1763 1763 self._node = node
1764 1764
1765 1765 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1766 1766 # but let's add it later when something needs it and we can load it
1767 1767 # lazily.
1768 1768 #self.p1, self.p2 = store.parents(node)
1769 1769 #rev = store.rev(node)
1770 1770 #self.linkrev = store.linkrev(rev)
1771 1771
1772 1772 def _storage(self):
1773 1773 return self._manifestlog.getstorage(b'')
1774 1774
1775 1775 def node(self):
1776 1776 return self._node
1777 1777
1778 1778 def new(self):
1779 1779 return memmanifestctx(self._manifestlog)
1780 1780
1781 1781 def copy(self):
1782 1782 memmf = memmanifestctx(self._manifestlog)
1783 1783 memmf._manifestdict = self.read().copy()
1784 1784 return memmf
1785 1785
1786 1786 @propertycache
1787 1787 def parents(self):
1788 1788 return self._storage().parents(self._node)
1789 1789
1790 1790 def read(self):
1791 1791 if self._data is None:
1792 1792 if self._node == nullid:
1793 1793 self._data = manifestdict()
1794 1794 else:
1795 1795 store = self._storage()
1796 1796 if self._node in store.fulltextcache:
1797 1797 text = pycompat.bytestr(store.fulltextcache[self._node])
1798 1798 else:
1799 1799 text = store.revision(self._node)
1800 1800 arraytext = bytearray(text)
1801 1801 store.fulltextcache[self._node] = arraytext
1802 1802 self._data = manifestdict(text)
1803 1803 return self._data
1804 1804
1805 1805 def readfast(self, shallow=False):
1806 1806 '''Calls either readdelta or read, based on which would be less work.
1807 1807 readdelta is called if the delta is against the p1, and therefore can be
1808 1808 read quickly.
1809 1809
1810 1810 If `shallow` is True, nothing changes since this is a flat manifest.
1811 1811 '''
1812 1812 store = self._storage()
1813 1813 r = store.rev(self._node)
1814 1814 deltaparent = store.deltaparent(r)
1815 1815 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
1816 1816 return self.readdelta()
1817 1817 return self.read()
1818 1818
1819 1819 def readdelta(self, shallow=False):
1820 1820 '''Returns a manifest containing just the entries that are present
1821 1821 in this manifest, but not in its p1 manifest. This is efficient to read
1822 1822 if the revlog delta is already p1.
1823 1823
1824 1824 Changing the value of `shallow` has no effect on flat manifests.
1825 1825 '''
1826 1826 store = self._storage()
1827 1827 r = store.rev(self._node)
1828 1828 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
1829 1829 return manifestdict(d)
1830 1830
1831 1831 def find(self, key):
1832 1832 return self.read().find(key)
1833 1833
1834 1834 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1835 1835 class memtreemanifestctx(object):
1836 1836 def __init__(self, manifestlog, dir=''):
1837 1837 self._manifestlog = manifestlog
1838 1838 self._dir = dir
1839 1839 self._treemanifest = treemanifest()
1840 1840
1841 1841 def _storage(self):
1842 1842 return self._manifestlog.getstorage(b'')
1843 1843
1844 1844 def new(self, dir=''):
1845 1845 return memtreemanifestctx(self._manifestlog, dir=dir)
1846 1846
1847 1847 def copy(self):
1848 1848 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1849 1849 memmf._treemanifest = self._treemanifest.copy()
1850 1850 return memmf
1851 1851
1852 1852 def read(self):
1853 1853 return self._treemanifest
1854 1854
1855 1855 def write(self, transaction, link, p1, p2, added, removed, match=None):
1856 1856 def readtree(dir, node):
1857 1857 return self._manifestlog.get(dir, node).read()
1858 1858 return self._storage().add(self._treemanifest, transaction, link,
1859 1859 p1, p2, added, removed, readtree=readtree,
1860 1860 match=match)
1861 1861
1862 1862 @interfaceutil.implementer(repository.imanifestrevisionstored)
1863 1863 class treemanifestctx(object):
1864 1864 def __init__(self, manifestlog, dir, node):
1865 1865 self._manifestlog = manifestlog
1866 1866 self._dir = dir
1867 1867 self._data = None
1868 1868
1869 1869 self._node = node
1870 1870
1871 1871 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1872 1872 # we can instantiate treemanifestctx objects for directories we don't
1873 1873 # have on disk.
1874 1874 #self.p1, self.p2 = store.parents(node)
1875 1875 #rev = store.rev(node)
1876 1876 #self.linkrev = store.linkrev(rev)
1877 1877
1878 1878 def _storage(self):
1879 1879 narrowmatch = self._manifestlog._narrowmatch
1880 1880 if not narrowmatch.always():
1881 if not narrowmatch.visitdir(self._dir[:-1] or '.'):
1881 if not narrowmatch.visitdir(self._dir[:-1]):
1882 1882 return excludedmanifestrevlog(self._dir)
1883 1883 return self._manifestlog.getstorage(self._dir)
1884 1884
1885 1885 def read(self):
1886 1886 if self._data is None:
1887 1887 store = self._storage()
1888 1888 if self._node == nullid:
1889 1889 self._data = treemanifest()
1890 1890 # TODO accessing non-public API
1891 1891 elif store._treeondisk:
1892 1892 m = treemanifest(dir=self._dir)
1893 1893 def gettext():
1894 1894 return store.revision(self._node)
1895 1895 def readsubtree(dir, subm):
1896 1896 # Set verify to False since we need to be able to create
1897 1897 # subtrees for trees that don't exist on disk.
1898 1898 return self._manifestlog.get(dir, subm, verify=False).read()
1899 1899 m.read(gettext, readsubtree)
1900 1900 m.setnode(self._node)
1901 1901 self._data = m
1902 1902 else:
1903 1903 if self._node in store.fulltextcache:
1904 1904 text = pycompat.bytestr(store.fulltextcache[self._node])
1905 1905 else:
1906 1906 text = store.revision(self._node)
1907 1907 arraytext = bytearray(text)
1908 1908 store.fulltextcache[self._node] = arraytext
1909 1909 self._data = treemanifest(dir=self._dir, text=text)
1910 1910
1911 1911 return self._data
1912 1912
1913 1913 def node(self):
1914 1914 return self._node
1915 1915
1916 1916 def new(self, dir=''):
1917 1917 return memtreemanifestctx(self._manifestlog, dir=dir)
1918 1918
1919 1919 def copy(self):
1920 1920 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1921 1921 memmf._treemanifest = self.read().copy()
1922 1922 return memmf
1923 1923
1924 1924 @propertycache
1925 1925 def parents(self):
1926 1926 return self._storage().parents(self._node)
1927 1927
1928 1928 def readdelta(self, shallow=False):
1929 1929 '''Returns a manifest containing just the entries that are present
1930 1930 in this manifest, but not in its p1 manifest. This is efficient to read
1931 1931 if the revlog delta is already p1.
1932 1932
1933 1933 If `shallow` is True, this will read the delta for this directory,
1934 1934 without recursively reading subdirectory manifests. Instead, any
1935 1935 subdirectory entry will be reported as it appears in the manifest, i.e.
1936 1936 the subdirectory will be reported among files and distinguished only by
1937 1937 its 't' flag.
1938 1938 '''
1939 1939 store = self._storage()
1940 1940 if shallow:
1941 1941 r = store.rev(self._node)
1942 1942 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
1943 1943 return manifestdict(d)
1944 1944 else:
1945 1945 # Need to perform a slow delta
1946 1946 r0 = store.deltaparent(store.rev(self._node))
1947 1947 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
1948 1948 m1 = self.read()
1949 1949 md = treemanifest(dir=self._dir)
1950 1950 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1951 1951 if n1:
1952 1952 md[f] = n1
1953 1953 if fl1:
1954 1954 md.setflag(f, fl1)
1955 1955 return md
1956 1956
1957 1957 def readfast(self, shallow=False):
1958 1958 '''Calls either readdelta or read, based on which would be less work.
1959 1959 readdelta is called if the delta is against the p1, and therefore can be
1960 1960 read quickly.
1961 1961
1962 1962 If `shallow` is True, it only returns the entries from this manifest,
1963 1963 and not any submanifests.
1964 1964 '''
1965 1965 store = self._storage()
1966 1966 r = store.rev(self._node)
1967 1967 deltaparent = store.deltaparent(r)
1968 1968 if (deltaparent != nullrev and
1969 1969 deltaparent in store.parentrevs(r)):
1970 1970 return self.readdelta(shallow=shallow)
1971 1971
1972 1972 if shallow:
1973 1973 return manifestdict(store.revision(self._node))
1974 1974 else:
1975 1975 return self.read()
1976 1976
1977 1977 def find(self, key):
1978 1978 return self.read().find(key)
1979 1979
1980 1980 class excludeddir(treemanifest):
1981 1981 """Stand-in for a directory that is excluded from the repository.
1982 1982
1983 1983 With narrowing active on a repository that uses treemanifests,
1984 1984 some of the directory revlogs will be excluded from the resulting
1985 1985 clone. This is a huge storage win for clients, but means we need
1986 1986 some sort of pseudo-manifest to surface to internals so we can
1987 1987 detect a merge conflict outside the narrowspec. That's what this
1988 1988 class is: it stands in for a directory whose node is known, but
1989 1989 whose contents are unknown.
1990 1990 """
1991 1991 def __init__(self, dir, node):
1992 1992 super(excludeddir, self).__init__(dir)
1993 1993 self._node = node
1994 1994 # Add an empty file, which will be included by iterators and such,
1995 1995 # appearing as the directory itself (i.e. something like "dir/")
1996 1996 self._files[''] = node
1997 1997 self._flags[''] = 't'
1998 1998
1999 1999 # Manifests outside the narrowspec should never be modified, so avoid
2000 2000 # copying. This makes a noticeable difference when there are very many
2001 2001 # directories outside the narrowspec. Also, it makes sense for the copy to
2002 2002 # be of the same type as the original, which would not happen with the
2003 2003 # super type's copy().
2004 2004 def copy(self):
2005 2005 return self
2006 2006
2007 2007 class excludeddirmanifestctx(treemanifestctx):
2008 2008 """context wrapper for excludeddir - see that docstring for rationale"""
2009 2009 def __init__(self, dir, node):
2010 2010 self._dir = dir
2011 2011 self._node = node
2012 2012
2013 2013 def read(self):
2014 2014 return excludeddir(self._dir, self._node)
2015 2015
2016 2016 def write(self, *args):
2017 2017 raise error.ProgrammingError(
2018 2018 'attempt to write manifest from excluded dir %s' % self._dir)
2019 2019
2020 2020 class excludedmanifestrevlog(manifestrevlog):
2021 2021 """Stand-in for excluded treemanifest revlogs.
2022 2022
2023 2023 When narrowing is active on a treemanifest repository, we'll have
2024 2024 references to directories we can't see due to the revlog being
2025 2025 skipped. This class exists to conform to the manifestrevlog
2026 2026 interface for those directories and proactively prevent writes to
2027 2027 outside the narrowspec.
2028 2028 """
2029 2029
2030 2030 def __init__(self, dir):
2031 2031 self._dir = dir
2032 2032
2033 2033 def __len__(self):
2034 2034 raise error.ProgrammingError(
2035 2035 'attempt to get length of excluded dir %s' % self._dir)
2036 2036
2037 2037 def rev(self, node):
2038 2038 raise error.ProgrammingError(
2039 2039 'attempt to get rev from excluded dir %s' % self._dir)
2040 2040
2041 2041 def linkrev(self, node):
2042 2042 raise error.ProgrammingError(
2043 2043 'attempt to get linkrev from excluded dir %s' % self._dir)
2044 2044
2045 2045 def node(self, rev):
2046 2046 raise error.ProgrammingError(
2047 2047 'attempt to get node from excluded dir %s' % self._dir)
2048 2048
2049 2049 def add(self, *args, **kwargs):
2050 2050 # We should never write entries in dirlogs outside the narrow clone.
2051 2051 # However, the method still gets called from writesubtree() in
2052 2052 # _addtree(), so we need to handle it. We should possibly make that
2053 2053 # avoid calling add() with a clean manifest (_dirty is always False
2054 2054 # in excludeddir instances).
2055 2055 pass
@@ -1,1511 +1,1530 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 try:
28 28 from . import rustext
29 29 rustext.__name__ # force actual import (see hgdemandimport)
30 30 except ImportError:
31 31 rustext = None
32 32
33 33 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
34 34 'rootglob',
35 35 'listfile', 'listfile0', 'set', 'include', 'subinclude',
36 36 'rootfilesin')
37 37 cwdrelativepatternkinds = ('relpath', 'glob')
38 38
39 39 propertycache = util.propertycache
40 40
41 41 def _rematcher(regex):
42 42 '''compile the regexp with the best available regexp engine and return a
43 43 matcher function'''
44 44 m = util.re.compile(regex)
45 45 try:
46 46 # slightly faster, provided by facebook's re2 bindings
47 47 return m.test_match
48 48 except AttributeError:
49 49 return m.match
50 50
51 51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 53 matchers = []
54 54 other = []
55 55
56 56 for kind, pat, source in kindpats:
57 57 if kind == 'set':
58 58 if ctx is None:
59 59 raise error.ProgrammingError("fileset expression with no "
60 60 "context")
61 61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62 62
63 63 if listsubrepos:
64 64 for subpath in ctx.substate:
65 65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 67 matchers.append(pm)
68 68
69 69 continue
70 70 other.append((kind, pat, source))
71 71 return matchers, other
72 72
73 73 def _expandsubinclude(kindpats, root):
74 74 '''Returns the list of subinclude matcher args and the kindpats with the
75 75 subincludes removed.'''
76 76 relmatchers = []
77 77 other = []
78 78
79 79 for kind, pat, source in kindpats:
80 80 if kind == 'subinclude':
81 81 sourceroot = pathutil.dirname(util.normpath(source))
82 82 pat = util.pconvert(pat)
83 83 path = pathutil.join(sourceroot, pat)
84 84
85 85 newroot = pathutil.dirname(path)
86 86 matcherargs = (newroot, '', [], ['include:%s' % path])
87 87
88 88 prefix = pathutil.canonpath(root, root, newroot)
89 89 if prefix:
90 90 prefix += '/'
91 91 relmatchers.append((prefix, matcherargs))
92 92 else:
93 93 other.append((kind, pat, source))
94 94
95 95 return relmatchers, other
96 96
97 97 def _kindpatsalwaysmatch(kindpats):
98 98 """"Checks whether the kindspats match everything, as e.g.
99 99 'relpath:.' does.
100 100 """
101 101 for kind, pat, source in kindpats:
102 102 if pat != '' or kind not in ['relpath', 'glob']:
103 103 return False
104 104 return True
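# Illustrative behavior (assumed inputs, not doctests from the source):
# after normalization, 'relpath:.' arrives here as ('relpath', '', ''),
# which matches everything, while non-empty patterns or other kinds do not:
#
#   _kindpatsalwaysmatch([('relpath', '', '')])  ->  True
#   _kindpatsalwaysmatch([('glob', '*.c', '')])  ->  False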
105 105
106 106 def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
107 107 listsubrepos=False, badfn=None):
108 108 matchers = []
109 109 fms, kindpats = _expandsets(kindpats, ctx=ctx,
110 110 listsubrepos=listsubrepos, badfn=badfn)
111 111 if kindpats:
112 112 m = matchercls(root, kindpats, badfn=badfn)
113 113 matchers.append(m)
114 114 if fms:
115 115 matchers.extend(fms)
116 116 if not matchers:
117 117 return nevermatcher(badfn=badfn)
118 118 if len(matchers) == 1:
119 119 return matchers[0]
120 120 return unionmatcher(matchers)
121 121
122 122 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
123 123 auditor=None, ctx=None, listsubrepos=False, warn=None,
124 124 badfn=None, icasefs=False):
125 125 r"""build an object to match a set of file patterns
126 126
127 127 arguments:
128 128 root - the canonical root of the tree you're matching against
129 129 cwd - the current working directory, if relevant
130 130 patterns - patterns to find
131 131 include - patterns to include (unless they are excluded)
132 132 exclude - patterns to exclude (even if they are included)
133 133 default - if a pattern in patterns has no explicit type, assume this one
134 134 auditor - optional path auditor
135 135 ctx - optional changecontext
136 136 listsubrepos - if True, recurse into subrepositories
137 137 warn - optional function used for printing warnings
138 138 badfn - optional bad() callback for this matcher instead of the default
139 139 icasefs - make a matcher for wdir on case insensitive filesystems, which
140 140 normalizes the given patterns to the case in the filesystem
141 141
142 142 a pattern is one of:
143 143 'glob:<glob>' - a glob relative to cwd
144 144 're:<regexp>' - a regular expression
145 145 'path:<path>' - a path relative to repository root, which is matched
146 146 recursively
147 147 'rootfilesin:<path>' - a path relative to repository root, which is
148 148 matched non-recursively (will not match subdirectories)
149 149 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
150 150 'relpath:<path>' - a path relative to cwd
151 151 'relre:<regexp>' - a regexp that needn't match the start of a name
152 152 'set:<fileset>' - a fileset expression
153 153 'include:<path>' - a file of patterns to read and include
154 154 'subinclude:<path>' - a file of patterns to match against files under
155 155 the same directory
156 156 '<something>' - a pattern of the specified default type
157 157
158 158 Usually a patternmatcher is returned:
159 159 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
160 160 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
161 161
162 162 Combining 'patterns' with 'include' (resp. 'exclude') gives an
163 163 intersectionmatcher (resp. a differencematcher):
164 164 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
165 165 <class 'mercurial.match.intersectionmatcher'>
166 166 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
167 167 <class 'mercurial.match.differencematcher'>
168 168
169 169 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
170 170 >>> match(b'foo', b'.', [])
171 171 <alwaysmatcher>
172 172
173 173 The 'default' argument determines which kind of pattern is assumed if a
174 174 pattern has no prefix:
175 175 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
176 176 <patternmatcher patterns='.*\\.c$'>
177 177 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
178 178 <patternmatcher patterns='main\\.py(?:/|$)'>
179 179 >>> match(b'foo', b'.', [b'main.py'], default=b're')
180 180 <patternmatcher patterns='main.py'>
181 181
182 182 The primary use of matchers is to check whether a value (usually a file
183 183 name) matches against one of the patterns given at initialization. There
184 184 are two ways of doing this check.
185 185
186 186 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])
187 187
188 188 1. Calling the matcher with a file name returns True if any pattern
189 189 matches that file name:
190 190 >>> m(b'a')
191 191 True
192 192 >>> m(b'main.c')
193 193 True
194 194 >>> m(b'test.py')
195 195 False
196 196
197 197 2. Using the exact() method only returns True if the file name matches one
198 198 of the exact patterns (i.e. not re: or glob: patterns):
199 199 >>> m.exact(b'a')
200 200 True
201 201 >>> m.exact(b'main.c')
202 202 False
203 203 """
204 204 normalize = _donormalize
205 205 if icasefs:
206 206 dirstate = ctx.repo().dirstate
207 207 dsnormalize = dirstate.normalize
208 208
209 209 def normalize(patterns, default, root, cwd, auditor, warn):
210 210 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
211 211 kindpats = []
212 212 for kind, pats, source in kp:
213 213 if kind not in ('re', 'relre'): # regex can't be normalized
214 214 p = pats
215 215 pats = dsnormalize(pats)
216 216
217 217 # Preserve the original to handle a case only rename.
218 218 if p != pats and p in dirstate:
219 219 kindpats.append((kind, p, source))
220 220
221 221 kindpats.append((kind, pats, source))
222 222 return kindpats
223 223
224 224 if patterns:
225 225 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
226 226 if _kindpatsalwaysmatch(kindpats):
227 227 m = alwaysmatcher(badfn)
228 228 else:
229 229 m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
230 230 listsubrepos=listsubrepos, badfn=badfn)
231 231 else:
232 232 # It's a little strange that no patterns means to match everything.
233 233 # Consider changing this to match nothing (probably using nevermatcher).
234 234 m = alwaysmatcher(badfn)
235 235
236 236 if include:
237 237 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
238 238 im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
239 239 listsubrepos=listsubrepos, badfn=None)
240 240 m = intersectmatchers(m, im)
241 241 if exclude:
242 242 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
243 243 em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
244 244 listsubrepos=listsubrepos, badfn=None)
245 245 m = differencematcher(m, em)
246 246 return m
247 247
248 248 def exact(files, badfn=None):
249 249 return exactmatcher(files, badfn=badfn)
250 250
251 251 def always(badfn=None):
252 252 return alwaysmatcher(badfn)
253 253
254 254 def never(badfn=None):
255 255 return nevermatcher(badfn)
256 256
257 257 def badmatch(match, badfn):
258 258 """Make a copy of the given matcher, replacing its bad method with the given
259 259 one.
260 260 """
261 261 m = copy.copy(match)
262 262 m.bad = badfn
263 263 return m
264 264
265 265 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
266 266 '''Convert 'kind:pat' from the patterns list to tuples with kind and
267 267 normalized and rooted patterns and with listfiles expanded.'''
268 268 kindpats = []
269 269 for kind, pat in [_patsplit(p, default) for p in patterns]:
270 270 if kind in cwdrelativepatternkinds:
271 271 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
272 272 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
273 273 pat = util.normpath(pat)
274 274 elif kind in ('listfile', 'listfile0'):
275 275 try:
276 276 files = util.readfile(pat)
277 277 if kind == 'listfile0':
278 278 files = files.split('\0')
279 279 else:
280 280 files = files.splitlines()
281 281 files = [f for f in files if f]
282 282 except EnvironmentError:
283 283 raise error.Abort(_("unable to read file list (%s)") % pat)
284 284 for k, p, source in _donormalize(files, default, root, cwd,
285 285 auditor, warn):
286 286 kindpats.append((k, p, pat))
287 287 continue
288 288 elif kind == 'include':
289 289 try:
290 290 fullpath = os.path.join(root, util.localpath(pat))
291 291 includepats = readpatternfile(fullpath, warn)
292 292 for k, p, source in _donormalize(includepats, default,
293 293 root, cwd, auditor, warn):
294 294 kindpats.append((k, p, source or pat))
295 295 except error.Abort as inst:
296 296 raise error.Abort('%s: %s' % (pat, inst[0]))
297 297 except IOError as inst:
298 298 if warn:
299 299 warn(_("skipping unreadable pattern file '%s': %s\n") %
300 300 (pat, stringutil.forcebytestr(inst.strerror)))
301 301 continue
302 302 # else: re or relre - which cannot be normalized
303 303 kindpats.append((kind, pat, ''))
304 304 return kindpats
305 305
306 306 class basematcher(object):
307 307
308 308 def __init__(self, badfn=None):
309 309 if badfn is not None:
310 310 self.bad = badfn
311 311
312 312 def __call__(self, fn):
313 313 return self.matchfn(fn)
314 314 # Callbacks related to how the matcher is used by dirstate.walk.
315 315 # Subscribers to these events must monkeypatch the matcher object.
316 316 def bad(self, f, msg):
317 317 '''Callback from dirstate.walk for each explicit file that can't be
318 318 found/accessed, with an error message.'''
319 319
320 320 # If an explicitdir is set, it will be called when an explicitly listed
321 321 # directory is visited.
322 322 explicitdir = None
323 323
324 324 # If a traversedir is set, it will be called when a directory discovered
325 325 # by recursive traversal is visited.
326 326 traversedir = None
327 327
328 328 @propertycache
329 329 def _files(self):
330 330 return []
331 331
332 332 def files(self):
333 333 '''Explicitly listed files or patterns or roots:
334 334 if no patterns or .always(): empty list,
335 335 if exact: list exact files,
336 336 if not .anypats(): list all files and dirs,
337 337 else: optimal roots'''
338 338 return self._files
339 339
340 340 @propertycache
341 341 def _fileset(self):
342 342 return set(self._files)
343 343
344 344 def exact(self, f):
345 345 '''Returns True if f is in .files().'''
346 346 return f in self._fileset
347 347
348 348 def matchfn(self, f):
349 349 return False
350 350
351 351 def visitdir(self, dir):
352 352 '''Decides whether a directory should be visited based on whether it
353 353 has potential matches in it or one of its subdirectories. This is
354 354 based on the match's primary, included, and excluded patterns.
355 355
356 356 Returns the string 'all' if the given directory and all subdirectories
357 357 should be visited. Otherwise returns True or False indicating whether
358 358 the given directory should be visited.
359 359 '''
360 360 return True
361 361
362 362 def visitchildrenset(self, dir):
363 363 '''Decides whether a directory should be visited based on whether it
364 364 has potential matches in it or one of its subdirectories, and
365 365 potentially lists which subdirectories of that directory should be
366 366 visited. This is based on the match's primary, included, and excluded
367 367 patterns.
368 368
369 369 This function is very similar to 'visitdir', and the following mapping
370 370 can be applied:
371 371
372 372 visitdir | visitchildrenset
373 373 ----------+-------------------
374 374 False | set()
375 375 'all' | 'all'
376 376 True | 'this' OR non-empty set of subdirs -or files- to visit
377 377
378 378 Example:
379 379 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
380 380 the following values (assuming the implementation of visitchildrenset
381 381 is capable of recognizing this; some implementations are not).
382 382
383 '.' -> {'foo', 'qux'}
383 '' -> {'foo', 'qux'}
384 384 'baz' -> set()
385 385 'foo' -> {'bar'}
386 386 # Ideally this would be 'all', but since the prefix nature of matchers
387 387 # is applied to the entire matcher, we have to downgrade this to
388 388 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
389 389 # in.
390 390 'foo/bar' -> 'this'
391 391 'qux' -> 'this'
392 392
393 393 Important:
394 394 Most matchers do not know if they're representing files or
395 395 directories. They see ['path:dir/f'] and don't know whether 'f' is a
396 396 file or a directory, so visitchildrenset('dir') for most matchers will
397 397 return {'f'}, but if the matcher knows it's a file (like exactmatcher
398 398 does), it may return 'this'. Do not rely on the return being a set
399 399 indicating that there are no files in this dir to investigate (or
400 400 equivalently that if there are files to investigate in 'dir' that it
401 401 will always return 'this').
402 402 '''
403 403 return 'this'
404 404
405 405 def always(self):
406 406 '''Matcher will match everything and .files() will be empty --
407 407 optimization might be possible.'''
408 408 return False
409 409
410 410 def isexact(self):
411 411 '''Matcher will match exactly the list of files in .files() --
412 412 optimization might be possible.'''
413 413 return False
414 414
415 415 def prefix(self):
416 416 '''Matcher will match the paths in .files() recursively --
417 417 optimization might be possible.'''
418 418 return False
419 419
420 420 def anypats(self):
421 421 '''None of .always(), .isexact(), and .prefix() is true --
422 422 optimizations will be difficult.'''
423 423 return not self.always() and not self.isexact() and not self.prefix()
424 424
425 425 class alwaysmatcher(basematcher):
426 426 '''Matches everything.'''
427 427
428 428 def __init__(self, badfn=None):
429 429 super(alwaysmatcher, self).__init__(badfn)
430 430
431 431 def always(self):
432 432 return True
433 433
434 434 def matchfn(self, f):
435 435 return True
436 436
437 437 def visitdir(self, dir):
438 438 return 'all'
439 439
440 440 def visitchildrenset(self, dir):
441 441 return 'all'
442 442
443 443 def __repr__(self):
444 444 return r'<alwaysmatcher>'
445 445
446 446 class nevermatcher(basematcher):
447 447 '''Matches nothing.'''
448 448
449 449 def __init__(self, badfn=None):
450 450 super(nevermatcher, self).__init__(badfn)
451 451
452 452 # It's a little weird to say that the nevermatcher is an exact matcher
453 453 # or a prefix matcher, but it seems to make sense to let callers take
454 454 # fast paths based on either. There will be no exact matches, nor any
455 455 # prefixes (files() returns []), so fast paths iterating over them should
456 456 # be efficient (and correct).
457 457 def isexact(self):
458 458 return True
459 459
460 460 def prefix(self):
461 461 return True
462 462
463 463 def visitdir(self, dir):
464 464 return False
465 465
466 466 def visitchildrenset(self, dir):
467 467 return set()
468 468
469 469 def __repr__(self):
470 470 return r'<nevermatcher>'
471 471
472 472 class predicatematcher(basematcher):
473 473 """A matcher adapter for a simple boolean function"""
474 474
475 475 def __init__(self, predfn, predrepr=None, badfn=None):
476 476 super(predicatematcher, self).__init__(badfn)
477 477 self.matchfn = predfn
478 478 self._predrepr = predrepr
479 479
480 480 @encoding.strmethod
481 481 def __repr__(self):
482 482 s = (stringutil.buildrepr(self._predrepr)
483 483 or pycompat.byterepr(self.matchfn))
484 484 return '<predicatenmatcher pred=%s>' % s
485 485
486 def normalizerootdir(dir, funcname):
487 if dir == '.':
488 util.nouideprecwarn("match.%s() no longer accepts "
489 "'.', use '' instead." % funcname, '5.1')
490 return ''
491 return dir
492
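# Illustrative behavior of the helper above: '' is the canonical name for
# the repository root, and the old '.' spelling is rewritten with a
# deprecation warning:
#
#   normalizerootdir('', 'visitdir')     ->  ''
#   normalizerootdir('.', 'visitdir')    ->  ''   (plus a 5.1 deprecation warning)
#   normalizerootdir('foo', 'visitdir')  ->  'foo'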
493
486 494 class patternmatcher(basematcher):
487 495 """Matches a set of (kind, pat, source) against a 'root' directory.
488 496
489 497 >>> kindpats = [
490 498 ... (b're', br'.*\.c$', b''),
491 499 ... (b'path', b'foo/a', b''),
492 500 ... (b'relpath', b'b', b''),
493 501 ... (b'glob', b'*.h', b''),
494 502 ... ]
495 503 >>> m = patternmatcher(b'foo', kindpats)
496 504 >>> m(b'main.c') # matches re:.*\.c$
497 505 True
498 506 >>> m(b'b.txt')
499 507 False
500 508 >>> m(b'foo/a') # matches path:foo/a
501 509 True
502 510 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
503 511 False
504 512 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
505 513 True
506 514 >>> m(b'lib.h') # matches glob:*.h
507 515 True
508 516
509 517 >>> m.files()
510 ['.', 'foo/a', 'b', '.']
518 ['', 'foo/a', 'b', '']
511 519 >>> m.exact(b'foo/a')
512 520 True
513 521 >>> m.exact(b'b')
514 522 True
515 523 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
516 524 False
517 525 """
518 526
519 527 def __init__(self, root, kindpats, badfn=None):
520 528 super(patternmatcher, self).__init__(badfn)
521 529
522 530 self._files = _explicitfiles(kindpats)
523 531 self._prefix = _prefix(kindpats)
524 532 self._pats, self.matchfn = _buildmatch(kindpats, '$', root)
525 533
526 534 @propertycache
527 535 def _dirs(self):
528 return set(util.dirs(self._fileset)) | {'.'}
536 return set(util.dirs(self._fileset)) | {''}
529 537
530 538 def visitdir(self, dir):
539 dir = normalizerootdir(dir, 'visitdir')
531 540 if self._prefix and dir in self._fileset:
532 541 return 'all'
533 return ('.' in self._fileset or
542 return ('' in self._fileset or
534 543 dir in self._fileset or
535 544 dir in self._dirs or
536 545 any(parentdir in self._fileset
537 546 for parentdir in util.finddirs(dir)))
538 547
539 548 def visitchildrenset(self, dir):
540 549 ret = self.visitdir(dir)
541 550 if ret is True:
542 551 return 'this'
543 552 elif not ret:
544 553 return set()
545 554 assert ret == 'all'
546 555 return 'all'
547 556
548 557 def prefix(self):
549 558 return self._prefix
550 559
551 560 @encoding.strmethod
552 561 def __repr__(self):
553 562 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
554 563
555 564 # This is basically a reimplementation of util.dirs that stores the children
556 565 # instead of just a count of them, plus a small optional optimization to avoid
557 566 # some directories we don't need.
558 567 class _dirchildren(object):
559 568 def __init__(self, paths, onlyinclude=None):
560 569 self._dirs = {}
561 570 self._onlyinclude = onlyinclude or []
562 571 addpath = self.addpath
563 572 for f in paths:
564 573 addpath(f)
565 574
566 575 def addpath(self, path):
567 if path == '.':
576 if path == '':
568 577 return
569 578 dirs = self._dirs
570 579 findsplitdirs = _dirchildren._findsplitdirs
571 580 for d, b in findsplitdirs(path):
572 581 if d not in self._onlyinclude:
573 582 continue
574 583 dirs.setdefault(d, set()).add(b)
575 584
576 585 @staticmethod
577 586 def _findsplitdirs(path):
578 587 # yields (dirname, basename) tuples, walking back to the root. This is
579 588 # very similar to util.finddirs, except:
580 589 # - produces a (dirname, basename) tuple, not just 'dirname'
581 590 # - includes root dir
582 591 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
583 # slash, and produces '.' for the root instead of ''.
592 # slash.
584 593 oldpos = len(path)
585 594 pos = path.rfind('/')
586 595 while pos != -1:
587 596 yield path[:pos], path[pos + 1:oldpos]
588 597 oldpos = pos
589 598 pos = path.rfind('/', 0, pos)
590 yield '.', path[:oldpos]
599 yield '', path[:oldpos]
591 600
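    # Walk-through (illustrative): for the path 'a/b/c' the generator yields
    # ('a/b', 'c'), then ('a', 'b'), and finally ('', 'a') -- the root
    # dirname is now '' rather than '.':
    #
    #   list(_dirchildren._findsplitdirs('a/b/c'))
    #   ->  [('a/b', 'c'), ('a', 'b'), ('', 'a')]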
592 601 def get(self, path):
593 602 return self._dirs.get(path, set())
594 603
595 604 class includematcher(basematcher):
596 605
597 606 def __init__(self, root, kindpats, badfn=None):
598 607 super(includematcher, self).__init__(badfn)
599 608
600 609 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
601 610 self._prefix = _prefix(kindpats)
602 611 roots, dirs, parents = _rootsdirsandparents(kindpats)
603 612 # roots are directories which are recursively included.
604 613 self._roots = set(roots)
605 614 # dirs are directories which are non-recursively included.
606 615 self._dirs = set(dirs)
607 616 # parents are directories which are non-recursively included because
608 617 # they are needed to get to items in _dirs or _roots.
609 618 self._parents = set(parents)
610 619
611 620 def visitdir(self, dir):
621 dir = normalizerootdir(dir, 'visitdir')
612 622 if self._prefix and dir in self._roots:
613 623 return 'all'
614 return ('.' in self._roots or
624 return ('' in self._roots or
615 625 dir in self._roots or
616 626 dir in self._dirs or
617 627 dir in self._parents or
618 628 any(parentdir in self._roots
619 629 for parentdir in util.finddirs(dir)))
620 630
621 631 @propertycache
622 632 def _allparentschildren(self):
623 633 # It may seem odd that we add dirs, roots, and parents, and then
624 634 # restrict to only parents. This is to catch the case of:
625 635 # dirs = ['foo/bar']
626 636 # parents = ['foo']
627 637 # if we asked for the children of 'foo', but had only added
628 638 # self._parents, we wouldn't be able to respond ['bar'].
629 639 return _dirchildren(
630 640 itertools.chain(self._dirs, self._roots, self._parents),
631 641 onlyinclude=self._parents)
632 642
633 643 def visitchildrenset(self, dir):
634 644 if self._prefix and dir in self._roots:
635 645 return 'all'
636 646 # Note: this does *not* include the 'dir in self._parents' case from
637 647 # visitdir, that's handled below.
638 if ('.' in self._roots or
648 if ('' in self._roots or
639 649 dir in self._roots or
640 650 dir in self._dirs or
641 651 any(parentdir in self._roots
642 652 for parentdir in util.finddirs(dir))):
643 653 return 'this'
644 654
645 655 if dir in self._parents:
646 656 return self._allparentschildren.get(dir) or set()
647 657 return set()
648 658
649 659 @encoding.strmethod
650 660 def __repr__(self):
651 661 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
652 662
653 663 class exactmatcher(basematcher):
654 664 r'''Matches the input files exactly. They are interpreted as paths, not
655 665 patterns (so no kind-prefixes).
656 666
657 667 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
658 668 >>> m(b'a.txt')
659 669 True
660 670 >>> m(b'b.txt')
661 671 False
662 672
663 673 Input files that would be matched are exactly those returned by .files()
664 674 >>> m.files()
665 675 ['a.txt', 're:.*\\.c$']
666 676
667 677 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
668 678 >>> m(b'main.c')
669 679 False
670 680 >>> m(br're:.*\.c$')
671 681 True
672 682 '''
673 683
674 684 def __init__(self, files, badfn=None):
675 685 super(exactmatcher, self).__init__(badfn)
676 686
677 687 if isinstance(files, list):
678 688 self._files = files
679 689 else:
680 690 self._files = list(files)
681 691
682 692 matchfn = basematcher.exact
683 693
684 694 @propertycache
685 695 def _dirs(self):
686 return set(util.dirs(self._fileset)) | {'.'}
696 return set(util.dirs(self._fileset)) | {''}
687 697
688 698 def visitdir(self, dir):
699 dir = normalizerootdir(dir, 'visitdir')
689 700 return dir in self._dirs
690 701
691 702 def visitchildrenset(self, dir):
703 dir = normalizerootdir(dir, 'visitchildrenset')
704
692 705 if not self._fileset or dir not in self._dirs:
693 706 return set()
694 707
695 candidates = self._fileset | self._dirs - {'.'}
696 if dir != '.':
708 candidates = self._fileset | self._dirs - {''}
709 if dir != '':
697 710 d = dir + '/'
698 711 candidates = set(c[len(d):] for c in candidates if
699 712 c.startswith(d))
700 713 # self._dirs includes all of the directories, recursively, so if
701 # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
714 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
702 715 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
703 716 # '/' in it, indicating it's for a subdir-of-a-subdir; the
704 717 # immediate subdir will be in there without a slash.
705 718 ret = {c for c in candidates if '/' not in c}
706 719 # We really do not expect ret to be empty, since that would imply that
707 720 # there's something in _dirs that didn't have a file in _fileset.
708 721 assert ret
709 722 return ret
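    # Illustrative example (assumed file list): only immediate children of
    # the queried directory are reported, and the root is queried as '':
    #
    #   m = exactmatcher([b'dir/a.txt', b'dir/sub/b.txt'])
    #   m.visitchildrenset(b'dir')  ->  {'a.txt', 'sub'}
    #   m.visitchildrenset(b'')     ->  {'dir'}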
710 723
711 724 def isexact(self):
712 725 return True
713 726
714 727 @encoding.strmethod
715 728 def __repr__(self):
716 729 return ('<exactmatcher files=%r>' % self._files)
717 730
718 731 class differencematcher(basematcher):
719 732 '''Composes two matchers by matching if the first matches and the second
720 733 does not.
721 734
722 735 The second matcher's non-matching-attributes (bad, explicitdir,
723 736 traversedir) are ignored.
724 737 '''
725 738 def __init__(self, m1, m2):
726 739 super(differencematcher, self).__init__()
727 740 self._m1 = m1
728 741 self._m2 = m2
729 742 self.bad = m1.bad
730 743 self.explicitdir = m1.explicitdir
731 744 self.traversedir = m1.traversedir
732 745
733 746 def matchfn(self, f):
734 747 return self._m1(f) and not self._m2(f)
735 748
736 749 @propertycache
737 750 def _files(self):
738 751 if self.isexact():
739 752 return [f for f in self._m1.files() if self(f)]
740 753 # If m1 is not an exact matcher, we can't easily figure out the set of
741 754 # files, because its files() are not always files. For example, if
742 755 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
743 756 # want to remove "dir" from the set even though it would match m2,
744 757 # because the "dir" in m1 may not be a file.
745 758 return self._m1.files()
746 759
747 760 def visitdir(self, dir):
748 761 if self._m2.visitdir(dir) == 'all':
749 762 return False
750 763 elif not self._m2.visitdir(dir):
751 764 # m2 does not match dir, we can return 'all' here if possible
752 765 return self._m1.visitdir(dir)
753 766 return bool(self._m1.visitdir(dir))
754 767
755 768 def visitchildrenset(self, dir):
756 769 m2_set = self._m2.visitchildrenset(dir)
757 770 if m2_set == 'all':
758 771 return set()
759 772 m1_set = self._m1.visitchildrenset(dir)
760 773 # Possible values for m1: 'all', 'this', set(...), set()
761 774 # Possible values for m2: 'this', set(...), set()
762 775 # If m2 has nothing under here that we care about, return m1, even if
763 776 # it's 'all'. This is a change in behavior from visitdir, which would
764 777 # return True, not 'all', for some reason.
765 778 if not m2_set:
766 779 return m1_set
767 780 if m1_set in ['all', 'this']:
768 781 # Never return 'all' here if m2_set is any kind of non-empty (either
769 782 # 'this' or set(foo)), since m2 might return set() for a
770 783 # subdirectory.
771 784 return 'this'
772 785 # Possible values for m1: set(...), set()
773 786 # Possible values for m2: 'this', set(...)
774 787 # We ignore m2's set results. They're possibly incorrect:
775 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
788 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
776 789 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
777 790 # return set(), which is *not* correct, we still need to visit 'dir'!
778 791 return m1_set
779 792
780 793 def isexact(self):
781 794 return self._m1.isexact()
782 795
783 796 @encoding.strmethod
784 797 def __repr__(self):
785 798 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
786 799
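# Editor's aside -- a runnable sketch, not part of this changeset: the
# decision table that differencematcher.visitchildrenset() implements
# above, as a standalone function. 'all'/'this' are the sentinel strings
# of the matcher API; anything else is a set of child names.

def difference_children(m1_set, m2_set):
    if m2_set == 'all':
        return set()       # m2 matches everything below; nothing survives
    if not m2_set:
        return m1_set      # m2 is irrelevant here, m1 decides (even 'all')
    if m1_set in ('all', 'this'):
        return 'this'      # stay conservative: m2 may prune deeper down
    return m1_set          # m2's concrete set is ignored; see the comment

assert difference_children('all', 'all') == set()
assert difference_children('all', set()) == 'all'
assert difference_children('all', {'sub'}) == 'this'
assert difference_children({'dir'}, {'dir'}) == {'dir'}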
787 800 def intersectmatchers(m1, m2):
788 801 '''Composes two matchers by matching if both of them match.
789 802
790 803 The second matcher's non-matching-attributes (bad, explicitdir,
791 804 traversedir) are ignored.
792 805 '''
793 806 if m1 is None or m2 is None:
794 807 return m1 or m2
795 808 if m1.always():
796 809 m = copy.copy(m2)
797 810 # TODO: Consider encapsulating these things in a class so there's only
798 811 # one thing to copy from m1.
799 812 m.bad = m1.bad
800 813 m.explicitdir = m1.explicitdir
801 814 m.traversedir = m1.traversedir
802 815 return m
803 816 if m2.always():
804 817 m = copy.copy(m1)
805 818 return m
806 819 return intersectionmatcher(m1, m2)
807 820
808 821 class intersectionmatcher(basematcher):
809 822 def __init__(self, m1, m2):
810 823 super(intersectionmatcher, self).__init__()
811 824 self._m1 = m1
812 825 self._m2 = m2
813 826 self.bad = m1.bad
814 827 self.explicitdir = m1.explicitdir
815 828 self.traversedir = m1.traversedir
816 829
817 830 @propertycache
818 831 def _files(self):
819 832 if self.isexact():
820 833 m1, m2 = self._m1, self._m2
821 834 if not m1.isexact():
822 835 m1, m2 = m2, m1
823 836 return [f for f in m1.files() if m2(f)]
824 837 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
825 838 # the set of files, because their files() are not always files. For
826 839 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
827 840 # "path:dir2", we don't want to remove "dir2" from the set.
828 841 return self._m1.files() + self._m2.files()
829 842
830 843 def matchfn(self, f):
831 844 return self._m1(f) and self._m2(f)
832 845
833 846 def visitdir(self, dir):
834 847 visit1 = self._m1.visitdir(dir)
835 848 if visit1 == 'all':
836 849 return self._m2.visitdir(dir)
837 850 # bool() because visit1=True + visit2='all' should not be 'all'
838 851 return bool(visit1 and self._m2.visitdir(dir))
839 852
840 853 def visitchildrenset(self, dir):
841 854 m1_set = self._m1.visitchildrenset(dir)
842 855 if not m1_set:
843 856 return set()
844 857 m2_set = self._m2.visitchildrenset(dir)
845 858 if not m2_set:
846 859 return set()
847 860
848 861 if m1_set == 'all':
849 862 return m2_set
850 863 elif m2_set == 'all':
851 864 return m1_set
852 865
853 866 if m1_set == 'this' or m2_set == 'this':
854 867 return 'this'
855 868
856 869 assert isinstance(m1_set, set) and isinstance(m2_set, set)
857 870 return m1_set.intersection(m2_set)
858 871
859 872 def always(self):
860 873 return self._m1.always() and self._m2.always()
861 874
862 875 def isexact(self):
863 876 return self._m1.isexact() or self._m2.isexact()
864 877
865 878 @encoding.strmethod
866 879 def __repr__(self):
867 880 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
868 881
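# Editor's aside -- a runnable sketch, not part of this changeset:
# intersecting two visitchildrenset() answers, mirroring the method
# above. `intersect_children` is a hypothetical standalone name.

def intersect_children(m1_set, m2_set):
    if not m1_set or not m2_set:
        return set()                    # either side visits nothing
    if m1_set == 'all':
        return m2_set
    if m2_set == 'all':
        return m1_set
    if m1_set == 'this' or m2_set == 'this':
        return 'this'                   # children unknown, but must recurse
    return m1_set & m2_set              # both sides gave concrete sets

assert intersect_children('all', {'a', 'b'}) == {'a', 'b'}
assert intersect_children({'a', 'b'}, {'b', 'c'}) == {'b'}
assert intersect_children('this', {'a'}) == 'this'
assert intersect_children(set(), 'all') == set()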
869 882 class subdirmatcher(basematcher):
870 883 """Adapt a matcher to work on a subdirectory only.
871 884
872 885 The paths are remapped to remove/insert the path as needed:
873 886
874 887 >>> from . import pycompat
875 888 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
876 889 >>> m2 = subdirmatcher(b'sub', m1)
877 890 >>> m2(b'a.txt')
878 891 False
879 892 >>> m2(b'b.txt')
880 893 True
881 894 >>> m2.matchfn(b'a.txt')
882 895 False
883 896 >>> m2.matchfn(b'b.txt')
884 897 True
885 898 >>> m2.files()
886 899 ['b.txt']
887 900 >>> m2.exact(b'b.txt')
888 901 True
889 902 >>> def bad(f, msg):
890 903 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
891 904 >>> m1.bad = bad
892 905 >>> m2.bad(b'x.txt', b'No such file')
893 906 sub/x.txt: No such file
894 907 """
895 908
896 909 def __init__(self, path, matcher):
897 910 super(subdirmatcher, self).__init__()
898 911 self._path = path
899 912 self._matcher = matcher
900 913 self._always = matcher.always()
901 914
902 915 self._files = [f[len(path) + 1:] for f in matcher._files
903 916 if f.startswith(path + "/")]
904 917
905 918 # If the parent repo had a path to this subrepo and the matcher is
906 919 # a prefix matcher, this submatcher always matches.
907 920 if matcher.prefix():
908 921 self._always = any(f == path for f in matcher._files)
909 922
910 923 def bad(self, f, msg):
911 924 self._matcher.bad(self._path + "/" + f, msg)
912 925
913 926 def matchfn(self, f):
914 927 # Some information is lost in the superclass's constructor, so we
915 928 # can not accurately create the matching function for the subdirectory
916 929 # from the inputs. Instead, we override matchfn() and visitdir() to
917 930 # call the original matcher with the subdirectory path prepended.
918 931 return self._matcher.matchfn(self._path + "/" + f)
919 932
920 933 def visitdir(self, dir):
921 if dir == '.':
934 dir = normalizerootdir(dir, 'visitdir')
935 if dir == '':
922 936 dir = self._path
923 937 else:
924 938 dir = self._path + "/" + dir
925 939 return self._matcher.visitdir(dir)
926 940
927 941 def visitchildrenset(self, dir):
928 if dir == '.':
942 dir = normalizerootdir(dir, 'visitchildrenset')
943 if dir == '':
929 944 dir = self._path
930 945 else:
931 946 dir = self._path + "/" + dir
932 947 return self._matcher.visitchildrenset(dir)
933 948
934 949 def always(self):
935 950 return self._always
936 951
937 952 def prefix(self):
938 953 return self._matcher.prefix() and not self._always
939 954
940 955 @encoding.strmethod
941 956 def __repr__(self):
942 957 return ('<subdirmatcher path=%r, matcher=%r>' %
943 958 (self._path, self._matcher))
944 959
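# Editor's aside -- a minimal sketch, not part of this changeset: the
# path remapping behind subdirmatcher.visitdir() above. With '' now
# naming the root, asking the submatcher about its own root means asking
# the wrapped matcher about self._path itself. `remap_subdir` is a
# hypothetical helper.

def remap_subdir(path, dir):
    # `dir` is relative to the subdirectory; translate it to a path
    # relative to the parent matcher's root ('' = the subdir root)
    if dir == '':
        return path
    return path + '/' + dir

assert remap_subdir('sub', '') == 'sub'
assert remap_subdir('sub', 'dir/x') == 'sub/dir/x'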
945 960 class prefixdirmatcher(basematcher):
946 961 """Adapt a matcher to work on a parent directory.
947 962
948 963 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
949 964 ignored.
950 965
951 966 The prefix path should usually be the relative path from the root of
952 967 this matcher to the root of the wrapped matcher.
953 968
954 969 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
955 970 >>> m2 = prefixdirmatcher(b'd/e', m1)
956 971 >>> m2(b'a.txt')
957 972 False
958 973 >>> m2(b'd/e/a.txt')
959 974 True
960 975 >>> m2(b'd/e/b.txt')
961 976 False
962 977 >>> m2.files()
963 978 ['d/e/a.txt', 'd/e/f/b.txt']
964 979 >>> m2.exact(b'd/e/a.txt')
965 980 True
966 981 >>> m2.visitdir(b'd')
967 982 True
968 983 >>> m2.visitdir(b'd/e')
969 984 True
970 985 >>> m2.visitdir(b'd/e/f')
971 986 True
972 987 >>> m2.visitdir(b'd/e/g')
973 988 False
974 989 >>> m2.visitdir(b'd/ef')
975 990 False
976 991 """
977 992
978 993 def __init__(self, path, matcher, badfn=None):
979 994 super(prefixdirmatcher, self).__init__(badfn)
980 995 if not path:
981 996 raise error.ProgrammingError('prefix path must not be empty')
982 997 self._path = path
983 998 self._pathprefix = path + '/'
984 999 self._matcher = matcher
985 1000
986 1001 @propertycache
987 1002 def _files(self):
988 1003 return [self._pathprefix + f for f in self._matcher._files]
989 1004
990 1005 def matchfn(self, f):
991 1006 if not f.startswith(self._pathprefix):
992 1007 return False
993 1008 return self._matcher.matchfn(f[len(self._pathprefix):])
994 1009
995 1010 @propertycache
996 1011 def _pathdirs(self):
997 return set(util.finddirs(self._path)) | {'.'}
1012 return set(util.finddirs(self._path)) | {''}
998 1013
999 1014 def visitdir(self, dir):
1000 1015 if dir == self._path:
1001 return self._matcher.visitdir('.')
1016 return self._matcher.visitdir('')
1002 1017 if dir.startswith(self._pathprefix):
1003 1018 return self._matcher.visitdir(dir[len(self._pathprefix):])
1004 1019 return dir in self._pathdirs
1005 1020
1006 1021 def visitchildrenset(self, dir):
1007 1022 if dir == self._path:
1008 return self._matcher.visitchildrenset('.')
1023 return self._matcher.visitchildrenset('')
1009 1024 if dir.startswith(self._pathprefix):
1010 1025 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1011 1026 if dir in self._pathdirs:
1012 1027 return 'this'
1013 1028 return set()
1014 1029
1015 1030 def isexact(self):
1016 1031 return self._matcher.isexact()
1017 1032
1018 1033 def prefix(self):
1019 1034 return self._matcher.prefix()
1020 1035
1021 1036 @encoding.strmethod
1022 1037 def __repr__(self):
1023 1038 return ('<prefixdirmatcher path=%r, matcher=%r>'
1024 1039 % (pycompat.bytestr(self._path), self._matcher))
1025 1040
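# Editor's aside -- a runnable sketch, not part of this changeset: why
# prefixdirmatcher adds '' to self._pathdirs above. For path 'd/e', the
# ancestors that must be traversed to reach the wrapped matcher are ''
# (the root) and 'd'. `finddirs_demo` is a hand-rolled stand-in for
# util.finddirs, which yields ancestor directories but not the root.

def finddirs_demo(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

def prefix_visitdir(path, wrapped_visitdir, dir):
    if dir == path:
        return wrapped_visitdir('')     # the wrapped matcher's root
    if dir.startswith(path + '/'):
        return wrapped_visitdir(dir[len(path) + 1:])
    return dir in (set(finddirs_demo(path)) | {''})

wrapped = lambda d: True        # pretend the wrapped matcher says yes
assert prefix_visitdir('d/e', wrapped, 'd') is True
assert prefix_visitdir('d/e', wrapped, '') is True
assert prefix_visitdir('d/e', wrapped, 'd/e/f') is True
assert prefix_visitdir('d/e', wrapped, 'd/ef') is False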
1026 1041 class unionmatcher(basematcher):
1027 1042 """A matcher that is the union of several matchers.
1028 1043
1029 1044 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1030 1045 the first matcher.
1031 1046 """
1032 1047
1033 1048 def __init__(self, matchers):
1034 1049 m1 = matchers[0]
1035 1050 super(unionmatcher, self).__init__()
1036 1051 self.explicitdir = m1.explicitdir
1037 1052 self.traversedir = m1.traversedir
1038 1053 self._matchers = matchers
1039 1054
1040 1055 def matchfn(self, f):
1041 1056 for match in self._matchers:
1042 1057 if match(f):
1043 1058 return True
1044 1059 return False
1045 1060
1046 1061 def visitdir(self, dir):
1047 1062 r = False
1048 1063 for m in self._matchers:
1049 1064 v = m.visitdir(dir)
1050 1065 if v == 'all':
1051 1066 return v
1052 1067 r |= v
1053 1068 return r
1054 1069
1055 1070 def visitchildrenset(self, dir):
1056 1071 r = set()
1057 1072 this = False
1058 1073 for m in self._matchers:
1059 1074 v = m.visitchildrenset(dir)
1060 1075 if not v:
1061 1076 continue
1062 1077 if v == 'all':
1063 1078 return v
1064 1079 if this or v == 'this':
1065 1080 this = True
1066 1081 # don't break, we might have an 'all' in here.
1067 1082 continue
1068 1083 assert isinstance(v, set)
1069 1084 r = r.union(v)
1070 1085 if this:
1071 1086 return 'this'
1072 1087 return r
1073 1088
1074 1089 @encoding.strmethod
1075 1090 def __repr__(self):
1076 1091 return ('<unionmatcher matchers=%r>' % self._matchers)
1077 1092
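# Editor's aside -- a runnable sketch, not part of this changeset:
# folding several visitchildrenset() answers into one, as
# unionmatcher.visitchildrenset() does above. `union_children` is a
# hypothetical standalone name.

def union_children(answers):
    r = set()
    this = False
    for v in answers:
        if not v:
            continue
        if v == 'all':
            return 'all'        # one 'all' dominates the whole union
        if this or v == 'this':
            this = True         # keep scanning: a later 'all' still wins
            continue
        r = r.union(v)
    return 'this' if this else r

assert union_children([set(), {'a'}, {'b'}]) == {'a', 'b'}
assert union_children([{'a'}, 'this', set()]) == 'this'
assert union_children(['this', 'all']) == 'all'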
1078 1093 def patkind(pattern, default=None):
1079 1094 '''If pattern is 'kind:pat' with a known kind, return kind.
1080 1095
1081 1096 >>> patkind(br're:.*\.c$')
1082 1097 're'
1083 1098 >>> patkind(b'glob:*.c')
1084 1099 'glob'
1085 1100 >>> patkind(b'relpath:test.py')
1086 1101 'relpath'
1087 1102 >>> patkind(b'main.py')
1088 1103 >>> patkind(b'main.py', default=b're')
1089 1104 're'
1090 1105 '''
1091 1106 return _patsplit(pattern, default)[0]
1092 1107
1093 1108 def _patsplit(pattern, default):
1094 1109 """Split a string into the optional pattern kind prefix and the actual
1095 1110 pattern."""
1096 1111 if ':' in pattern:
1097 1112 kind, pat = pattern.split(':', 1)
1098 1113 if kind in allpatternkinds:
1099 1114 return kind, pat
1100 1115 return default, pattern
1101 1116
1102 1117 def _globre(pat):
1103 1118 r'''Convert an extended glob string to a regexp string.
1104 1119
1105 1120 >>> from . import pycompat
1106 1121 >>> def bprint(s):
1107 1122 ... print(pycompat.sysstr(s))
1108 1123 >>> bprint(_globre(br'?'))
1109 1124 .
1110 1125 >>> bprint(_globre(br'*'))
1111 1126 [^/]*
1112 1127 >>> bprint(_globre(br'**'))
1113 1128 .*
1114 1129 >>> bprint(_globre(br'**/a'))
1115 1130 (?:.*/)?a
1116 1131 >>> bprint(_globre(br'a/**/b'))
1117 1132 a/(?:.*/)?b
1118 1133 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1119 1134 [a*?!^][\^b][^c]
1120 1135 >>> bprint(_globre(br'{a,b}'))
1121 1136 (?:a|b)
1122 1137 >>> bprint(_globre(br'.\*\?'))
1123 1138 \.\*\?
1124 1139 '''
1125 1140 i, n = 0, len(pat)
1126 1141 res = ''
1127 1142 group = 0
1128 1143 escape = util.stringutil.regexbytesescapemap.get
1129 1144 def peek():
1130 1145 return i < n and pat[i:i + 1]
1131 1146 while i < n:
1132 1147 c = pat[i:i + 1]
1133 1148 i += 1
1134 1149 if c not in '*?[{},\\':
1135 1150 res += escape(c, c)
1136 1151 elif c == '*':
1137 1152 if peek() == '*':
1138 1153 i += 1
1139 1154 if peek() == '/':
1140 1155 i += 1
1141 1156 res += '(?:.*/)?'
1142 1157 else:
1143 1158 res += '.*'
1144 1159 else:
1145 1160 res += '[^/]*'
1146 1161 elif c == '?':
1147 1162 res += '.'
1148 1163 elif c == '[':
1149 1164 j = i
1150 1165 if j < n and pat[j:j + 1] in '!]':
1151 1166 j += 1
1152 1167 while j < n and pat[j:j + 1] != ']':
1153 1168 j += 1
1154 1169 if j >= n:
1155 1170 res += '\\['
1156 1171 else:
1157 1172 stuff = pat[i:j].replace('\\','\\\\')
1158 1173 i = j + 1
1159 1174 if stuff[0:1] == '!':
1160 1175 stuff = '^' + stuff[1:]
1161 1176 elif stuff[0:1] == '^':
1162 1177 stuff = '\\' + stuff
1163 1178 res = '%s[%s]' % (res, stuff)
1164 1179 elif c == '{':
1165 1180 group += 1
1166 1181 res += '(?:'
1167 1182 elif c == '}' and group:
1168 1183 res += ')'
1169 1184 group -= 1
1170 1185 elif c == ',' and group:
1171 1186 res += '|'
1172 1187 elif c == '\\':
1173 1188 p = peek()
1174 1189 if p:
1175 1190 i += 1
1176 1191 res += escape(p, p)
1177 1192 else:
1178 1193 res += escape(c, c)
1179 1194 else:
1180 1195 res += escape(c, c)
1181 1196 return res
1182 1197
1183 1198 def _regex(kind, pat, globsuffix):
1184 1199 '''Convert a (normalized) pattern of any kind into a
1185 1200 regular expression.
1186 1201 globsuffix is appended to the regexp of globs.'''
1187 1202
1188 1203 if rustext is not None:
1189 1204 try:
1190 1205 return rustext.filepatterns.build_single_regex(
1191 1206 kind,
1192 1207 pat,
1193 1208 globsuffix
1194 1209 )
1195 1210 except rustext.filepatterns.PatternError:
1196 1211 raise error.ProgrammingError(
1197 1212 'not a regex pattern: %s:%s' % (kind, pat)
1198 1213 )
1199 1214
1200 if not pat:
1215 if not pat and kind in ('glob', 'relpath'):
1201 1216 return ''
1202 1217 if kind == 're':
1203 1218 return pat
1204 1219 if kind in ('path', 'relpath'):
1205 1220 if pat == '.':
1206 1221 return ''
1207 1222 return util.stringutil.reescape(pat) + '(?:/|$)'
1208 1223 if kind == 'rootfilesin':
1209 1224 if pat == '.':
1210 1225 escaped = ''
1211 1226 else:
1212 1227 # Pattern is a directory name.
1213 1228 escaped = util.stringutil.reescape(pat) + '/'
1214 1229 # Anything after the pattern must be a non-directory.
1215 1230 return escaped + '[^/]+$'
1216 1231 if kind == 'relglob':
1217 1232 return '(?:|.*/)' + _globre(pat) + globsuffix
1218 1233 if kind == 'relre':
1219 1234 if pat.startswith('^'):
1220 1235 return pat
1221 1236 return '.*' + pat
1222 1237 if kind in ('glob', 'rootglob'):
1223 1238 return _globre(pat) + globsuffix
1224 1239 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1225 1240
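# Editor's aside -- a runnable sketch, not part of this changeset: what
# the translation rules above produce for a few pattern kinds, checked
# with the stdlib re module. The regex strings are written out by hand
# to mirror _regex()'s rules (with '$' as the glob suffix); they are an
# approximation for illustration, not Mercurial's own output.
import re

# 'path:dir' matches dir itself and everything below it
assert re.match(r'dir(?:/|$)', 'dir/a.txt')
assert not re.match(r'dir(?:/|$)', 'dir2/a.txt')
# 'rootfilesin:dir' matches files directly in dir, but not in subdirs
assert re.match(r'dir/[^/]+$', 'dir/a.txt')
assert not re.match(r'dir/[^/]+$', 'dir/sub/a.txt')
# 'relglob:*.c' matches at any directory level
assert re.match(r'(?:|.*/)[^/]*\.c$', 'x/y/z.c')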
1226 1241 def _buildmatch(kindpats, globsuffix, root):
1227 1242 '''Return regexp string and a matcher function for kindpats.
1228 1243 globsuffix is appended to the regexp of globs.'''
1229 1244 matchfuncs = []
1230 1245
1231 1246 subincludes, kindpats = _expandsubinclude(kindpats, root)
1232 1247 if subincludes:
1233 1248 submatchers = {}
1234 1249 def matchsubinclude(f):
1235 1250 for prefix, matcherargs in subincludes:
1236 1251 if f.startswith(prefix):
1237 1252 mf = submatchers.get(prefix)
1238 1253 if mf is None:
1239 1254 mf = match(*matcherargs)
1240 1255 submatchers[prefix] = mf
1241 1256
1242 1257 if mf(f[len(prefix):]):
1243 1258 return True
1244 1259 return False
1245 1260 matchfuncs.append(matchsubinclude)
1246 1261
1247 1262 regex = ''
1248 1263 if kindpats:
1249 1264 if all(k == 'rootfilesin' for k, p, s in kindpats):
1250 1265 dirs = {p for k, p, s in kindpats}
1251 1266 def mf(f):
1252 1267 i = f.rfind('/')
1253 1268 if i >= 0:
1254 1269 dir = f[:i]
1255 1270 else:
1256 1271 dir = '.'
1257 1272 return dir in dirs
1258 1273 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1259 1274 matchfuncs.append(mf)
1260 1275 else:
1261 1276 regex, mf = _buildregexmatch(kindpats, globsuffix)
1262 1277 matchfuncs.append(mf)
1263 1278
1264 1279 if len(matchfuncs) == 1:
1265 1280 return regex, matchfuncs[0]
1266 1281 else:
1267 1282 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1268 1283
1269 1284 MAX_RE_SIZE = 20000
1270 1285
1271 1286 def _joinregexes(regexps):
1272 1287 """gather multiple regular expressions into a single one"""
1273 1288 return '|'.join(regexps)
1274 1289
1275 1290 def _buildregexmatch(kindpats, globsuffix):
1276 1291 """Build a match function from a list of kinds and kindpats,
1277 1292 return regexp string and a matcher function.
1278 1293
1279 1294 Test too large input
1280 1295 >>> _buildregexmatch([
1281 1296 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1282 1297 ... ], b'$')
1283 1298 Traceback (most recent call last):
1284 1299 ...
1285 1300 Abort: matcher pattern is too long (20009 bytes)
1286 1301 """
1287 1302 try:
1288 1303 allgroups = []
1289 1304 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1290 1305 fullregexp = _joinregexes(regexps)
1291 1306
1292 1307 startidx = 0
1293 1308 groupsize = 0
1294 1309 for idx, r in enumerate(regexps):
1295 1310 piecesize = len(r)
1296 1311 if piecesize > MAX_RE_SIZE:
1297 1312 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1298 1313 raise error.Abort(msg)
1299 1314 elif (groupsize + piecesize) > MAX_RE_SIZE:
1300 1315 group = regexps[startidx:idx]
1301 1316 allgroups.append(_joinregexes(group))
1302 1317 startidx = idx
1303 1318 groupsize = 0
1304 1319 groupsize += piecesize + 1
1305 1320
1306 1321 if startidx == 0:
1307 1322 matcher = _rematcher(fullregexp)
1308 1323 func = lambda s: bool(matcher(s))
1309 1324 else:
1310 1325 group = regexps[startidx:]
1311 1326 allgroups.append(_joinregexes(group))
1312 1327 allmatchers = [_rematcher(g) for g in allgroups]
1313 1328 func = lambda s: any(m(s) for m in allmatchers)
1314 1329 return fullregexp, func
1315 1330 except re.error:
1316 1331 for k, p, s in kindpats:
1317 1332 try:
1318 1333 _rematcher(_regex(k, p, globsuffix))
1319 1334 except re.error:
1320 1335 if s:
1321 1336 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1322 1337 (s, k, p))
1323 1338 else:
1324 1339 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1325 1340 raise error.Abort(_("invalid pattern"))
1326 1341
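# Editor's aside -- a runnable sketch, not part of this changeset: the
# grouping strategy used above. Python's re module rejects very large
# patterns, so the per-pattern regexps are packed greedily into
# '|'-joined groups that each stay under a size cap, and a path matches
# if any group matches. `build_grouped_matcher` is a hypothetical name.
import re

def build_grouped_matcher(regexps, maxsize=20000):
    groups, current, size = [], [], 0
    for r in regexps:
        if current and size + len(r) + 1 > maxsize:
            groups.append(current)       # this group is full, start anew
            current, size = [], 0
        current.append(r)
        size += len(r) + 1               # +1 for the '|' separator
    if current:
        groups.append(current)
    matchers = [re.compile('|'.join(g)) for g in groups]
    return lambda s: any(m.match(s) for m in matchers)

mf = build_grouped_matcher(['a[0-9]+'] * 5000, maxsize=100)
assert mf('a42') and not mf('b42')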
1327 1342 def _patternrootsanddirs(kindpats):
1328 1343 '''Returns roots and directories corresponding to each pattern.
1329 1344
1330 1345 This calculates the roots and directories exactly matching the patterns and
1331 1346 returns a tuple of (roots, dirs) for each. It does not return other
1332 1347 directories which may also need to be considered, like the parent
1333 1348 directories.
1334 1349 '''
1335 1350 r = []
1336 1351 d = []
1337 1352 for kind, pat, source in kindpats:
1338 1353 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1339 1354 root = []
1340 1355 for p in pat.split('/'):
1341 1356 if '[' in p or '{' in p or '*' in p or '?' in p:
1342 1357 break
1343 1358 root.append(p)
1344 r.append('/'.join(root) or '.')
1359 r.append('/'.join(root))
1345 1360 elif kind in ('relpath', 'path'):
1346 r.append(pat or '.')
1361 if pat == '.':
1362 pat = ''
1363 r.append(pat)
1347 1364 elif kind in ('rootfilesin',):
1348 d.append(pat or '.')
1365 if pat == '.':
1366 pat = ''
1367 d.append(pat)
1349 1368 else: # relglob, re, relre
1350 r.append('.')
1369 r.append('')
1351 1370 return r, d
1352 1371
1353 1372 def _roots(kindpats):
1354 1373 '''Returns root directories to match recursively from the given patterns.'''
1355 1374 roots, dirs = _patternrootsanddirs(kindpats)
1356 1375 return roots
1357 1376
1358 1377 def _rootsdirsandparents(kindpats):
1359 1378 '''Returns roots and exact directories from patterns.
1360 1379
1361 1380 `roots` are directories to match recursively, `dirs` should
1362 1381 be matched non-recursively, and `parents` are the implicitly required
1363 1382 directories to walk to items in either roots or dirs.
1364 1383
1365 1384 Returns a tuple of (roots, dirs, parents).
1366 1385
1367 1386 >>> _rootsdirsandparents(
1368 1387 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1369 1388 ... (b'glob', b'g*', b'')])
1370 (['g/h', 'g/h', '.'], [], ['g', '.'])
1389 (['g/h', 'g/h', ''], [], ['g', ''])
1371 1390 >>> _rootsdirsandparents(
1372 1391 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1373 ([], ['g/h', '.'], ['g', '.'])
1392 ([], ['g/h', ''], ['g', ''])
1374 1393 >>> _rootsdirsandparents(
1375 1394 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1376 1395 ... (b'path', b'', b'')])
1377 (['r', 'p/p', '.'], [], ['p', '.'])
1396 (['r', 'p/p', ''], [], ['p', ''])
1378 1397 >>> _rootsdirsandparents(
1379 1398 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1380 1399 ... (b'relre', b'rr', b'')])
1381 (['.', '.', '.'], [], ['.'])
1400 (['', '', ''], [], [''])
1382 1401 '''
1383 1402 r, d = _patternrootsanddirs(kindpats)
1384 1403
1385 1404 p = []
1386 1405 # Append the parents as non-recursive/exact directories, since they must be
1387 1406 # scanned to get to either the roots or the other exact directories.
1388 1407 p.extend(util.dirs(d))
1389 1408 p.extend(util.dirs(r))
1390 1409 # util.dirs() does not include the root directory, so add it manually
1391 p.append('.')
1410 p.append('')
1392 1411
1393 1412 # FIXME: all uses of this function convert these to sets, do so before
1394 1413 # returning.
1395 1414 # FIXME: all uses of this function do not need anything in 'roots' and
1396 1415 # 'dirs' to also be in 'parents', consider removing them before returning.
1397 1416 return r, d, p
1398 1417
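# Editor's aside -- a runnable sketch, not part of this changeset:
# computing the implied parent directories of roots/dirs, with ''
# standing for the repository root as in the doctests above. `alldirs`
# is a hand-rolled stand-in for util.dirs(), which yields ancestor
# directories but not the root itself.

def alldirs(paths):
    seen = set()
    for p in paths:
        pos = p.rfind('/')
        while pos != -1:
            seen.add(p[:pos])
            pos = p.rfind('/', 0, pos)
    return seen

def parents_of(roots, dirs):
    p = alldirs(dirs) | alldirs(roots)
    p.add('')                  # the root is always an implied parent
    return p

assert parents_of(['g/h', 'g/h', ''], []) == {'', 'g'}
assert parents_of(['r', 'p/p', ''], []) == {'', 'p'}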
1399 1418 def _explicitfiles(kindpats):
1400 1419 '''Returns the potential explicit filenames from the patterns.
1401 1420
1402 1421 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1403 1422 ['foo/bar']
1404 1423 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1405 1424 []
1406 1425 '''
1407 1426 # Keep only the pattern kinds where one can specify filenames (vs only
1408 1427 # directory names).
1409 1428 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1410 1429 return _roots(filable)
1411 1430
1412 1431 def _prefix(kindpats):
1413 1432 '''Whether all the patterns match a prefix (i.e. recursively)'''
1414 1433 for kind, pat, source in kindpats:
1415 1434 if kind not in ('path', 'relpath'):
1416 1435 return False
1417 1436 return True
1418 1437
1419 1438 _commentre = None
1420 1439
1421 1440 def readpatternfile(filepath, warn, sourceinfo=False):
1422 1441 '''parse a pattern file, returning a list of
1423 1442 patterns. These patterns should be given to compile()
1424 1443 to be validated and converted into a match function.
1425 1444
1426 1445 trailing white space is dropped.
1427 1446 the escape character is backslash.
1428 1447 comments start with #.
1429 1448 empty lines are skipped.
1430 1449
1431 1450 lines can be of the following formats:
1432 1451
1433 1452 syntax: regexp # defaults following lines to non-rooted regexps
1434 1453 syntax: glob # defaults following lines to non-rooted globs
1435 1454 re:pattern # non-rooted regular expression
1436 1455 glob:pattern # non-rooted glob
1437 1456 rootglob:pat # rooted glob (same root as ^ in regexps)
1438 1457 pattern # pattern of the current default type
1439 1458
1440 1459 if sourceinfo is set, returns a list of tuples:
1441 1460 (pattern, lineno, originalline).
1442 1461 This is useful to debug ignore patterns.
1443 1462 '''
1444 1463
1445 1464 if rustext is not None:
1446 1465 result, warnings = rustext.filepatterns.read_pattern_file(
1447 1466 filepath,
1448 1467 bool(warn),
1449 1468 sourceinfo,
1450 1469 )
1451 1470
1452 1471 for warning_params in warnings:
1453 1472 # Can't be easily emitted from Rust, because it would require
1454 1473 # a mechanism for both gettext and calling the `warn` function.
1455 1474 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1456 1475
1457 1476 return result
1458 1477
1459 1478 syntaxes = {
1460 1479 're': 'relre:',
1461 1480 'regexp': 'relre:',
1462 1481 'glob': 'relglob:',
1463 1482 'rootglob': 'rootglob:',
1464 1483 'include': 'include',
1465 1484 'subinclude': 'subinclude',
1466 1485 }
1467 1486 syntax = 'relre:'
1468 1487 patterns = []
1469 1488
1470 1489 fp = open(filepath, 'rb')
1471 1490 for lineno, line in enumerate(util.iterfile(fp), start=1):
1472 1491 if "#" in line:
1473 1492 global _commentre
1474 1493 if not _commentre:
1475 1494 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1476 1495 # remove comments prefixed by an even number of escapes
1477 1496 m = _commentre.search(line)
1478 1497 if m:
1479 1498 line = line[:m.end(1)]
1480 1499 # fixup properly escaped comments that survived the above
1481 1500 line = line.replace("\\#", "#")
1482 1501 line = line.rstrip()
1483 1502 if not line:
1484 1503 continue
1485 1504
1486 1505 if line.startswith('syntax:'):
1487 1506 s = line[7:].strip()
1488 1507 try:
1489 1508 syntax = syntaxes[s]
1490 1509 except KeyError:
1491 1510 if warn:
1492 1511 warn(_("%s: ignoring invalid syntax '%s'\n") %
1493 1512 (filepath, s))
1494 1513 continue
1495 1514
1496 1515 linesyntax = syntax
1497 1516 for s, rels in syntaxes.iteritems():
1498 1517 if line.startswith(rels):
1499 1518 linesyntax = rels
1500 1519 line = line[len(rels):]
1501 1520 break
1502 1521 elif line.startswith(s+':'):
1503 1522 linesyntax = rels
1504 1523 line = line[len(s) + 1:]
1505 1524 break
1506 1525 if sourceinfo:
1507 1526 patterns.append((linesyntax + line, lineno, line))
1508 1527 else:
1509 1528 patterns.append(linesyntax + line)
1510 1529 fp.close()
1511 1530 return patterns
@@ -1,654 +1,654 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import functools
12 12 import hashlib
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from . import (
18 18 error,
19 19 node,
20 20 policy,
21 21 pycompat,
22 22 util,
23 23 vfs as vfsmod,
24 24 )
25 25
26 26 parsers = policy.importmod(r'parsers')
27 27 # how much bytes should be read from fncache in one read
28 28 # It is done to prevent loading large fncache files into memory
29 29 fncache_chunksize = 10 ** 6
30 30
31 31 def _matchtrackedpath(path, matcher):
32 32 """parses a fncache entry and returns whether the entry is tracking a path
33 33 matched by matcher or not.
34 34
35 35 If matcher is None, returns True"""
36 36
37 37 if matcher is None:
38 38 return True
39 39 path = decodedir(path)
40 40 if path.startswith('data/'):
41 41 return matcher(path[len('data/'):-len('.i')])
42 42 elif path.startswith('meta/'):
43 return matcher.visitdir(path[len('meta/'):-len('/00manifest.i')] or '.')
43 return matcher.visitdir(path[len('meta/'):-len('/00manifest.i')])
44 44
45 45 raise error.ProgrammingError("cannot decode path %s" % path)
46 46
47 47 # This avoids a collision between a file named foo and a dir named
48 48 # foo.i or foo.d
49 49 def _encodedir(path):
50 50 '''
51 51 >>> _encodedir(b'data/foo.i')
52 52 'data/foo.i'
53 53 >>> _encodedir(b'data/foo.i/bla.i')
54 54 'data/foo.i.hg/bla.i'
55 55 >>> _encodedir(b'data/foo.i.hg/bla.i')
56 56 'data/foo.i.hg.hg/bla.i'
57 57 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
58 58 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
59 59 '''
60 60 return (path
61 61 .replace(".hg/", ".hg.hg/")
62 62 .replace(".i/", ".i.hg/")
63 63 .replace(".d/", ".d.hg/"))
64 64
65 65 encodedir = getattr(parsers, 'encodedir', _encodedir)
66 66
67 67 def decodedir(path):
68 68 '''
69 69 >>> decodedir(b'data/foo.i')
70 70 'data/foo.i'
71 71 >>> decodedir(b'data/foo.i.hg/bla.i')
72 72 'data/foo.i/bla.i'
73 73 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
74 74 'data/foo.i.hg/bla.i'
75 75 '''
76 76 if ".hg/" not in path:
77 77 return path
78 78 return (path
79 79 .replace(".d.hg/", ".d/")
80 80 .replace(".i.hg/", ".i/")
81 81 .replace(".hg.hg/", ".hg/"))
82 82
83 83 def _reserved():
84 84 ''' characters that are problematic for filesystems
85 85
86 86 * ascii escapes (0..31)
87 87 * ascii hi (126..255)
88 88 * windows specials
89 89
90 90 these characters will be escaped by encodefunctions
91 91 '''
92 92 winreserved = [ord(x) for x in u'\\:*?"<>|']
93 93 for x in range(32):
94 94 yield x
95 95 for x in range(126, 256):
96 96 yield x
97 97 for x in winreserved:
98 98 yield x
99 99
100 100 def _buildencodefun():
101 101 '''
102 102 >>> enc, dec = _buildencodefun()
103 103
104 104 >>> enc(b'nothing/special.txt')
105 105 'nothing/special.txt'
106 106 >>> dec(b'nothing/special.txt')
107 107 'nothing/special.txt'
108 108
109 109 >>> enc(b'HELLO')
110 110 '_h_e_l_l_o'
111 111 >>> dec(b'_h_e_l_l_o')
112 112 'HELLO'
113 113
114 114 >>> enc(b'hello:world?')
115 115 'hello~3aworld~3f'
116 116 >>> dec(b'hello~3aworld~3f')
117 117 'hello:world?'
118 118
119 119 >>> enc(b'the\\x07quick\\xADshot')
120 120 'the~07quick~adshot'
121 121 >>> dec(b'the~07quick~adshot')
122 122 'the\\x07quick\\xadshot'
123 123 '''
124 124 e = '_'
125 125 xchr = pycompat.bytechr
126 126 asciistr = list(map(xchr, range(127)))
127 127 capitals = list(range(ord("A"), ord("Z") + 1))
128 128
129 129 cmap = dict((x, x) for x in asciistr)
130 130 for x in _reserved():
131 131 cmap[xchr(x)] = "~%02x" % x
132 132 for x in capitals + [ord(e)]:
133 133 cmap[xchr(x)] = e + xchr(x).lower()
134 134
135 135 dmap = {}
136 136 for k, v in cmap.iteritems():
137 137 dmap[v] = k
138 138 def decode(s):
139 139 i = 0
140 140 while i < len(s):
141 141 for l in pycompat.xrange(1, 4):
142 142 try:
143 143 yield dmap[s[i:i + l]]
144 144 i += l
145 145 break
146 146 except KeyError:
147 147 pass
148 148 else:
149 149 raise KeyError
150 150 return (lambda s: ''.join([cmap[s[c:c + 1]]
151 151 for c in pycompat.xrange(len(s))]),
152 152 lambda s: ''.join(list(decode(s))))
153 153
154 154 _encodefname, _decodefname = _buildencodefun()
155 155
156 156 def encodefilename(s):
157 157 '''
158 158 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
159 159 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
160 160 '''
161 161 return _encodefname(encodedir(s))
162 162
163 163 def decodefilename(s):
164 164 '''
165 165 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
166 166 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
167 167 '''
168 168 return decodedir(_decodefname(s))
169 169
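# Editor's aside -- a runnable sketch, not part of this changeset: the
# reversible filename encoding built above, hand-rolled for
# illustration. Uppercase letters become '_x', '_' escapes itself, and
# reserved bytes become '~XX' hex escapes; decoding simply inverts the
# map. `mini_encode` is a hypothetical name.

def mini_encode(s, reserved=':*?"<>|\\'):
    out = []
    for c in s:
        if c == '_' or c.isupper():
            out.append('_' + c.lower())
        elif ord(c) < 32 or ord(c) > 125 or c in reserved:
            out.append('~%02x' % ord(c))
        else:
            out.append(c)
    return ''.join(out)

assert mini_encode('HELLO') == '_h_e_l_l_o'
assert mini_encode('hello:world?') == 'hello~3aworld~3f'
assert mini_encode('nothing/special.txt') == 'nothing/special.txt'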
170 170 def _buildlowerencodefun():
171 171 '''
172 172 >>> f = _buildlowerencodefun()
173 173 >>> f(b'nothing/special.txt')
174 174 'nothing/special.txt'
175 175 >>> f(b'HELLO')
176 176 'hello'
177 177 >>> f(b'hello:world?')
178 178 'hello~3aworld~3f'
179 179 >>> f(b'the\\x07quick\\xADshot')
180 180 'the~07quick~adshot'
181 181 '''
182 182 xchr = pycompat.bytechr
183 183 cmap = dict([(xchr(x), xchr(x)) for x in pycompat.xrange(127)])
184 184 for x in _reserved():
185 185 cmap[xchr(x)] = "~%02x" % x
186 186 for x in range(ord("A"), ord("Z") + 1):
187 187 cmap[xchr(x)] = xchr(x).lower()
188 188 def lowerencode(s):
189 189 return "".join([cmap[c] for c in pycompat.iterbytestr(s)])
190 190 return lowerencode
191 191
192 192 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
193 193
194 194 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
195 195 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
196 196 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
197 197 def _auxencode(path, dotencode):
198 198 '''
199 199 Encodes filenames containing names reserved by Windows or which end in
200 200 period or space. Does not touch other single reserved characters c.
201 201 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
202 202 Additionally encodes space or period at the beginning, if dotencode is
203 203 True. Parameter path is assumed to be all lowercase.
204 204 A segment only needs encoding if a reserved name appears as a
205 205 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
206 206 doesn't need encoding.
207 207
208 208 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
209 209 >>> _auxencode(s.split(b'/'), True)
210 210 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
211 211 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
212 212 >>> _auxencode(s.split(b'/'), False)
213 213 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
214 214 >>> _auxencode([b'foo. '], True)
215 215 ['foo.~20']
216 216 >>> _auxencode([b' .foo'], True)
217 217 ['~20.foo']
218 218 '''
219 219 for i, n in enumerate(path):
220 220 if not n:
221 221 continue
222 222 if dotencode and n[0] in '. ':
223 223 n = "~%02x" % ord(n[0:1]) + n[1:]
224 224 path[i] = n
225 225 else:
226 226 l = n.find('.')
227 227 if l == -1:
228 228 l = len(n)
229 229 if ((l == 3 and n[:3] in _winres3) or
230 230 (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
231 231 and n[:3] in _winres4)):
232 232 # encode third letter ('aux' -> 'au~78')
233 233 ec = "~%02x" % ord(n[2:3])
234 234 n = n[0:2] + ec + n[3:]
235 235 path[i] = n
236 236 if n[-1] in '. ':
237 237 # encode last period or space ('foo...' -> 'foo..~2e')
238 238 path[i] = n[:-1] + "~%02x" % ord(n[-1:])
239 239 return path
240 240
241 241 _maxstorepathlen = 120
242 242 _dirprefixlen = 8
243 243 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
244 244
245 245 def _hashencode(path, dotencode):
246 246 digest = node.hex(hashlib.sha1(path).digest())
247 247 le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/'
248 248 parts = _auxencode(le, dotencode)
249 249 basename = parts[-1]
250 250 _root, ext = os.path.splitext(basename)
251 251 sdirs = []
252 252 sdirslen = 0
253 253 for p in parts[:-1]:
254 254 d = p[:_dirprefixlen]
255 255 if d[-1] in '. ':
256 256 # Windows can't access dirs ending in period or space
257 257 d = d[:-1] + '_'
258 258 if sdirslen == 0:
259 259 t = len(d)
260 260 else:
261 261 t = sdirslen + 1 + len(d)
262 262 if t > _maxshortdirslen:
263 263 break
264 264 sdirs.append(d)
265 265 sdirslen = t
266 266 dirs = '/'.join(sdirs)
267 267 if len(dirs) > 0:
268 268 dirs += '/'
269 269 res = 'dh/' + dirs + digest + ext
270 270 spaceleft = _maxstorepathlen - len(res)
271 271 if spaceleft > 0:
272 272 filler = basename[:spaceleft]
273 273 res = 'dh/' + dirs + filler + digest + ext
274 274 return res
275 275
276 276 def _hybridencode(path, dotencode):
277 277 '''encodes path with a length limit
278 278
279 279 Encodes all paths that begin with 'data/', according to the following.
280 280
281 281 Default encoding (reversible):
282 282
283 283 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
284 284 characters are encoded as '~xx', where xx is the two digit hex code
285 285 of the character (see encodefilename).
286 286 Relevant path components consisting of Windows reserved filenames are
287 287 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
288 288
289 289 Hashed encoding (not reversible):
290 290
291 291 If the default-encoded path is longer than _maxstorepathlen, a
292 292 non-reversible hybrid hashing of the path is done instead.
293 293 This encoding uses up to _dirprefixlen characters of all directory
294 294 levels of the lowerencoded path, but not more levels than can fit into
295 295 _maxshortdirslen.
296 296 Then follows the filler followed by the sha digest of the full path.
297 297 The filler is the beginning of the basename of the lowerencoded path
298 298 (the basename is everything after the last path separator). The filler
299 299 is as long as possible, filling in characters from the basename until
300 300 the encoded path has _maxstorepathlen characters (or all chars of the
301 301 basename have been taken).
302 302 The extension (e.g. '.i' or '.d') is preserved.
303 303
304 304 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
305 305 encoding was used.
306 306 '''
307 307 path = encodedir(path)
308 308 ef = _encodefname(path).split('/')
309 309 res = '/'.join(_auxencode(ef, dotencode))
310 310 if len(res) > _maxstorepathlen:
311 311 res = _hashencode(path, dotencode)
312 312 return res
313 313
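# Editor's aside -- a sketch of the shape of the hashed fallback
# described above, not part of this changeset and much simplified: no
# lowercase/aux encoding and no cap on the number of directory levels.
# A fixed-size name is built from truncated directory prefixes, a filler
# taken from the basename, and the sha1 of the full path, keeping the
# extension. `mini_hashencode` is a hypothetical name.
import hashlib
import os.path

def mini_hashencode(path, maxlen=120, dirprefixlen=8):
    digest = hashlib.sha1(path.encode('ascii')).hexdigest()
    parts = path.split('/')[1:]      # drop 'data/'; output uses 'dh/'
    short = ''.join(d[:dirprefixlen] + '/' for d in parts[:-1])
    ext = os.path.splitext(parts[-1])[1]
    res = 'dh/' + short + digest + ext
    filler = parts[-1][:max(0, maxlen - len(res))]
    return 'dh/' + short + filler + digest + ext

enc = mini_hashencode('data/' + 'longdirectoryname/' * 4 + 'x' * 150 + '.i')
assert len(enc) <= 120 and enc.startswith('dh/') and enc.endswith('.i')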
314 314 def _pathencode(path):
315 315 de = encodedir(path)
316 316 if len(path) > _maxstorepathlen:
317 317 return _hashencode(de, True)
318 318 ef = _encodefname(de).split('/')
319 319 res = '/'.join(_auxencode(ef, True))
320 320 if len(res) > _maxstorepathlen:
321 321 return _hashencode(de, True)
322 322 return res
323 323
324 324 _pathencode = getattr(parsers, 'pathencode', _pathencode)
325 325
326 326 def _plainhybridencode(f):
327 327 return _hybridencode(f, False)
328 328
329 329 def _calcmode(vfs):
330 330 try:
331 331 # files in .hg/ will be created using this mode
332 332 mode = vfs.stat().st_mode
333 333 # avoid some useless chmods
334 334 if (0o777 & ~util.umask) == (0o777 & mode):
335 335 mode = None
336 336 except OSError:
337 337 mode = None
338 338 return mode
339 339
340 340 _data = ('bookmarks narrowspec data meta 00manifest.d 00manifest.i'
341 341 ' 00changelog.d 00changelog.i phaseroots obsstore')
342 342
343 343 def isrevlog(f, kind, st):
344 344 return kind == stat.S_IFREG and f[-2:] in ('.i', '.d')
345 345
346 346 class basicstore(object):
347 347 '''base class for local repository stores'''
348 348 def __init__(self, path, vfstype):
349 349 vfs = vfstype(path)
350 350 self.path = vfs.base
351 351 self.createmode = _calcmode(vfs)
352 352 vfs.createmode = self.createmode
353 353 self.rawvfs = vfs
354 354 self.vfs = vfsmod.filtervfs(vfs, encodedir)
355 355 self.opener = self.vfs
356 356
357 357 def join(self, f):
358 358 return self.path + '/' + encodedir(f)
359 359
360 360 def _walk(self, relpath, recurse, filefilter=isrevlog):
361 361 '''yields (unencoded, encoded, size)'''
362 362 path = self.path
363 363 if relpath:
364 364 path += '/' + relpath
365 365 striplen = len(self.path) + 1
366 366 l = []
367 367 if self.rawvfs.isdir(path):
368 368 visit = [path]
369 369 readdir = self.rawvfs.readdir
370 370 while visit:
371 371 p = visit.pop()
372 372 for f, kind, st in readdir(p, stat=True):
373 373 fp = p + '/' + f
374 374 if filefilter(f, kind, st):
375 375 n = util.pconvert(fp[striplen:])
376 376 l.append((decodedir(n), n, st.st_size))
377 377 elif kind == stat.S_IFDIR and recurse:
378 378 visit.append(fp)
379 379 l.sort()
380 380 return l
381 381
382 382 def datafiles(self, matcher=None):
383 383 return self._walk('data', True) + self._walk('meta', True)
384 384
385 385 def topfiles(self):
386 386 # yield manifest before changelog
387 387 return reversed(self._walk('', False))
388 388
389 389 def walk(self, matcher=None):
390 390 '''yields (unencoded, encoded, size)
391 391
392 392 if a matcher is passed, storage files of only those tracked paths
393 393 are passed with matches the matcher
394 394 '''
395 395 # yield data files first
396 396 for x in self.datafiles(matcher):
397 397 yield x
398 398 for x in self.topfiles():
399 399 yield x
400 400
401 401 def copylist(self):
402 402 return ['requires'] + _data.split()
403 403
404 404 def write(self, tr):
405 405 pass
406 406
407 407 def invalidatecaches(self):
408 408 pass
409 409
410 410 def markremoved(self, fn):
411 411 pass
412 412
413 413 def __contains__(self, path):
414 414 '''Checks if the store contains path'''
415 415 path = "/".join(("data", path))
416 416 # file?
417 417 if self.vfs.exists(path + ".i"):
418 418 return True
419 419 # dir?
420 420 if not path.endswith("/"):
421 421 path = path + "/"
422 422 return self.vfs.exists(path)
423 423
424 424 class encodedstore(basicstore):
425 425 def __init__(self, path, vfstype):
426 426 vfs = vfstype(path + '/store')
427 427 self.path = vfs.base
428 428 self.createmode = _calcmode(vfs)
429 429 vfs.createmode = self.createmode
430 430 self.rawvfs = vfs
431 431 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
432 432 self.opener = self.vfs
433 433
434 434 def datafiles(self, matcher=None):
435 435 for a, b, size in super(encodedstore, self).datafiles():
436 436 try:
437 437 a = decodefilename(a)
438 438 except KeyError:
439 439 a = None
440 440 if a is not None and not _matchtrackedpath(a, matcher):
441 441 continue
442 442 yield a, b, size
443 443
444 444 def join(self, f):
445 445 return self.path + '/' + encodefilename(f)
446 446
447 447 def copylist(self):
448 448 return (['requires', '00changelog.i'] +
449 449 ['store/' + f for f in _data.split()])
450 450
451 451 class fncache(object):
452 452 # the filename used to be partially encoded
453 453 # hence the encodedir/decodedir dance
454 454 def __init__(self, vfs):
455 455 self.vfs = vfs
456 456 self.entries = None
457 457 self._dirty = False
458 458 # set of new additions to fncache
459 459 self.addls = set()
460 460
461 461 def _load(self):
462 462 '''fill the entries from the fncache file'''
463 463 self._dirty = False
464 464 try:
465 465 fp = self.vfs('fncache', mode='rb')
466 466 except IOError:
467 467 # skip nonexistent file
468 468 self.entries = set()
469 469 return
470 470
471 471 self.entries = set()
472 472 chunk = b''
473 473 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
474 474 chunk += c
475 475 try:
476 476 p = chunk.rindex(b'\n')
477 477 self.entries.update(decodedir(chunk[:p + 1]).splitlines())
478 478 chunk = chunk[p + 1:]
479 479 except ValueError:
480 480 # substring '\n' not found, maybe the entry is bigger than the
481 481 # chunksize, so let's keep iterating
482 482 pass
483 483
484 484 if chunk:
485 485 raise error.Abort(_("fncache does not end with a newline"),
486 486 hint=_("use 'hg debugrebuildfncache' to rebuild"
487 487 " the fncache"))
488 488 self._checkentries(fp)
489 489 fp.close()
490 490
491 491 def _checkentries(self, fp):
492 492 """ make sure there is no empty string in entries """
493 493 if '' in self.entries:
494 494 fp.seek(0)
495 495 for n, line in enumerate(util.iterfile(fp)):
496 496 if not line.rstrip('\n'):
497 497 t = _('invalid entry in fncache, line %d') % (n + 1)
498 498 raise error.Abort(t)
499 499
500 500 def write(self, tr):
501 501 if self._dirty:
502 502 assert self.entries is not None
503 503 self.entries = self.entries | self.addls
504 504 self.addls = set()
505 505 tr.addbackup('fncache')
506 506 fp = self.vfs('fncache', mode='wb', atomictemp=True)
507 507 if self.entries:
508 508 fp.write(encodedir('\n'.join(self.entries) + '\n'))
509 509 fp.close()
510 510 self._dirty = False
511 511 if self.addls:
512 512 # if we have just new entries, let's append them to the fncache
513 513 tr.addbackup('fncache')
514 514 fp = self.vfs('fncache', mode='ab', atomictemp=True)
515 515 if self.addls:
516 516 fp.write(encodedir('\n'.join(self.addls) + '\n'))
517 517 fp.close()
518 518 self.entries = None
519 519 self.addls = set()
520 520
521 521 def add(self, fn):
522 522 if self.entries is None:
523 523 self._load()
524 524 if fn not in self.entries:
525 525 self.addls.add(fn)
526 526
527 527 def remove(self, fn):
528 528 if self.entries is None:
529 529 self._load()
530 530 if fn in self.addls:
531 531 self.addls.remove(fn)
532 532 return
533 533 try:
534 534 self.entries.remove(fn)
535 535 self._dirty = True
536 536 except KeyError:
537 537 pass
538 538
539 539 def __contains__(self, fn):
540 540 if fn in self.addls:
541 541 return True
542 542 if self.entries is None:
543 543 self._load()
544 544 return fn in self.entries
545 545
546 546 def __iter__(self):
547 547 if self.entries is None:
548 548 self._load()
549 549 return iter(self.entries | self.addls)
550 550
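# Editor's aside -- a runnable sketch, not part of this changeset: the
# chunked reading scheme _load() uses above. Reading the fncache in
# fixed-size chunks avoids materializing a second full copy in memory;
# each chunk is split at its last newline and the tail is carried over.
# `read_entries` is a hypothetical standalone name.
import functools
import io

def read_entries(fp, chunksize=10 ** 6):
    entries = set()
    chunk = b''
    for c in iter(functools.partial(fp.read, chunksize), b''):
        chunk += c
        p = chunk.rfind(b'\n')
        if p >= 0:                       # split off the complete lines
            entries.update(chunk[:p + 1].splitlines())
            chunk = chunk[p + 1:]
    if chunk:                            # no trailing newline: corrupt file
        raise ValueError('fncache does not end with a newline')
    return entries

data = io.BytesIO(b'data/a.i\ndata/b/c.d\n')
assert read_entries(data, chunksize=4) == {b'data/a.i', b'data/b/c.d'}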
551 551 class _fncachevfs(vfsmod.proxyvfs):
552 552 def __init__(self, vfs, fnc, encode):
553 553 vfsmod.proxyvfs.__init__(self, vfs)
554 554 self.fncache = fnc
555 555 self.encode = encode
556 556
557 557 def __call__(self, path, mode='r', *args, **kw):
558 558 encoded = self.encode(path)
559 559 if mode not in ('r', 'rb') and (path.startswith('data/') or
560 560 path.startswith('meta/')):
561 561 # do not trigger a fncache load when adding a file that already is
562 562 # known to exist.
563 563 notload = self.fncache.entries is None and self.vfs.exists(encoded)
564 564 if notload and 'a' in mode and not self.vfs.stat(encoded).st_size:
565 565 # when appending to an existing file, if the file has size zero,
566 566 # it should be considered as missing. Such zero-size files are
567 567 # the result of truncation when a transaction is aborted.
568 568 notload = False
569 569 if not notload:
570 570 self.fncache.add(path)
571 571 return self.vfs(encoded, mode, *args, **kw)
572 572
573 573 def join(self, path):
574 574 if path:
575 575 return self.vfs.join(self.encode(path))
576 576 else:
577 577 return self.vfs.join(path)
578 578
579 579 class fncachestore(basicstore):
580 580 def __init__(self, path, vfstype, dotencode):
581 581 if dotencode:
582 582 encode = _pathencode
583 583 else:
584 584 encode = _plainhybridencode
585 585 self.encode = encode
586 586 vfs = vfstype(path + '/store')
587 587 self.path = vfs.base
588 588 self.pathsep = self.path + '/'
589 589 self.createmode = _calcmode(vfs)
590 590 vfs.createmode = self.createmode
591 591 self.rawvfs = vfs
592 592 fnc = fncache(vfs)
593 593 self.fncache = fnc
594 594 self.vfs = _fncachevfs(vfs, fnc, encode)
595 595 self.opener = self.vfs
596 596
597 597 def join(self, f):
598 598 return self.pathsep + self.encode(f)
599 599
600 600 def getsize(self, path):
601 601 return self.rawvfs.stat(path).st_size
602 602
603 603 def datafiles(self, matcher=None):
604 604 for f in sorted(self.fncache):
605 605 if not _matchtrackedpath(f, matcher):
606 606 continue
607 607 ef = self.encode(f)
608 608 try:
609 609 yield f, ef, self.getsize(ef)
610 610 except OSError as err:
611 611 if err.errno != errno.ENOENT:
612 612 raise
613 613
614 614 def copylist(self):
615 615 d = ('bookmarks narrowspec data meta dh fncache phaseroots obsstore'
616 616 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
617 617 return (['requires', '00changelog.i'] +
618 618 ['store/' + f for f in d.split()])
619 619
620 620 def write(self, tr):
621 621 self.fncache.write(tr)
622 622
623 623 def invalidatecaches(self):
624 624 self.fncache.entries = None
625 625 self.fncache.addls = set()
626 626
627 627 def markremoved(self, fn):
628 628 self.fncache.remove(fn)
629 629
630 630 def _exists(self, f):
631 631 ef = self.encode(f)
632 632 try:
633 633 self.getsize(ef)
634 634 return True
635 635 except OSError as err:
636 636 if err.errno != errno.ENOENT:
637 637 raise
638 638 # nonexistent entry
639 639 return False
640 640
641 641 def __contains__(self, path):
642 642 '''Checks if the store contains path'''
643 643 path = "/".join(("data", path))
644 644 # check for files (exact match)
645 645 e = path + '.i'
646 646 if e in self.fncache and self._exists(e):
647 647 return True
648 648 # now check for directories (prefix match)
649 649 if not path.endswith('/'):
650 650 path += '/'
651 651 for e in self.fncache:
652 652 if e.startswith(path) and self._exists(e):
653 653 return True
654 654 return False
@@ -1,31 +1,34 b''
1 1 == New Features ==
2 2
3 3 * New config `commands.commit.post-status` shows status after successful
4 4 commit.
5 5
6 6
7 7 == New Experimental Features ==
8 8
9 9 * New config `experimental.log.topo` makes `hg log -G` use
10 10 topological sorting. This is especially useful for aliases since it
11 11 lets the alias accept an `-r` option while still using topological
12 12 sorting with or without the `-r` (unlike if you use the `sort(...,
13 13 topo)` revset).
14 14
15 15
16 16 == Bug Fixes ==
17 17
18 18
19 19 == Backwards Compatibility Changes ==
20 20
21 21 * Removed (experimental) support for log graph lines mixing
22 22 parent/grandparent styles. Setting
23 23 e.g. `experimental.graphstyle.parent = !` and
24 24 `experimental.graphstyle.grandparent = 3.` would use `!` for the
25 25 first three lines of the graph and then `.`. This is no longer
26 26 supported.
27 27
28 28
29 29 == Internal API Changes ==
30 30
31 31 * Matchers are no longer iterable. Use `match.files()` instead.
32
33 * `match.visitdir()` and `match.visitchildrenset()` now expect the
34 empty string instead of '.' to indicate the root directory.
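# Editor's aside -- a minimal sketch of what the release note above
# means for callers, not part of this changeset. `normalizerootdir`
# here is a simplified stand-in for the helper of the same name used in
# the match.py hunks (the real one also takes the calling function's
# name, for diagnostics).

def normalizerootdir(dir):
    # callers used to pass '.' for the repository root; new code passes ''
    return '' if dir == '.' else dir

assert normalizerootdir('.') == ''              # legacy spelling remapped
assert normalizerootdir('dir/subdir') == 'dir/subdir'
# before: m.visitdir('.')  /  m.visitchildrenset('.')
# after:  m.visitdir('')   /  m.visitchildrenset('')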
@@ -1,832 +1,832 b''
1 1 from __future__ import absolute_import
2 2
3 3 import unittest
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import (
8 8 match as matchmod,
9 9 util,
10 10 )
11 11
12 12 class BaseMatcherTests(unittest.TestCase):
13 13
14 14 def testVisitdir(self):
15 15 m = matchmod.basematcher()
16 self.assertTrue(m.visitdir(b'.'))
16 self.assertTrue(m.visitdir(b''))
17 17 self.assertTrue(m.visitdir(b'dir'))
18 18
19 19 def testVisitchildrenset(self):
20 20 m = matchmod.basematcher()
21 self.assertEqual(m.visitchildrenset(b'.'), b'this')
21 self.assertEqual(m.visitchildrenset(b''), b'this')
22 22 self.assertEqual(m.visitchildrenset(b'dir'), b'this')
23 23
24 24 class AlwaysMatcherTests(unittest.TestCase):
25 25
26 26 def testVisitdir(self):
27 27 m = matchmod.alwaysmatcher()
28 self.assertEqual(m.visitdir(b'.'), b'all')
28 self.assertEqual(m.visitdir(b''), b'all')
29 29 self.assertEqual(m.visitdir(b'dir'), b'all')
30 30
31 31 def testVisitchildrenset(self):
32 32 m = matchmod.alwaysmatcher()
33 self.assertEqual(m.visitchildrenset(b'.'), b'all')
33 self.assertEqual(m.visitchildrenset(b''), b'all')
34 34 self.assertEqual(m.visitchildrenset(b'dir'), b'all')
35 35
36 36 class NeverMatcherTests(unittest.TestCase):
37 37
38 38 def testVisitdir(self):
39 39 m = matchmod.nevermatcher()
40 self.assertFalse(m.visitdir(b'.'))
40 self.assertFalse(m.visitdir(b''))
41 41 self.assertFalse(m.visitdir(b'dir'))
42 42
43 43 def testVisitchildrenset(self):
44 44 m = matchmod.nevermatcher()
45 self.assertEqual(m.visitchildrenset(b'.'), set())
45 self.assertEqual(m.visitchildrenset(b''), set())
46 46 self.assertEqual(m.visitchildrenset(b'dir'), set())
47 47
48 48 class PredicateMatcherTests(unittest.TestCase):
49 49 # predicatematcher does not currently define either of these methods, so
50 50 # this is equivalent to BaseMatcherTests.
51 51
52 52 def testVisitdir(self):
53 53 m = matchmod.predicatematcher(lambda *a: False)
54 self.assertTrue(m.visitdir(b'.'))
54 self.assertTrue(m.visitdir(b''))
55 55 self.assertTrue(m.visitdir(b'dir'))
56 56
57 57 def testVisitchildrenset(self):
58 58 m = matchmod.predicatematcher(lambda *a: False)
59 self.assertEqual(m.visitchildrenset(b'.'), b'this')
59 self.assertEqual(m.visitchildrenset(b''), b'this')
60 60 self.assertEqual(m.visitchildrenset(b'dir'), b'this')
61 61
62 62 class PatternMatcherTests(unittest.TestCase):
63 63
64 64 def testVisitdirPrefix(self):
65 65 m = matchmod.match(b'x', b'', patterns=[b'path:dir/subdir'])
66 66 assert isinstance(m, matchmod.patternmatcher)
67 self.assertTrue(m.visitdir(b'.'))
67 self.assertTrue(m.visitdir(b''))
68 68 self.assertTrue(m.visitdir(b'dir'))
69 69 self.assertEqual(m.visitdir(b'dir/subdir'), b'all')
70 70 # OPT: This should probably be 'all' if its parent is?
71 71 self.assertTrue(m.visitdir(b'dir/subdir/x'))
72 72 self.assertFalse(m.visitdir(b'folder'))
73 73
74 74 def testVisitchildrensetPrefix(self):
75 75 m = matchmod.match(b'x', b'', patterns=[b'path:dir/subdir'])
76 76 assert isinstance(m, matchmod.patternmatcher)
77 self.assertEqual(m.visitchildrenset(b'.'), b'this')
77 self.assertEqual(m.visitchildrenset(b''), b'this')
78 78 self.assertEqual(m.visitchildrenset(b'dir'), b'this')
79 79 self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'all')
80 80 # OPT: This should probably be 'all' if its parent is?
81 81 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), b'this')
82 82 self.assertEqual(m.visitchildrenset(b'folder'), set())
83 83
84 84 def testVisitdirRootfilesin(self):
85 85 m = matchmod.match(b'x', b'', patterns=[b'rootfilesin:dir/subdir'])
86 86 assert isinstance(m, matchmod.patternmatcher)
87 self.assertTrue(m.visitdir(b'.'))
87 self.assertTrue(m.visitdir(b''))
88 88 self.assertFalse(m.visitdir(b'dir/subdir/x'))
89 89 self.assertFalse(m.visitdir(b'folder'))
90 90 # FIXME: These should probably be True.
91 91 self.assertFalse(m.visitdir(b'dir'))
92 92 self.assertFalse(m.visitdir(b'dir/subdir'))
93 93
94 94 def testVisitchildrensetRootfilesin(self):
95 95 m = matchmod.match(b'x', b'', patterns=[b'rootfilesin:dir/subdir'])
96 96 assert isinstance(m, matchmod.patternmatcher)
97 self.assertEqual(m.visitchildrenset(b'.'), b'this')
97 self.assertEqual(m.visitchildrenset(b''), b'this')
98 98 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), set())
99 99 self.assertEqual(m.visitchildrenset(b'folder'), set())
100 100 # FIXME: These should probably be {'subdir'} and 'this', respectively,
101 101 # or at least 'this' and 'this'.
102 102 self.assertEqual(m.visitchildrenset(b'dir'), set())
103 103 self.assertEqual(m.visitchildrenset(b'dir/subdir'), set())
104 104
105 105 def testVisitdirGlob(self):
106 106 m = matchmod.match(b'x', b'', patterns=[b'glob:dir/z*'])
107 107 assert isinstance(m, matchmod.patternmatcher)
108 self.assertTrue(m.visitdir(b'.'))
108 self.assertTrue(m.visitdir(b''))
109 109 self.assertTrue(m.visitdir(b'dir'))
110 110 self.assertFalse(m.visitdir(b'folder'))
111 111 # OPT: these should probably be False.
112 112 self.assertTrue(m.visitdir(b'dir/subdir'))
113 113 self.assertTrue(m.visitdir(b'dir/subdir/x'))
114 114
115 115 def testVisitchildrensetGlob(self):
116 116 m = matchmod.match(b'x', b'', patterns=[b'glob:dir/z*'])
117 117 assert isinstance(m, matchmod.patternmatcher)
118 self.assertEqual(m.visitchildrenset(b'.'), b'this')
118 self.assertEqual(m.visitchildrenset(b''), b'this')
119 119 self.assertEqual(m.visitchildrenset(b'folder'), set())
120 120 self.assertEqual(m.visitchildrenset(b'dir'), b'this')
121 121 # OPT: these should probably be set().
122 122 self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'this')
123 123 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), b'this')
124 124
125 125 class IncludeMatcherTests(unittest.TestCase):
126 126
127 127 def testVisitdirPrefix(self):
128 128 m = matchmod.match(b'x', b'', include=[b'path:dir/subdir'])
129 129 assert isinstance(m, matchmod.includematcher)
130 self.assertTrue(m.visitdir(b'.'))
130 self.assertTrue(m.visitdir(b''))
131 131 self.assertTrue(m.visitdir(b'dir'))
132 132 self.assertEqual(m.visitdir(b'dir/subdir'), b'all')
133 133 # OPT: This should probably be 'all' if its parent is?
134 134 self.assertTrue(m.visitdir(b'dir/subdir/x'))
135 135 self.assertFalse(m.visitdir(b'folder'))
136 136
137 137 def testVisitchildrensetPrefix(self):
138 138 m = matchmod.match(b'x', b'', include=[b'path:dir/subdir'])
139 139 assert isinstance(m, matchmod.includematcher)
140 self.assertEqual(m.visitchildrenset(b'.'), {b'dir'})
140 self.assertEqual(m.visitchildrenset(b''), {b'dir'})
141 141 self.assertEqual(m.visitchildrenset(b'dir'), {b'subdir'})
142 142 self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'all')
143 143 # OPT: This should probably be 'all' if its parent is?
144 144 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), b'this')
145 145 self.assertEqual(m.visitchildrenset(b'folder'), set())
146 146
147 147 def testVisitdirRootfilesin(self):
148 148 m = matchmod.match(b'x', b'', include=[b'rootfilesin:dir/subdir'])
149 149 assert isinstance(m, matchmod.includematcher)
150 self.assertTrue(m.visitdir(b'.'))
150 self.assertTrue(m.visitdir(b''))
151 151 self.assertTrue(m.visitdir(b'dir'))
152 152 self.assertTrue(m.visitdir(b'dir/subdir'))
153 153 self.assertFalse(m.visitdir(b'dir/subdir/x'))
154 154 self.assertFalse(m.visitdir(b'folder'))
155 155
156 156 def testVisitchildrensetRootfilesin(self):
157 157 m = matchmod.match(b'x', b'', include=[b'rootfilesin:dir/subdir'])
158 158 assert isinstance(m, matchmod.includematcher)
159 self.assertEqual(m.visitchildrenset(b'.'), {b'dir'})
159 self.assertEqual(m.visitchildrenset(b''), {b'dir'})
160 160 self.assertEqual(m.visitchildrenset(b'dir'), {b'subdir'})
161 161 self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'this')
162 162 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), set())
163 163 self.assertEqual(m.visitchildrenset(b'folder'), set())
164 164
165 165 def testVisitdirGlob(self):
166 166 m = matchmod.match(b'x', b'', include=[b'glob:dir/z*'])
167 167 assert isinstance(m, matchmod.includematcher)
168 self.assertTrue(m.visitdir(b'.'))
168 self.assertTrue(m.visitdir(b''))
169 169 self.assertTrue(m.visitdir(b'dir'))
170 170 self.assertFalse(m.visitdir(b'folder'))
171 171 # OPT: these should probably be False.
172 172 self.assertTrue(m.visitdir(b'dir/subdir'))
173 173 self.assertTrue(m.visitdir(b'dir/subdir/x'))
174 174
175 175 def testVisitchildrensetGlob(self):
176 176 m = matchmod.match(b'x', b'', include=[b'glob:dir/z*'])
177 177 assert isinstance(m, matchmod.includematcher)
178 self.assertEqual(m.visitchildrenset(b'.'), {b'dir'})
178 self.assertEqual(m.visitchildrenset(b''), {b'dir'})
179 179 self.assertEqual(m.visitchildrenset(b'folder'), set())
180 180 self.assertEqual(m.visitchildrenset(b'dir'), b'this')
181 181 # OPT: these should probably be set().
182 182 self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'this')
183 183 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), b'this')
184 184
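# A minimal sketch (not part of this patch) of how a caller might drive a
# pruned walk with visitchildrenset(), per the include tests above. The
# `walk` helper and the nested-dict `tree` are hypothetical illustrations;
# only matchmod.match() and visitchildrenset() are real API.
from mercurial import match as matchmod

def walk(matcher, tree, path=b''):
    # `tree` maps names to dicts (directories) or None (files).
    children = matcher.visitchildrenset(path)
    if children == set():
        return  # nothing under `path` can match; prune the whole subtree
    for name, sub in sorted(tree.items()):
        if children not in (b'all', b'this') and name not in children:
            continue  # an explicit child set rules this entry out
        full = path + b'/' + name if path else name
        if sub is None:
            if matcher(full):
                yield full
        else:
            for f in walk(matcher, sub, full):
                yield f

m = matchmod.match(b'/repo', b'', include=[b'path:dir/subdir'])
tree = {b'dir': {b'subdir': {b'f.txt': None}, b'other.txt': None},
        b'folder': {b'g.txt': None}}
print(list(walk(m, tree)))  # -> [b'dir/subdir/f.txt']
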
185 185 class ExactMatcherTests(unittest.TestCase):
186 186
187 187 def testVisitdir(self):
188 188 m = matchmod.exact(files=[b'dir/subdir/foo.txt'])
189 189 assert isinstance(m, matchmod.exactmatcher)
190 self.assertTrue(m.visitdir(b'.'))
190 self.assertTrue(m.visitdir(b''))
191 191 self.assertTrue(m.visitdir(b'dir'))
192 192 self.assertTrue(m.visitdir(b'dir/subdir'))
193 193 self.assertFalse(m.visitdir(b'dir/subdir/foo.txt'))
194 194 self.assertFalse(m.visitdir(b'dir/foo'))
195 195 self.assertFalse(m.visitdir(b'dir/subdir/x'))
196 196 self.assertFalse(m.visitdir(b'folder'))
197 197
198 198 def testVisitchildrenset(self):
199 199 m = matchmod.exact(files=[b'dir/subdir/foo.txt'])
200 200 assert isinstance(m, matchmod.exactmatcher)
201 self.assertEqual(m.visitchildrenset(b'.'), {b'dir'})
201 self.assertEqual(m.visitchildrenset(b''), {b'dir'})
202 202 self.assertEqual(m.visitchildrenset(b'dir'), {b'subdir'})
203 203 self.assertEqual(m.visitchildrenset(b'dir/subdir'), {b'foo.txt'})
204 204 self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), set())
205 205 self.assertEqual(m.visitchildrenset(b'dir/subdir/foo.txt'), set())
206 206 self.assertEqual(m.visitchildrenset(b'folder'), set())
207 207
208 208 def testVisitchildrensetFilesAndDirs(self):
209 209 m = matchmod.exact(files=[b'rootfile.txt',
210 210 b'a/file1.txt',
211 211 b'a/b/file2.txt',
212 212 # no file in a/b/c
213 213 b'a/b/c/d/file4.txt'])
214 214 assert isinstance(m, matchmod.exactmatcher)
215 self.assertEqual(m.visitchildrenset(b'.'), {b'a', b'rootfile.txt'})
215 self.assertEqual(m.visitchildrenset(b''), {b'a', b'rootfile.txt'})
216 216 self.assertEqual(m.visitchildrenset(b'a'), {b'b', b'file1.txt'})
217 217 self.assertEqual(m.visitchildrenset(b'a/b'), {b'c', b'file2.txt'})
218 218 self.assertEqual(m.visitchildrenset(b'a/b/c'), {b'd'})
219 219 self.assertEqual(m.visitchildrenset(b'a/b/c/d'), {b'file4.txt'})
220 220 self.assertEqual(m.visitchildrenset(b'a/b/c/d/e'), set())
221 221 self.assertEqual(m.visitchildrenset(b'folder'), set())
222 222
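# A minimal sketch (not part of this patch) of the exactmatcher behavior the
# ExactMatcherTests above exercise; the file names are illustrative only.
from mercurial import match as matchmod

em = matchmod.exact(files=[b'dir/subdir/foo.txt'])
assert em(b'dir/subdir/foo.txt')
assert not em(b'dir/subdir/bar.txt')          # exact paths, no globbing
# visitchildrenset() names only the children on the way to listed files:
assert em.visitchildrenset(b'') == {b'dir'}
assert em.visitchildrenset(b'dir') == {b'subdir'}
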
223 223 class DifferenceMatcherTests(unittest.TestCase):
224 224
225 225 def testVisitdirM2always(self):
226 226 m1 = matchmod.alwaysmatcher()
227 227 m2 = matchmod.alwaysmatcher()
228 228 dm = matchmod.differencematcher(m1, m2)
229 229 # dm should be equivalent to a nevermatcher.
230 self.assertFalse(dm.visitdir(b'.'))
230 self.assertFalse(dm.visitdir(b''))
231 231 self.assertFalse(dm.visitdir(b'dir'))
232 232 self.assertFalse(dm.visitdir(b'dir/subdir'))
233 233 self.assertFalse(dm.visitdir(b'dir/subdir/z'))
234 234 self.assertFalse(dm.visitdir(b'dir/foo'))
235 235 self.assertFalse(dm.visitdir(b'dir/subdir/x'))
236 236 self.assertFalse(dm.visitdir(b'folder'))
237 237
238 238 def testVisitchildrensetM2always(self):
239 239 m1 = matchmod.alwaysmatcher()
240 240 m2 = matchmod.alwaysmatcher()
241 241 dm = matchmod.differencematcher(m1, m2)
242 242 # dm should be equivalent to a nevermatcher.
243 self.assertEqual(dm.visitchildrenset(b'.'), set())
243 self.assertEqual(dm.visitchildrenset(b''), set())
244 244 self.assertEqual(dm.visitchildrenset(b'dir'), set())
245 245 self.assertEqual(dm.visitchildrenset(b'dir/subdir'), set())
246 246 self.assertEqual(dm.visitchildrenset(b'dir/subdir/z'), set())
247 247 self.assertEqual(dm.visitchildrenset(b'dir/foo'), set())
248 248 self.assertEqual(dm.visitchildrenset(b'dir/subdir/x'), set())
249 249 self.assertEqual(dm.visitchildrenset(b'folder'), set())
250 250
251 251 def testVisitdirM2never(self):
252 252 m1 = matchmod.alwaysmatcher()
253 253 m2 = matchmod.nevermatcher()
254 254 dm = matchmod.differencematcher(m1, m2)
255 255 # dm should be equivalent to an alwaysmatcher.
256 256 #
257 257 # We're testing Equal-to-True instead of just 'assertTrue' since
258 258 # assertTrue does NOT verify that it's a bool, just that it's truthy.
259 259 # While we may want to eventually make these return 'all', they should
260 260 # not currently do so.
261 self.assertEqual(dm.visitdir(b'.'), b'all')
261 self.assertEqual(dm.visitdir(b''), b'all')
262 262 self.assertEqual(dm.visitdir(b'dir'), b'all')
263 263 self.assertEqual(dm.visitdir(b'dir/subdir'), b'all')
264 264 self.assertEqual(dm.visitdir(b'dir/subdir/z'), b'all')
265 265 self.assertEqual(dm.visitdir(b'dir/foo'), b'all')
266 266 self.assertEqual(dm.visitdir(b'dir/subdir/x'), b'all')
267 267 self.assertEqual(dm.visitdir(b'folder'), b'all')
268 268
269 269 def testVisitchildrensetM2never(self):
270 270 m1 = matchmod.alwaysmatcher()
271 271 m2 = matchmod.nevermatcher()
272 272 dm = matchmod.differencematcher(m1, m2)
273 273 # dm should be equivalent to an alwaysmatcher.
274 self.assertEqual(dm.visitchildrenset(b'.'), b'all')
274 self.assertEqual(dm.visitchildrenset(b''), b'all')
275 275 self.assertEqual(dm.visitchildrenset(b'dir'), b'all')
276 276 self.assertEqual(dm.visitchildrenset(b'dir/subdir'), b'all')
277 277 self.assertEqual(dm.visitchildrenset(b'dir/subdir/z'), b'all')
278 278 self.assertEqual(dm.visitchildrenset(b'dir/foo'), b'all')
279 279 self.assertEqual(dm.visitchildrenset(b'dir/subdir/x'), b'all')
280 280 self.assertEqual(dm.visitchildrenset(b'folder'), b'all')
281 281
282 282 def testVisitdirM2SubdirPrefix(self):
283 283 m1 = matchmod.alwaysmatcher()
284 284 m2 = matchmod.match(b'', b'', patterns=[b'path:dir/subdir'])
285 285 dm = matchmod.differencematcher(m1, m2)
286 self.assertEqual(dm.visitdir(b'.'), True)
286 self.assertEqual(dm.visitdir(b''), True)
287 287 self.assertEqual(dm.visitdir(b'dir'), True)
288 288 self.assertFalse(dm.visitdir(b'dir/subdir'))
289 289 # OPT: We should probably return False for these; we don't because
290 290 # patternmatcher.visitdir() (our m2) doesn't return 'all' for subdirs of
291 291 # an 'all' pattern, just True.
292 292 self.assertEqual(dm.visitdir(b'dir/subdir/z'), True)
293 293 self.assertEqual(dm.visitdir(b'dir/subdir/x'), True)
294 294 self.assertEqual(dm.visitdir(b'dir/foo'), b'all')
295 295 self.assertEqual(dm.visitdir(b'folder'), b'all')
296 296
297 297 def testVisitchildrensetM2SubdirPrefix(self):
298 298 m1 = matchmod.alwaysmatcher()
299 299 m2 = matchmod.match(b'', b'', patterns=[b'path:dir/subdir'])
300 300 dm = matchmod.differencematcher(m1, m2)
301 self.assertEqual(dm.visitchildrenset(b'.'), b'this')
301 self.assertEqual(dm.visitchildrenset(b''), b'this')
302 302 self.assertEqual(dm.visitchildrenset(b'dir'), b'this')
303 303 self.assertEqual(dm.visitchildrenset(b'dir/subdir'), set())
304 304 self.assertEqual(dm.visitchildrenset(b'dir/foo'), b'all')
305 305 self.assertEqual(dm.visitchildrenset(b'folder'), b'all')
306 306 # OPT: We should probably return set() for these; we don't because
307 307 # patternmatcher.visitdir() (our m2) doesn't return 'all' for subdirs of
308 308 # an 'all' pattern, just 'this'.
309 309 self.assertEqual(dm.visitchildrenset(b'dir/subdir/z'), b'this')
310 310 self.assertEqual(dm.visitchildrenset(b'dir/subdir/x'), b'this')
311 311
312 312 # We're using includematcher instead of patterns because it behaves slightly
313 313 # better (giving narrower results) than patternmatcher.
314 314 def testVisitdirIncludeInclude(self):
315 315 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
316 316 m2 = matchmod.match(b'', b'', include=[b'rootfilesin:dir'])
317 317 dm = matchmod.differencematcher(m1, m2)
318 self.assertEqual(dm.visitdir(b'.'), True)
318 self.assertEqual(dm.visitdir(b''), True)
319 319 self.assertEqual(dm.visitdir(b'dir'), True)
320 320 self.assertEqual(dm.visitdir(b'dir/subdir'), b'all')
321 321 self.assertFalse(dm.visitdir(b'dir/foo'))
322 322 self.assertFalse(dm.visitdir(b'folder'))
323 323 # OPT: We should probably return False for these; we don't because
324 324 # patternmatcher.visitdir() (our m2) doesn't return 'all' for subdirs of
325 325 # an 'all' pattern, just True.
326 326 self.assertEqual(dm.visitdir(b'dir/subdir/z'), True)
327 327 self.assertEqual(dm.visitdir(b'dir/subdir/x'), True)
328 328
329 329 def testVisitchildrensetIncludeInclude(self):
330 330 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
331 331 m2 = matchmod.match(b'', b'', include=[b'rootfilesin:dir'])
332 332 dm = matchmod.differencematcher(m1, m2)
333 self.assertEqual(dm.visitchildrenset(b'.'), {b'dir'})
333 self.assertEqual(dm.visitchildrenset(b''), {b'dir'})
334 334 self.assertEqual(dm.visitchildrenset(b'dir'), {b'subdir'})
335 335 self.assertEqual(dm.visitchildrenset(b'dir/subdir'), b'all')
336 336 self.assertEqual(dm.visitchildrenset(b'dir/foo'), set())
337 337 self.assertEqual(dm.visitchildrenset(b'folder'), set())
338 338 # OPT: We should probably return set() for these; we don't because
339 339 # patternmatcher.visitdir() (our m2) doesn't return 'all' for subdirs of
340 340 # an 'all' pattern, just 'this'.
341 341 self.assertEqual(dm.visitchildrenset(b'dir/subdir/z'), b'this')
342 342 self.assertEqual(dm.visitchildrenset(b'dir/subdir/x'), b'this')
343 343
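# A minimal sketch (not part of this patch): differencematcher(m1, m2)
# matches exactly what m1 matches minus what m2 matches, as the
# DifferenceMatcherTests above assume. Patterns here are illustrative.
from mercurial import match as matchmod

m1 = matchmod.match(b'/repo', b'', include=[b'path:dir'])
m2 = matchmod.match(b'/repo', b'', include=[b'path:dir/subdir'])
dm = matchmod.differencematcher(m1, m2)
assert dm(b'dir/other.txt')          # in m1 only
assert not dm(b'dir/subdir/f.txt')   # subtracted by m2
assert not dm(b'folder/g.txt')       # never in m1 to begin with
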
344 344 class IntersectionMatcherTests(unittest.TestCase):
345 345
346 346 def testVisitdirM2always(self):
347 347 m1 = matchmod.alwaysmatcher()
348 348 m2 = matchmod.alwaysmatcher()
349 349 im = matchmod.intersectmatchers(m1, m2)
350 350 # im should be equivalent to an alwaysmatcher.
351 self.assertEqual(im.visitdir(b'.'), b'all')
351 self.assertEqual(im.visitdir(b''), b'all')
352 352 self.assertEqual(im.visitdir(b'dir'), b'all')
353 353 self.assertEqual(im.visitdir(b'dir/subdir'), b'all')
354 354 self.assertEqual(im.visitdir(b'dir/subdir/z'), b'all')
355 355 self.assertEqual(im.visitdir(b'dir/foo'), b'all')
356 356 self.assertEqual(im.visitdir(b'dir/subdir/x'), b'all')
357 357 self.assertEqual(im.visitdir(b'folder'), b'all')
358 358
359 359 def testVisitchildrensetM2always(self):
360 360 m1 = matchmod.alwaysmatcher()
361 361 m2 = matchmod.alwaysmatcher()
362 362 im = matchmod.intersectmatchers(m1, m2)
363 363 # im should be equivalent to an alwaysmatcher.
364 self.assertEqual(im.visitchildrenset(b'.'), b'all')
364 self.assertEqual(im.visitchildrenset(b''), b'all')
365 365 self.assertEqual(im.visitchildrenset(b'dir'), b'all')
366 366 self.assertEqual(im.visitchildrenset(b'dir/subdir'), b'all')
367 367 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), b'all')
368 368 self.assertEqual(im.visitchildrenset(b'dir/foo'), b'all')
369 369 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), b'all')
370 370 self.assertEqual(im.visitchildrenset(b'folder'), b'all')
371 371
372 372 def testVisitdirM2never(self):
373 373 m1 = matchmod.alwaysmatcher()
374 374 m2 = matchmod.nevermatcher()
375 375 im = matchmod.intersectmatchers(m1, m2)
376 376 # im should be equivalent to a nevermatcher.
377 self.assertFalse(im.visitdir(b'.'))
377 self.assertFalse(im.visitdir(b''))
378 378 self.assertFalse(im.visitdir(b'dir'))
379 379 self.assertFalse(im.visitdir(b'dir/subdir'))
380 380 self.assertFalse(im.visitdir(b'dir/subdir/z'))
381 381 self.assertFalse(im.visitdir(b'dir/foo'))
382 382 self.assertFalse(im.visitdir(b'dir/subdir/x'))
383 383 self.assertFalse(im.visitdir(b'folder'))
384 384
385 385 def testVisitchildrensetM2never(self):
386 386 m1 = matchmod.alwaysmatcher()
387 387 m2 = matchmod.nevermatcher()
388 388 im = matchmod.intersectmatchers(m1, m2)
389 389 # im should be equivalent to a nevermatcher.
390 self.assertEqual(im.visitchildrenset(b'.'), set())
390 self.assertEqual(im.visitchildrenset(b''), set())
391 391 self.assertEqual(im.visitchildrenset(b'dir'), set())
392 392 self.assertEqual(im.visitchildrenset(b'dir/subdir'), set())
393 393 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), set())
394 394 self.assertEqual(im.visitchildrenset(b'dir/foo'), set())
395 395 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), set())
396 396 self.assertEqual(im.visitchildrenset(b'folder'), set())
397 397
398 398 def testVisitdirM2SubdirPrefix(self):
399 399 m1 = matchmod.alwaysmatcher()
400 400 m2 = matchmod.match(b'', b'', patterns=[b'path:dir/subdir'])
401 401 im = matchmod.intersectmatchers(m1, m2)
402 self.assertEqual(im.visitdir(b'.'), True)
402 self.assertEqual(im.visitdir(b''), True)
403 403 self.assertEqual(im.visitdir(b'dir'), True)
404 404 self.assertEqual(im.visitdir(b'dir/subdir'), b'all')
405 405 self.assertFalse(im.visitdir(b'dir/foo'))
406 406 self.assertFalse(im.visitdir(b'folder'))
407 407 # OPT: We should probably return 'all' for these; we don't because
408 408 # patternmatcher.visitdir() (our m2) doesn't return 'all' for subdirs of
409 409 # an 'all' pattern, just True.
410 410 self.assertEqual(im.visitdir(b'dir/subdir/z'), True)
411 411 self.assertEqual(im.visitdir(b'dir/subdir/x'), True)
412 412
413 413 def testVisitchildrensetM2SubdirPrefix(self):
414 414 m1 = matchmod.alwaysmatcher()
415 415 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
416 416 im = matchmod.intersectmatchers(m1, m2)
417 self.assertEqual(im.visitchildrenset(b'.'), {b'dir'})
417 self.assertEqual(im.visitchildrenset(b''), {b'dir'})
418 418 self.assertEqual(im.visitchildrenset(b'dir'), {b'subdir'})
419 419 self.assertEqual(im.visitchildrenset(b'dir/subdir'), b'all')
420 420 self.assertEqual(im.visitchildrenset(b'dir/foo'), set())
421 421 self.assertEqual(im.visitchildrenset(b'folder'), set())
422 422 # OPT: We should probably return 'all' for these
423 423 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), b'this')
424 424 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), b'this')
425 425
426 426 # We're using includematcher instead of patterns because it behaves slightly
427 427 # better (giving narrower results) than patternmatcher.
428 428 def testVisitdirIncludeInclude(self):
429 429 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
430 430 m2 = matchmod.match(b'', b'', include=[b'rootfilesin:dir'])
431 431 im = matchmod.intersectmatchers(m1, m2)
432 self.assertEqual(im.visitdir(b'.'), True)
432 self.assertEqual(im.visitdir(b''), True)
433 433 self.assertEqual(im.visitdir(b'dir'), True)
434 434 self.assertFalse(im.visitdir(b'dir/subdir'))
435 435 self.assertFalse(im.visitdir(b'dir/foo'))
436 436 self.assertFalse(im.visitdir(b'folder'))
437 437 self.assertFalse(im.visitdir(b'dir/subdir/z'))
438 438 self.assertFalse(im.visitdir(b'dir/subdir/x'))
439 439
440 440 def testVisitchildrensetIncludeInclude(self):
441 441 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
442 442 m2 = matchmod.match(b'', b'', include=[b'rootfilesin:dir'])
443 443 im = matchmod.intersectmatchers(m1, m2)
444 self.assertEqual(im.visitchildrenset(b'.'), {b'dir'})
444 self.assertEqual(im.visitchildrenset(b''), {b'dir'})
445 445 self.assertEqual(im.visitchildrenset(b'dir'), b'this')
446 446 self.assertEqual(im.visitchildrenset(b'dir/subdir'), set())
447 447 self.assertEqual(im.visitchildrenset(b'dir/foo'), set())
448 448 self.assertEqual(im.visitchildrenset(b'folder'), set())
449 449 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), set())
450 450 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), set())
451 451
452 452 # We're using includematcher instead of patterns because it behaves slightly
453 453 # better (giving narrower results) than patternmatcher.
454 454 def testVisitdirIncludeInclude2(self):
455 455 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
456 456 m2 = matchmod.match(b'', b'', include=[b'path:folder'])
457 457 im = matchmod.intersectmatchers(m1, m2)
458 458 # FIXME: is True correct here?
459 self.assertEqual(im.visitdir(b'.'), True)
459 self.assertEqual(im.visitdir(b''), True)
460 460 self.assertFalse(im.visitdir(b'dir'))
461 461 self.assertFalse(im.visitdir(b'dir/subdir'))
462 462 self.assertFalse(im.visitdir(b'dir/foo'))
463 463 self.assertFalse(im.visitdir(b'folder'))
464 464 self.assertFalse(im.visitdir(b'dir/subdir/z'))
465 465 self.assertFalse(im.visitdir(b'dir/subdir/x'))
466 466
467 467 def testVisitchildrensetIncludeInclude2(self):
468 468 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
469 469 m2 = matchmod.match(b'', b'', include=[b'path:folder'])
470 470 im = matchmod.intersectmatchers(m1, m2)
471 471 # FIXME: is set() correct here?
472 self.assertEqual(im.visitchildrenset(b'.'), set())
472 self.assertEqual(im.visitchildrenset(b''), set())
473 473 self.assertEqual(im.visitchildrenset(b'dir'), set())
474 474 self.assertEqual(im.visitchildrenset(b'dir/subdir'), set())
475 475 self.assertEqual(im.visitchildrenset(b'dir/foo'), set())
476 476 self.assertEqual(im.visitchildrenset(b'folder'), set())
477 477 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), set())
478 478 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), set())
479 479
480 480 # We're using includematcher instead of patterns because it behaves slightly
481 481 # better (giving narrower results) than patternmatcher.
482 482 def testVisitdirIncludeInclude3(self):
483 483 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
484 484 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
485 485 im = matchmod.intersectmatchers(m1, m2)
486 self.assertEqual(im.visitdir(b'.'), True)
486 self.assertEqual(im.visitdir(b''), True)
487 487 self.assertEqual(im.visitdir(b'dir'), True)
488 488 self.assertEqual(im.visitdir(b'dir/subdir'), True)
489 489 self.assertFalse(im.visitdir(b'dir/foo'))
490 490 self.assertFalse(im.visitdir(b'folder'))
491 491 self.assertFalse(im.visitdir(b'dir/subdir/z'))
492 492 # OPT: this should probably be 'all' not True.
493 493 self.assertEqual(im.visitdir(b'dir/subdir/x'), True)
494 494
495 495 def testVisitchildrensetIncludeInclude3(self):
496 496 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
497 497 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
498 498 im = matchmod.intersectmatchers(m1, m2)
499 self.assertEqual(im.visitchildrenset(b'.'), {b'dir'})
499 self.assertEqual(im.visitchildrenset(b''), {b'dir'})
500 500 self.assertEqual(im.visitchildrenset(b'dir'), {b'subdir'})
501 501 self.assertEqual(im.visitchildrenset(b'dir/subdir'), {b'x'})
502 502 self.assertEqual(im.visitchildrenset(b'dir/foo'), set())
503 503 self.assertEqual(im.visitchildrenset(b'folder'), set())
504 504 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), set())
505 505 # OPT: this should probably be 'all' not 'this'.
506 506 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), b'this')
507 507
508 508 # We're using includematcher instead of patterns because it behaves slightly
509 509 # better (giving narrower results) than patternmatcher.
510 510 def testVisitdirIncludeInclude4(self):
511 511 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
512 512 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir/z'])
513 513 im = matchmod.intersectmatchers(m1, m2)
514 514 # OPT: these next three could probably be False as well.
515 self.assertEqual(im.visitdir(b'.'), True)
515 self.assertEqual(im.visitdir(b''), True)
516 516 self.assertEqual(im.visitdir(b'dir'), True)
517 517 self.assertEqual(im.visitdir(b'dir/subdir'), True)
518 518 self.assertFalse(im.visitdir(b'dir/foo'))
519 519 self.assertFalse(im.visitdir(b'folder'))
520 520 self.assertFalse(im.visitdir(b'dir/subdir/z'))
521 521 self.assertFalse(im.visitdir(b'dir/subdir/x'))
522 522
523 523 def testVisitchildrensetIncludeInclude4(self):
524 524 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
525 525 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir/z'])
526 526 im = matchmod.intersectmatchers(m1, m2)
527 527 # OPT: these next two could probably be set() as well.
528 self.assertEqual(im.visitchildrenset(b'.'), {b'dir'})
528 self.assertEqual(im.visitchildrenset(b''), {b'dir'})
529 529 self.assertEqual(im.visitchildrenset(b'dir'), {b'subdir'})
530 530 self.assertEqual(im.visitchildrenset(b'dir/subdir'), set())
531 531 self.assertEqual(im.visitchildrenset(b'dir/foo'), set())
532 532 self.assertEqual(im.visitchildrenset(b'folder'), set())
533 533 self.assertEqual(im.visitchildrenset(b'dir/subdir/z'), set())
534 534 self.assertEqual(im.visitchildrenset(b'dir/subdir/x'), set())
535 535
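# A minimal sketch (not part of this patch): intersectmatchers(m1, m2)
# matches only files both accept. With the disjoint includes used in the
# IntersectionMatcherTests above, nothing qualifies, which is why those
# visitdir()/visitchildrenset() calls can prune everything.
from mercurial import match as matchmod

m1 = matchmod.match(b'/repo', b'', include=[b'path:dir/subdir'])
m2 = matchmod.match(b'/repo', b'', include=[b'rootfilesin:dir'])
im = matchmod.intersectmatchers(m1, m2)
assert not im(b'dir/f.txt')          # m2 yes, m1 no
assert not im(b'dir/subdir/f.txt')   # m1 yes, m2 no
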
536 536 class UnionMatcherTests(unittest.TestCase):
537 537
538 538 def testVisitdirM2always(self):
539 539 m1 = matchmod.alwaysmatcher()
540 540 m2 = matchmod.alwaysmatcher()
541 541 um = matchmod.unionmatcher([m1, m2])
542 542 # um should be equivalent to an alwaysmatcher.
543 self.assertEqual(um.visitdir(b'.'), b'all')
543 self.assertEqual(um.visitdir(b''), b'all')
544 544 self.assertEqual(um.visitdir(b'dir'), b'all')
545 545 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
546 546 self.assertEqual(um.visitdir(b'dir/subdir/z'), b'all')
547 547 self.assertEqual(um.visitdir(b'dir/foo'), b'all')
548 548 self.assertEqual(um.visitdir(b'dir/subdir/x'), b'all')
549 549 self.assertEqual(um.visitdir(b'folder'), b'all')
550 550
551 551 def testVisitchildrensetM2always(self):
552 552 m1 = matchmod.alwaysmatcher()
553 553 m2 = matchmod.alwaysmatcher()
554 554 um = matchmod.unionmatcher([m1, m2])
555 555 # um should be equivalent to an alwaysmatcher.
556 self.assertEqual(um.visitchildrenset(b'.'), b'all')
556 self.assertEqual(um.visitchildrenset(b''), b'all')
557 557 self.assertEqual(um.visitchildrenset(b'dir'), b'all')
558 558 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
559 559 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'all')
560 560 self.assertEqual(um.visitchildrenset(b'dir/foo'), b'all')
561 561 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'all')
562 562 self.assertEqual(um.visitchildrenset(b'folder'), b'all')
563 563
564 564 def testVisitdirM1never(self):
565 565 m1 = matchmod.nevermatcher()
566 566 m2 = matchmod.alwaysmatcher()
567 567 um = matchmod.unionmatcher([m1, m2])
568 568 # um should be equivalent to an alwaysmatcher.
569 self.assertEqual(um.visitdir(b'.'), b'all')
569 self.assertEqual(um.visitdir(b''), b'all')
570 570 self.assertEqual(um.visitdir(b'dir'), b'all')
571 571 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
572 572 self.assertEqual(um.visitdir(b'dir/subdir/z'), b'all')
573 573 self.assertEqual(um.visitdir(b'dir/foo'), b'all')
574 574 self.assertEqual(um.visitdir(b'dir/subdir/x'), b'all')
575 575 self.assertEqual(um.visitdir(b'folder'), b'all')
576 576
577 577 def testVisitchildrensetM1never(self):
578 578 m1 = matchmod.nevermatcher()
579 579 m2 = matchmod.alwaysmatcher()
580 580 um = matchmod.unionmatcher([m1, m2])
581 581 # um should be equivalent to an alwaysmatcher.
582 self.assertEqual(um.visitchildrenset(b'.'), b'all')
582 self.assertEqual(um.visitchildrenset(b''), b'all')
583 583 self.assertEqual(um.visitchildrenset(b'dir'), b'all')
584 584 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
585 585 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'all')
586 586 self.assertEqual(um.visitchildrenset(b'dir/foo'), b'all')
587 587 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'all')
588 588 self.assertEqual(um.visitchildrenset(b'folder'), b'all')
589 589
590 590 def testVisitdirM2never(self):
591 591 m1 = matchmod.alwaysmatcher()
592 592 m2 = matchmod.nevermatcher()
593 593 um = matchmod.unionmatcher([m1, m2])
594 594 # um should be equivalent to an alwaysmatcher.
595 self.assertEqual(um.visitdir(b'.'), b'all')
595 self.assertEqual(um.visitdir(b''), b'all')
596 596 self.assertEqual(um.visitdir(b'dir'), b'all')
597 597 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
598 598 self.assertEqual(um.visitdir(b'dir/subdir/z'), b'all')
599 599 self.assertEqual(um.visitdir(b'dir/foo'), b'all')
600 600 self.assertEqual(um.visitdir(b'dir/subdir/x'), b'all')
601 601 self.assertEqual(um.visitdir(b'folder'), b'all')
602 602
603 603 def testVisitchildrensetM2never(self):
604 604 m1 = matchmod.alwaysmatcher()
605 605 m2 = matchmod.nevermatcher()
606 606 um = matchmod.unionmatcher([m1, m2])
607 607 # um should be equivalent to an alwaysmatcher.
608 self.assertEqual(um.visitchildrenset(b'.'), b'all')
608 self.assertEqual(um.visitchildrenset(b''), b'all')
609 609 self.assertEqual(um.visitchildrenset(b'dir'), b'all')
610 610 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
611 611 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'all')
612 612 self.assertEqual(um.visitchildrenset(b'dir/foo'), b'all')
613 613 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'all')
614 614 self.assertEqual(um.visitchildrenset(b'folder'), b'all')
615 615
616 616 def testVisitdirM2SubdirPrefix(self):
617 617 m1 = matchmod.alwaysmatcher()
618 618 m2 = matchmod.match(b'', b'', patterns=[b'path:dir/subdir'])
619 619 um = matchmod.unionmatcher([m1, m2])
620 self.assertEqual(um.visitdir(b'.'), b'all')
620 self.assertEqual(um.visitdir(b''), b'all')
621 621 self.assertEqual(um.visitdir(b'dir'), b'all')
622 622 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
623 623 self.assertEqual(um.visitdir(b'dir/foo'), b'all')
624 624 self.assertEqual(um.visitdir(b'folder'), b'all')
625 625 self.assertEqual(um.visitdir(b'dir/subdir/z'), b'all')
626 626 self.assertEqual(um.visitdir(b'dir/subdir/x'), b'all')
627 627
628 628 def testVisitchildrensetM2SubdirPrefix(self):
629 629 m1 = matchmod.alwaysmatcher()
630 630 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
631 631 um = matchmod.unionmatcher([m1, m2])
632 self.assertEqual(um.visitchildrenset(b'.'), b'all')
632 self.assertEqual(um.visitchildrenset(b''), b'all')
633 633 self.assertEqual(um.visitchildrenset(b'dir'), b'all')
634 634 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
635 635 self.assertEqual(um.visitchildrenset(b'dir/foo'), b'all')
636 636 self.assertEqual(um.visitchildrenset(b'folder'), b'all')
637 637 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'all')
638 638 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'all')
639 639
640 640 # We're using includematcher instead of patterns because it behaves slightly
641 641 # better (giving narrower results) than patternmatcher.
642 642 def testVisitdirIncludeInclude(self):
643 643 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
644 644 m2 = matchmod.match(b'', b'', include=[b'rootfilesin:dir'])
645 645 um = matchmod.unionmatcher([m1, m2])
646 self.assertEqual(um.visitdir(b'.'), True)
646 self.assertEqual(um.visitdir(b''), True)
647 647 self.assertEqual(um.visitdir(b'dir'), True)
648 648 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
649 649 self.assertFalse(um.visitdir(b'dir/foo'))
650 650 self.assertFalse(um.visitdir(b'folder'))
651 651 # OPT: These two should probably be 'all' not True.
652 652 self.assertEqual(um.visitdir(b'dir/subdir/z'), True)
653 653 self.assertEqual(um.visitdir(b'dir/subdir/x'), True)
654 654
655 655 def testVisitchildrensetIncludeInclude(self):
656 656 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
657 657 m2 = matchmod.match(b'', b'', include=[b'rootfilesin:dir'])
658 658 um = matchmod.unionmatcher([m1, m2])
659 self.assertEqual(um.visitchildrenset(b'.'), {b'dir'})
659 self.assertEqual(um.visitchildrenset(b''), {b'dir'})
660 660 self.assertEqual(um.visitchildrenset(b'dir'), b'this')
661 661 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
662 662 self.assertEqual(um.visitchildrenset(b'dir/foo'), set())
663 663 self.assertEqual(um.visitchildrenset(b'folder'), set())
664 664 # OPT: These next two could be 'all' instead of 'this'.
665 665 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'this')
666 666 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'this')
667 667
668 668 # We're using includematcher instead of patterns because it behaves slightly
669 669 # better (giving narrower results) than patternmatcher.
670 670 def testVisitdirIncludeInclude2(self):
671 671 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
672 672 m2 = matchmod.match(b'', b'', include=[b'path:folder'])
673 673 um = matchmod.unionmatcher([m1, m2])
674 self.assertEqual(um.visitdir(b'.'), True)
674 self.assertEqual(um.visitdir(b''), True)
675 675 self.assertEqual(um.visitdir(b'dir'), True)
676 676 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
677 677 self.assertFalse(um.visitdir(b'dir/foo'))
678 678 self.assertEqual(um.visitdir(b'folder'), b'all')
679 679 # OPT: These should probably be 'all' not True.
680 680 self.assertEqual(um.visitdir(b'dir/subdir/z'), True)
681 681 self.assertEqual(um.visitdir(b'dir/subdir/x'), True)
682 682
683 683 def testVisitchildrensetIncludeInclude2(self):
684 684 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
685 685 m2 = matchmod.match(b'', b'', include=[b'path:folder'])
686 686 um = matchmod.unionmatcher([m1, m2])
687 self.assertEqual(um.visitchildrenset(b'.'), {b'folder', b'dir'})
687 self.assertEqual(um.visitchildrenset(b''), {b'folder', b'dir'})
688 688 self.assertEqual(um.visitchildrenset(b'dir'), {b'subdir'})
689 689 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
690 690 self.assertEqual(um.visitchildrenset(b'dir/foo'), set())
691 691 self.assertEqual(um.visitchildrenset(b'folder'), b'all')
692 692 # OPT: These next two could be 'all' instead of 'this'.
693 693 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'this')
694 694 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'this')
695 695
696 696 # We're using includematcher instead of patterns because it behaves slightly
697 697 # better (giving narrower results) than patternmatcher.
698 698 def testVisitdirIncludeInclude3(self):
699 699 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
700 700 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
701 701 um = matchmod.unionmatcher([m1, m2])
702 self.assertEqual(um.visitdir(b'.'), True)
702 self.assertEqual(um.visitdir(b''), True)
703 703 self.assertEqual(um.visitdir(b'dir'), True)
704 704 self.assertEqual(um.visitdir(b'dir/subdir'), b'all')
705 705 self.assertFalse(um.visitdir(b'dir/foo'))
706 706 self.assertFalse(um.visitdir(b'folder'))
707 707 self.assertEqual(um.visitdir(b'dir/subdir/x'), b'all')
708 708 # OPT: this should probably be 'all' not True.
709 709 self.assertEqual(um.visitdir(b'dir/subdir/z'), True)
710 710
711 711 def testVisitchildrensetIncludeInclude3(self):
712 712 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
713 713 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
714 714 um = matchmod.unionmatcher([m1, m2])
715 self.assertEqual(um.visitchildrenset(b'.'), {b'dir'})
715 self.assertEqual(um.visitchildrenset(b''), {b'dir'})
716 716 self.assertEqual(um.visitchildrenset(b'dir'), {b'subdir'})
717 717 self.assertEqual(um.visitchildrenset(b'dir/subdir'), b'all')
718 718 self.assertEqual(um.visitchildrenset(b'dir/foo'), set())
719 719 self.assertEqual(um.visitchildrenset(b'folder'), set())
720 720 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'all')
721 721 # OPT: this should probably be 'all' not 'this'.
722 722 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'this')
723 723
724 724 # We're using includematcher instead of patterns because it behaves slightly
725 725 # better (giving narrower results) than patternmatcher.
726 726 def testVisitdirIncludeInclude4(self):
727 727 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
728 728 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir/z'])
729 729 um = matchmod.unionmatcher([m1, m2])
730 730 # These next three must stay truthy so a walk can reach the matched subdirs.
731 self.assertEqual(um.visitdir(b'.'), True)
731 self.assertEqual(um.visitdir(b''), True)
732 732 self.assertEqual(um.visitdir(b'dir'), True)
733 733 self.assertEqual(um.visitdir(b'dir/subdir'), True)
734 734 self.assertFalse(um.visitdir(b'dir/foo'))
735 735 self.assertFalse(um.visitdir(b'folder'))
736 736 self.assertEqual(um.visitdir(b'dir/subdir/z'), b'all')
737 737 self.assertEqual(um.visitdir(b'dir/subdir/x'), b'all')
738 738
739 739 def testVisitchildrensetIncludeInclude4(self):
740 740 m1 = matchmod.match(b'', b'', include=[b'path:dir/subdir/x'])
741 741 m2 = matchmod.match(b'', b'', include=[b'path:dir/subdir/z'])
742 742 um = matchmod.unionmatcher([m1, m2])
743 self.assertEqual(um.visitchildrenset(b'.'), {b'dir'})
743 self.assertEqual(um.visitchildrenset(b''), {b'dir'})
744 744 self.assertEqual(um.visitchildrenset(b'dir'), {b'subdir'})
745 745 self.assertEqual(um.visitchildrenset(b'dir/subdir'), {b'x', b'z'})
746 746 self.assertEqual(um.visitchildrenset(b'dir/foo'), set())
747 747 self.assertEqual(um.visitchildrenset(b'folder'), set())
748 748 self.assertEqual(um.visitchildrenset(b'dir/subdir/z'), b'all')
749 749 self.assertEqual(um.visitchildrenset(b'dir/subdir/x'), b'all')
750 750
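# A minimal sketch (not part of this patch): unionmatcher([...]) matches a
# file if any constituent does, which is what the 'all'-heavy results in
# the UnionMatcherTests above reflect. Patterns here are illustrative.
from mercurial import match as matchmod

m1 = matchmod.match(b'/repo', b'', include=[b'path:dir/subdir'])
m2 = matchmod.match(b'/repo', b'', include=[b'path:folder'])
um = matchmod.unionmatcher([m1, m2])
assert um(b'dir/subdir/f.txt')   # via m1
assert um(b'folder/g.txt')       # via m2
assert not um(b'dir/other.txt')  # matched by neither
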
751 751 class SubdirMatcherTests(unittest.TestCase):
752 752
753 753 def testVisitdir(self):
754 754 m = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
755 755 sm = matchmod.subdirmatcher(b'dir', m)
756 756
757 self.assertEqual(sm.visitdir(b'.'), True)
757 self.assertEqual(sm.visitdir(b''), True)
758 758 self.assertEqual(sm.visitdir(b'subdir'), b'all')
759 759 # OPT: These next two should probably be 'all' not True.
760 760 self.assertEqual(sm.visitdir(b'subdir/x'), True)
761 761 self.assertEqual(sm.visitdir(b'subdir/z'), True)
762 762 self.assertFalse(sm.visitdir(b'foo'))
763 763
764 764 def testVisitchildrenset(self):
765 765 m = matchmod.match(b'', b'', include=[b'path:dir/subdir'])
766 766 sm = matchmod.subdirmatcher(b'dir', m)
767 767
768 self.assertEqual(sm.visitchildrenset(b'.'), {b'subdir'})
768 self.assertEqual(sm.visitchildrenset(b''), {b'subdir'})
769 769 self.assertEqual(sm.visitchildrenset(b'subdir'), b'all')
770 770 # OPT: These next two should probably be 'all' not 'this'.
771 771 self.assertEqual(sm.visitchildrenset(b'subdir/x'), b'this')
772 772 self.assertEqual(sm.visitchildrenset(b'subdir/z'), b'this')
773 773 self.assertEqual(sm.visitchildrenset(b'foo'), set())
774 774
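# A minimal sketch (not part of this patch): subdirmatcher(b'dir', m)
# rebases the wrapped matcher into dir/, so callers pass dir-relative paths
# and the empty bytestring denotes dir/ itself, as the tests above show.
from mercurial import match as matchmod

m = matchmod.match(b'/repo', b'', include=[b'path:dir/subdir'])
sm = matchmod.subdirmatcher(b'dir', m)
assert sm(b'subdir/f.txt')       # same as m(b'dir/subdir/f.txt')
assert not sm(b'elsewhere.txt')  # same as m(b'dir/elsewhere.txt')
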
775 775 class PrefixdirMatcherTests(unittest.TestCase):
776 776
777 777 def testVisitdir(self):
778 778 m = matchmod.match(util.localpath(b'root/d'), b'e/f',
779 779 [b'../a.txt', b'b.txt'])
780 780 pm = matchmod.prefixdirmatcher(b'd', m)
781 781
782 782 # `m` elides 'd' because it's part of the root, and the rest of the
783 783 # patterns are relative.
784 784 self.assertEqual(bool(m(b'a.txt')), False)
785 785 self.assertEqual(bool(m(b'b.txt')), False)
786 786 self.assertEqual(bool(m(b'e/a.txt')), True)
787 787 self.assertEqual(bool(m(b'e/b.txt')), False)
788 788 self.assertEqual(bool(m(b'e/f/b.txt')), True)
789 789
790 790 # The prefix matcher re-adds 'd' to the paths, so they need to be
791 791 # specified when using the prefixdirmatcher.
792 792 self.assertEqual(bool(pm(b'a.txt')), False)
793 793 self.assertEqual(bool(pm(b'b.txt')), False)
794 794 self.assertEqual(bool(pm(b'd/e/a.txt')), True)
795 795 self.assertEqual(bool(pm(b'd/e/b.txt')), False)
796 796 self.assertEqual(bool(pm(b'd/e/f/b.txt')), True)
797 797
798 self.assertEqual(m.visitdir(b'.'), True)
798 self.assertEqual(m.visitdir(b''), True)
799 799 self.assertEqual(m.visitdir(b'e'), True)
800 800 self.assertEqual(m.visitdir(b'e/f'), True)
801 801 self.assertEqual(m.visitdir(b'e/f/g'), False)
802 802
803 self.assertEqual(pm.visitdir(b'.'), True)
803 self.assertEqual(pm.visitdir(b''), True)
804 804 self.assertEqual(pm.visitdir(b'd'), True)
805 805 self.assertEqual(pm.visitdir(b'd/e'), True)
806 806 self.assertEqual(pm.visitdir(b'd/e/f'), True)
807 807 self.assertEqual(pm.visitdir(b'd/e/f/g'), False)
808 808
809 809 def testVisitchildrenset(self):
810 810 m = matchmod.match(util.localpath(b'root/d'), b'e/f',
811 811 [b'../a.txt', b'b.txt'])
812 812 pm = matchmod.prefixdirmatcher(b'd', m)
813 813
814 814 # OPT: visitchildrenset could possibly return {'e'} and {'f'} for these
815 815 # next two, respectively; patternmatcher does not have this
816 816 # optimization.
817 self.assertEqual(m.visitchildrenset(b'.'), b'this')
817 self.assertEqual(m.visitchildrenset(b''), b'this')
818 818 self.assertEqual(m.visitchildrenset(b'e'), b'this')
819 819 self.assertEqual(m.visitchildrenset(b'e/f'), b'this')
820 820 self.assertEqual(m.visitchildrenset(b'e/f/g'), set())
821 821
822 822 # OPT: visitchildrenset could possibly return {'d'}, {'e'}, and {'f'}
823 823 # for these next three, respectively; patternmatcher does not have this
824 824 # optimization.
825 self.assertEqual(pm.visitchildrenset(b'.'), b'this')
825 self.assertEqual(pm.visitchildrenset(b''), b'this')
826 826 self.assertEqual(pm.visitchildrenset(b'd'), b'this')
827 827 self.assertEqual(pm.visitchildrenset(b'd/e'), b'this')
828 828 self.assertEqual(pm.visitchildrenset(b'd/e/f'), b'this')
829 829 self.assertEqual(pm.visitchildrenset(b'd/e/f/g'), set())
830 830
831 831 if __name__ == '__main__':
832 832 silenttestrunner.main(__name__)
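# A closing sketch (not part of this patch) of the convention every call
# site in the tests above now follows: the repository root is queried as
# the empty bytestring rather than b'.'. Assuming mercurial is importable:
from mercurial import match as matchmod

m = matchmod.match(b'/repo', b'', include=[b'path:dir/subdir'])
assert m.visitdir(b'')                       # root spelled b'', not b'.'
assert m.visitchildrenset(b'') == {b'dir'}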