sidedata: rename `encode_copies_sidedata` to `encode_files_sidedata`...
marmoute
r46143:64d18e9e default
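
As context for the rename (an illustrative sketch, not part of the commit: the `ChangingFiles` values below are invented, while `encode_files_sidedata` and the `SD_*` sidedata keys come from the code in this diff):

    # Hypothetical usage sketch of the renamed API (values are made up).
    from mercurial import metadata
    from mercurial.revlogutils import sidedata as sidedatamod

    # b'b.txt' is recorded as added, and as copied from b'a.txt' in p1.
    files = metadata.ChangingFiles(
        touched=(b'a.txt', b'b.txt'),
        added=(b'b.txt',),
        p1_copies={b'b.txt': b'a.txt'},
    )
    sd = metadata.encode_files_sidedata(files)
    assert sidedatamod.SD_P1COPIES in sd
    assert sidedatamod.SD_FILESADDED in sd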
@@ -1,597 +1,597 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)

from .revlogutils import sidedata as sidedatamod

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)


class _divertopener(object):
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False, **kwargs):
        if name != target:
            return opener(name, mode, **kwargs)
        assert not kwargs
        return appender(opener, name, mode, buf)

    return _delay


@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')


class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
    )

    def __new__(cls, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra
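        #
        # An illustrative entry in this format (all values made up) is:
        #
        #   <40 hex digit manifest node>
        #   Alice <alice@example.com>
        #   1590000000 0 branch:stable
        #   a.txt
        #   b.txt
        #   <empty line>
        #   commit message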

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesadded')
        if rawindices is None:
            return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesremoved')
        if rawindices is None:
            return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p1copies')
        if rawcopies is None:
            return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p2copies')
        if rawcopies is None:
            return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
-            sidedata = metadata.encode_copies_sidedata(files)
+            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source
        # bundle.
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
@@ -1,488 +1,488
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
25 class ChangingFiles(object):
25 class ChangingFiles(object):
26 """A class recording the changes made to a file by a changeset
26 """A class recording the changes made to a file by a changeset
27
27
28 Actions performed on files are gathered into 3 sets:
28 Actions performed on files are gathered into 3 sets:
29
29
30 - added: files actively added in the changeset.
30 - added: files actively added in the changeset.
31 - removed: files removed in the revision
31 - removed: files removed in the revision
32 - touched: files affected by the merge
32 - touched: files affected by the merge
33
33
34 and copies information is held by 2 mappings
34 and copies information is held by 2 mappings
35
35
36 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
36 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
37 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
37 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
38
38
39 See their inline help for details.
39 See their inline help for details.
40 """
40 """
41
41
42 def __init__(
42 def __init__(
43 self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
43 self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
44 ):
44 ):
45 self._added = set(added)
45 self._added = set(added)
46 self._removed = set(removed)
46 self._removed = set(removed)
47 self._touched = set(touched)
47 self._touched = set(touched)
48 self._touched.update(self._added)
48 self._touched.update(self._added)
49 self._touched.update(self._removed)
49 self._touched.update(self._removed)
50 self._p1_copies = dict(p1_copies)
50 self._p1_copies = dict(p1_copies)
51 self._p2_copies = dict(p2_copies)
51 self._p2_copies = dict(p2_copies)
52
52
53 def __eq__(self, other):
53 def __eq__(self, other):
54 return (
54 return (
55 self.added == other.added
55 self.added == other.added
56 and self.removed == other.removed
56 and self.removed == other.removed
57 and self.touched == other.touched
57 and self.touched == other.touched
58 and self.copied_from_p1 == other.copied_from_p1
58 and self.copied_from_p1 == other.copied_from_p1
59 and self.copied_from_p2 == other.copied_from_p2
59 and self.copied_from_p2 == other.copied_from_p2
60 )
60 )
61
61
62 @property
62 @property
63 def added(self):
63 def added(self):
64 """files actively added in the changeset
64 """files actively added in the changeset
65
65
66 Any file present in that revision that was absent in all the changeset's
66 Any file present in that revision that was absent in all the changeset's
67 parents.
67 parents.
68
68
69 In case of merge, this means a file absent in one of the parents but
69 In case of merge, this means a file absent in one of the parents but
70 existing in the other will *not* be contained in this set. (They were
70 existing in the other will *not* be contained in this set. (They were
71 added by an ancestor)
71 added by an ancestor)
72 """
72 """
73 return frozenset(self._added)
73 return frozenset(self._added)
74
74
75 def mark_added(self, filename):
75 def mark_added(self, filename):
76 self._added.add(filename)
76 self._added.add(filename)
77 self._touched.add(filename)
77 self._touched.add(filename)
78
78
79 def update_added(self, filenames):
79 def update_added(self, filenames):
80 for f in filenames:
80 for f in filenames:
81 self.mark_added(f)
81 self.mark_added(f)
82
82
83 @property
83 @property
84 def removed(self):
84 def removed(self):
85 """files actively removed by the changeset
85 """files actively removed by the changeset
86
86
87 In case of merge this will only contain the set of files removing "new"
87 In case of merge this will only contain the set of files removing "new"
88 content. For any file absent in the current changeset:
88 content. For any file absent in the current changeset:
89
89
90 a) If the file exists in both parents, it is clearly "actively" removed
90 a) If the file exists in both parents, it is clearly "actively" removed
91 by this changeset.
91 by this changeset.
92
92
93 b) If a file exists in only one parent and in none of the common
93 b) If a file exists in only one parent and in none of the common
94 ancestors, then the file was newly added in one of the merged branches
94 ancestors, then the file was newly added in one of the merged branches
95 and then got "actively" removed.
95 and then got "actively" removed.
96
96
97 c) If a file exists in only one parent and at least one of the common
97 c) If a file exists in only one parent and at least one of the common
98 ancestors using the same filenode, then the file was unchanged on one
98 ancestors using the same filenode, then the file was unchanged on one
99 side and deleted on the other side. The merge "passively" propagated
99 side and deleted on the other side. The merge "passively" propagated
100 that deletion, but didn't "actively" remove the file. In this case the
100 that deletion, but didn't "actively" remove the file. In this case the
101 file is *not* included in the `removed` set.
101 file is *not* included in the `removed` set.
102
102
103 d) If a file exists in only one parent and at least one of the common
103 d) If a file exists in only one parent and at least one of the common
104 ancestors using a different filenode, then the file was changed on one
104 ancestors using a different filenode, then the file was changed on one
105 side and removed on the other side. The merge process "actively"
105 side and removed on the other side. The merge process "actively"
106 decided to drop the new change and delete the file. Unlike in the
106 decided to drop the new change and delete the file. Unlike in the
107 previous case, (c), the file included in the `removed` set.
107 previous case, (c), the file included in the `removed` set.
108
108
109 Summary table for merge:
109 Summary table for merge:
110
110
111 case | exists in parents | exists in gca || removed
111 case | exists in parents | exists in gca || removed
112 (a) | both | * || yes
112 (a) | both | * || yes
113 (b) | one | none || yes
113 (b) | one | none || yes
114 (c) | one | same filenode || no
114 (c) | one | same filenode || no
115 (d) | one | new filenode || yes
115 (d) | one | new filenode || yes
116 """
116 """
117 return frozenset(self._removed)
117 return frozenset(self._removed)
118
118
119 def mark_removed(self, filename):
119 def mark_removed(self, filename):
120 self._removed.add(filename)
120 self._removed.add(filename)
121 self._touched.add(filename)
121 self._touched.add(filename)
122
122
123 def update_removed(self, filenames):
123 def update_removed(self, filenames):
124 for f in filenames:
124 for f in filenames:
125 self.mark_removed(f)
125 self.mark_removed(f)
126
126
127 @property
127 @property
128 def touched(self):
128 def touched(self):
129 """files either actively modified, added or removed"""
129 """files either actively modified, added or removed"""
130 return frozenset(self._touched)
130 return frozenset(self._touched)
131
131
132 def mark_touched(self, filename):
132 def mark_touched(self, filename):
133 self._touched.add(filename)
133 self._touched.add(filename)
134
134
135 def update_touched(self, filenames):
135 def update_touched(self, filenames):
136 for f in filenames:
136 for f in filenames:
137 self.mark_touched(f)
137 self.mark_touched(f)
138
138
139 @property
139 @property
140 def copied_from_p1(self):
140 def copied_from_p1(self):
141 return self._p1_copies.copy()
141 return self._p1_copies.copy()
142
142
143 def mark_copied_from_p1(self, source, dest):
143 def mark_copied_from_p1(self, source, dest):
144 self._p1_copies[dest] = source
144 self._p1_copies[dest] = source
145
145
146 def update_copies_from_p1(self, copies):
146 def update_copies_from_p1(self, copies):
147 for dest, source in copies.items():
147 for dest, source in copies.items():
148 self.mark_copied_from_p1(source, dest)
148 self.mark_copied_from_p1(source, dest)
149
149
150 @property
150 @property
151 def copied_from_p2(self):
151 def copied_from_p2(self):
152 return self._p2_copies.copy()
152 return self._p2_copies.copy()
153
153
154 def mark_copied_from_p2(self, source, dest):
154 def mark_copied_from_p2(self, source, dest):
155 self._p2_copies[dest] = source
155 self._p2_copies[dest] = source
156
156
157 def update_copies_from_p2(self, copies):
157 def update_copies_from_p2(self, copies):
158 for dest, source in copies.items():
158 for dest, source in copies.items():
159 self.mark_copied_from_p2(source, dest)
159 self.mark_copied_from_p2(source, dest)
160
160
161
161
162 def computechangesetfilesadded(ctx):
162 def computechangesetfilesadded(ctx):
163 """return the list of files added in a changeset
163 """return the list of files added in a changeset
164 """
164 """
165 added = []
165 added = []
166 for f in ctx.files():
166 for f in ctx.files():
167 if not any(f in p for p in ctx.parents()):
167 if not any(f in p for p in ctx.parents()):
168 added.append(f)
168 added.append(f)
169 return added
169 return added
170
170
171
171
172 def get_removal_filter(ctx, x=None):
172 def get_removal_filter(ctx, x=None):
173 """return a function to detect files "wrongly" detected as `removed`
173 """return a function to detect files "wrongly" detected as `removed`
174
174
175 When a file is removed relative to p1 in a merge, this
175 When a file is removed relative to p1 in a merge, this
176 function determines whether the absence is due to a
176 function determines whether the absence is due to a
177 deletion from a parent, or whether the merge commit
177 deletion from a parent, or whether the merge commit
178 itself deletes the file. We decide this by doing a
178 itself deletes the file. We decide this by doing a
179 simplified three way merge of the manifest entry for
179 simplified three way merge of the manifest entry for
180 the file. There are two ways we decide the merge
180 the file. There are two ways we decide the merge
181 itself didn't delete a file:
181 itself didn't delete a file:
182 - neither parent (nor the merge) contain the file
182 - neither parent (nor the merge) contain the file
183 - exactly one parent contains the file, and that
183 - exactly one parent contains the file, and that
184 parent has the same filelog entry as the merge
184 parent has the same filelog entry as the merge
185 ancestor (or all of them if there two). In other
185 ancestor (or all of them if there two). In other
186 words, that parent left the file unchanged while the
186 words, that parent left the file unchanged while the
187 other one deleted it.
187 other one deleted it.
188 One way to think about this is that deleting a file is
188 One way to think about this is that deleting a file is
189 similar to emptying it, so the list of changed files
189 similar to emptying it, so the list of changed files
190 should be similar either way. The computation
190 should be similar either way. The computation
191 described above is not done directly in _filecommit
191 described above is not done directly in _filecommit
192 when creating the list of changed files, however
192 when creating the list of changed files, however
193 it does something very similar by comparing filelog
193 it does something very similar by comparing filelog
194 nodes.
194 nodes.
195 """
195 """
196
196
197 if x is not None:
197 if x is not None:
198 p1, p2, m1, m2 = x
198 p1, p2, m1, m2 = x
199 else:
199 else:
200 p1 = ctx.p1()
200 p1 = ctx.p1()
201 p2 = ctx.p2()
201 p2 = ctx.p2()
202 m1 = p1.manifest()
202 m1 = p1.manifest()
203 m2 = p2.manifest()
203 m2 = p2.manifest()
204
204
205 @util.cachefunc
205 @util.cachefunc
206 def mas():
206 def mas():
207 p1n = p1.node()
207 p1n = p1.node()
208 p2n = p2.node()
208 p2n = p2.node()
209 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
209 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
210 if not cahs:
210 if not cahs:
211 cahs = [node.nullrev]
211 cahs = [node.nullrev]
212 return [ctx.repo()[r].manifest() for r in cahs]
212 return [ctx.repo()[r].manifest() for r in cahs]
213
213
214 def deletionfromparent(f):
214 def deletionfromparent(f):
215 if f in m1:
215 if f in m1:
216 return f not in m2 and all(
216 return f not in m2 and all(
217 f in ma and ma.find(f) == m1.find(f) for ma in mas()
217 f in ma and ma.find(f) == m1.find(f) for ma in mas()
218 )
218 )
219 elif f in m2:
219 elif f in m2:
220 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
220 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
221 else:
221 else:
222 return True
222 return True
223
223
224 return deletionfromparent
224 return deletionfromparent
225
225
226
226
227 def computechangesetfilesremoved(ctx):
227 def computechangesetfilesremoved(ctx):
228 """return the list of files removed in a changeset
228 """return the list of files removed in a changeset
229 """
229 """
230 removed = []
230 removed = []
231 for f in ctx.files():
231 for f in ctx.files():
232 if f not in ctx:
232 if f not in ctx:
233 removed.append(f)
233 removed.append(f)
234 if removed:
234 if removed:
235 rf = get_removal_filter(ctx)
235 rf = get_removal_filter(ctx)
236 removed = [r for r in removed if not rf(r)]
236 removed = [r for r in removed if not rf(r)]
237 return removed
237 return removed
238
238
239
239
240 def computechangesetcopies(ctx):
240 def computechangesetcopies(ctx):
241 """return the copies data for a changeset
241 """return the copies data for a changeset
242
242
243 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
243 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
244
244
245 Each dictionnary are in the form: `{newname: oldname}`
245 Each dictionnary are in the form: `{newname: oldname}`
246 """
246 """
247 p1copies = {}
247 p1copies = {}
248 p2copies = {}
248 p2copies = {}
249 p1 = ctx.p1()
249 p1 = ctx.p1()
250 p2 = ctx.p2()
250 p2 = ctx.p2()
251 narrowmatch = ctx._repo.narrowmatch()
251 narrowmatch = ctx._repo.narrowmatch()
252 for dst in ctx.files():
252 for dst in ctx.files():
253 if not narrowmatch(dst) or dst not in ctx:
253 if not narrowmatch(dst) or dst not in ctx:
254 continue
254 continue
255 copied = ctx[dst].renamed()
255 copied = ctx[dst].renamed()
256 if not copied:
256 if not copied:
257 continue
257 continue
258 src, srcnode = copied
258 src, srcnode = copied
259 if src in p1 and p1[src].filenode() == srcnode:
259 if src in p1 and p1[src].filenode() == srcnode:
260 p1copies[dst] = src
260 p1copies[dst] = src
261 elif src in p2 and p2[src].filenode() == srcnode:
261 elif src in p2 and p2[src].filenode() == srcnode:
262 p2copies[dst] = src
262 p2copies[dst] = src
263 return p1copies, p2copies
263 return p1copies, p2copies
264
264
265
265
266 def encodecopies(files, copies):
266 def encodecopies(files, copies):
267 items = []
267 items = []
268 for i, dst in enumerate(files):
268 for i, dst in enumerate(files):
269 if dst in copies:
269 if dst in copies:
270 items.append(b'%d\0%s' % (i, copies[dst]))
270 items.append(b'%d\0%s' % (i, copies[dst]))
271 if len(items) != len(copies):
271 if len(items) != len(copies):
272 raise error.ProgrammingError(
272 raise error.ProgrammingError(
273 b'some copy targets missing from file list'
273 b'some copy targets missing from file list'
274 )
274 )
275 return b"\n".join(items)
275 return b"\n".join(items)
276
276
277
277
278 def decodecopies(files, data):
278 def decodecopies(files, data):
279 try:
279 try:
280 copies = {}
280 copies = {}
281 if not data:
281 if not data:
282 return copies
282 return copies
283 for l in data.split(b'\n'):
283 for l in data.split(b'\n'):
284 strindex, src = l.split(b'\0')
284 strindex, src = l.split(b'\0')
285 i = int(strindex)
285 i = int(strindex)
286 dst = files[i]
286 dst = files[i]
287 copies[dst] = src
287 copies[dst] = src
288 return copies
288 return copies
289 except (ValueError, IndexError):
289 except (ValueError, IndexError):
290 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
290 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
291 # used different syntax for the value.
291 # used different syntax for the value.
292 return None
292 return None
293
293
294
294
295 def encodefileindices(files, subset):
295 def encodefileindices(files, subset):
296 subset = set(subset)
296 subset = set(subset)
297 indices = []
297 indices = []
298 for i, f in enumerate(files):
298 for i, f in enumerate(files):
299 if f in subset:
299 if f in subset:
300 indices.append(b'%d' % i)
300 indices.append(b'%d' % i)
301 return b'\n'.join(indices)
301 return b'\n'.join(indices)
302
302
303
303
304 def decodefileindices(files, data):
304 def decodefileindices(files, data):
305 try:
305 try:
306 subset = []
306 subset = []
307 if not data:
307 if not data:
308 return subset
308 return subset
309 for strindex in data.split(b'\n'):
309 for strindex in data.split(b'\n'):
310 i = int(strindex)
310 i = int(strindex)
311 if i < 0 or i >= len(files):
311 if i < 0 or i >= len(files):
312 return None
312 return None
313 subset.append(files[i])
313 subset.append(files[i])
314 return subset
314 return subset
315 except (ValueError, IndexError):
315 except (ValueError, IndexError):
316 # Perhaps someone had chosen the same key name (e.g. "added") and
316 # Perhaps someone had chosen the same key name (e.g. "added") and
317 # used different syntax for the value.
317 # used different syntax for the value.
318 return None
318 return None
319
319
320
320
321 def encode_copies_sidedata(files):
321 def encode_files_sidedata(files):
322 sortedfiles = sorted(files.touched)
322 sortedfiles = sorted(files.touched)
323 sidedata = {}
323 sidedata = {}
324 p1copies = files.copied_from_p1
324 p1copies = files.copied_from_p1
325 if p1copies:
325 if p1copies:
326 p1copies = encodecopies(sortedfiles, p1copies)
326 p1copies = encodecopies(sortedfiles, p1copies)
327 sidedata[sidedatamod.SD_P1COPIES] = p1copies
327 sidedata[sidedatamod.SD_P1COPIES] = p1copies
328 p2copies = files.copied_from_p2
328 p2copies = files.copied_from_p2
329 if p2copies:
329 if p2copies:
330 p2copies = encodecopies(sortedfiles, p2copies)
330 p2copies = encodecopies(sortedfiles, p2copies)
331 sidedata[sidedatamod.SD_P2COPIES] = p2copies
331 sidedata[sidedatamod.SD_P2COPIES] = p2copies
332 filesadded = files.added
332 filesadded = files.added
333 if filesadded:
333 if filesadded:
334 filesadded = encodefileindices(sortedfiles, filesadded)
334 filesadded = encodefileindices(sortedfiles, filesadded)
335 sidedata[sidedatamod.SD_FILESADDED] = filesadded
335 sidedata[sidedatamod.SD_FILESADDED] = filesadded
336 filesremoved = files.removed
336 filesremoved = files.removed
337 if filesremoved:
337 if filesremoved:
338 filesremoved = encodefileindices(sortedfiles, filesremoved)
338 filesremoved = encodefileindices(sortedfiles, filesremoved)
339 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
339 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
340 if not sidedata:
340 if not sidedata:
341 sidedata = None
341 sidedata = None
342 return sidedata
342 return sidedata
343
343
344
344
345 def _getsidedata(srcrepo, rev):
345 def _getsidedata(srcrepo, rev):
346 ctx = srcrepo[rev]
346 ctx = srcrepo[rev]
347 filescopies = computechangesetcopies(ctx)
347 filescopies = computechangesetcopies(ctx)
348 filesadded = computechangesetfilesadded(ctx)
348 filesadded = computechangesetfilesadded(ctx)
349 filesremoved = computechangesetfilesremoved(ctx)
349 filesremoved = computechangesetfilesremoved(ctx)
350 sidedata = {}
350 sidedata = {}
351 if any([filescopies, filesadded, filesremoved]):
351 if any([filescopies, filesadded, filesremoved]):
352 sortedfiles = sorted(ctx.files())
352 sortedfiles = sorted(ctx.files())
353 p1copies, p2copies = filescopies
353 p1copies, p2copies = filescopies
354 p1copies = encodecopies(sortedfiles, p1copies)
354 p1copies = encodecopies(sortedfiles, p1copies)
355 p2copies = encodecopies(sortedfiles, p2copies)
355 p2copies = encodecopies(sortedfiles, p2copies)
356 filesadded = encodefileindices(sortedfiles, filesadded)
356 filesadded = encodefileindices(sortedfiles, filesadded)
357 filesremoved = encodefileindices(sortedfiles, filesremoved)
357 filesremoved = encodefileindices(sortedfiles, filesremoved)
358 if p1copies:
358 if p1copies:
359 sidedata[sidedatamod.SD_P1COPIES] = p1copies
359 sidedata[sidedatamod.SD_P1COPIES] = p1copies
360 if p2copies:
360 if p2copies:
361 sidedata[sidedatamod.SD_P2COPIES] = p2copies
361 sidedata[sidedatamod.SD_P2COPIES] = p2copies
362 if filesadded:
362 if filesadded:
363 sidedata[sidedatamod.SD_FILESADDED] = filesadded
363 sidedata[sidedatamod.SD_FILESADDED] = filesadded
364 if filesremoved:
364 if filesremoved:
365 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
365 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
366 return sidedata
366 return sidedata
367
367
368
368
369 def getsidedataadder(srcrepo, destrepo):
369 def getsidedataadder(srcrepo, destrepo):
370 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
370 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
371 if pycompat.iswindows or not use_w:
371 if pycompat.iswindows or not use_w:
372 return _get_simple_sidedata_adder(srcrepo, destrepo)
372 return _get_simple_sidedata_adder(srcrepo, destrepo)
373 else:
373 else:
374 return _get_worker_sidedata_adder(srcrepo, destrepo)
374 return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by a worker precomputing sidedata

    It reads revision numbers from an input queue and writes
    (rev, <sidedata-map>) pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task.
    Tokens are released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()
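

# ---------------------------------------------------------------------
# Illustrative sketch (not part of the original code): the token
# handshake used by `_sidedata_worker`, in miniature. The names
# `_demo_worker` and `maxpending` are hypothetical. A worker acquires a
# token before fetching each task and the consumer releases one per
# result it drains, so at most `maxpending` unconsumed results can pile
# up. This assumes `multiprocessing` is imported earlier in the module,
# as the code below already relies on.


def _demo_worker(tasks, results, tokens):
    tokens.acquire()
    item = tasks.get()
    while item is not None:
        results.put((item, item * item))  # stand-in for _getsidedata()
        tokens.acquire()
        item = tasks.get()
    # the `None` sentinel also consumed a token; give it back
    tokens.release()


if __name__ == '__main__':
    maxpending = 4
    tokens = multiprocessing.BoundedSemaphore(maxpending)
    tasks = multiprocessing.Queue()
    results = multiprocessing.Queue()
    for i in range(10):
        tasks.put(i)
    tasks.put(None)  # stop signal, as in `_sidedata_worker`
    proc = multiprocessing.Process(
        target=_demo_worker, args=(tasks, results, tokens)
    )
    proc.start()
    for _ in range(10):
        rev, data = results.get()
        tokens.release()  # the consumer returns the token
    proc.join()
# ---------------------------------------------------------------------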


BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is negligible. In practice, most computations
    # are fast but some are very expensive and dominate all the other, smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received first, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
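

# ---------------------------------------------------------------------
# Illustrative sketch (not part of the original code): the "staging"
# reordering performed by `sidedata_companion`, reduced to a standalone
# generator. `drain_in_order` is a hypothetical name; results arrive in
# arbitrary order and are yielded strictly by ascending key, shelving
# out-of-order entries exactly like the `staging` dict above.


def drain_in_order(resultq, count):
    staging = {}
    for wanted in range(count):
        data = staging.pop(wanted, None)
        if data is None:
            # read queued results until the wanted one shows up,
            # shelving the other ones for later use
            key, data = resultq.get()
            while key != wanted:
                staging[key] = data
                key, data = resultq.get()
        yield wanted, data


if __name__ == '__main__':
    import queue

    q = queue.Queue()
    for key in (2, 0, 3, 1):  # deliberately out of order
        q.put((key, b'sidedata-for-%d' % key))
    assert [k for k, _ in drain_in_order(q, 4)] == [0, 1, 2, 3]
# ---------------------------------------------------------------------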


def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread, on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
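

# ---------------------------------------------------------------------
# Illustrative sketch (not part of the original code): both companions
# above return a `(flag, keys, entries)` triple in which the second
# element lists sidedata keys to drop and the third supplies new
# entries. `fold_triple` is a hypothetical helper showing how such a
# triple could be folded into a plain dict standing in for a revision's
# current sidedata; interpreting the leading flag is left to the real
# cloning code that consumes these companions.


def fold_triple(current, triple):
    _flag, toremove, newentries = triple
    merged = {k: v for k, v in current.items() if k not in toremove}
    merged.update(newentries)
    return merged


if __name__ == '__main__':
    old = {b'p1copies': b'a\x00b', b'filesadded': b'0'}
    # a remover-style triple drops keys and adds nothing
    assert fold_triple(old, (False, (b'p1copies',), {})) == {
        b'filesadded': b'0'
    }
# ---------------------------------------------------------------------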