sidedata: add a `decode_files_sidedata` function...
marmoute
r46145:7543b507 default
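
For context, a minimal sketch of the round-trip this changeset enables, assuming the mercurial package at this revision is importable. `_FakeRevision` is a hypothetical stand-in exposing the single attribute (`files`) that the new `decode_files_sidedata` actually reads from a `changelogrevision`:

    from mercurial import metadata

    files = metadata.ChangingFiles(
        touched=(b'a.txt', b'b.txt', b'c.txt'),
        added=(b'b.txt',),
        removed=(b'c.txt',),
        p1_copies={b'b.txt': b'a.txt'},
    )

    # encode_files_sidedata maps SD_* keys to compact encodings of the
    # sets above (indices into the sorted list of touched files)
    sidedata = metadata.encode_files_sidedata(files)


    class _FakeRevision(object):
        # hypothetical stand-in: decode_files_sidedata only consults
        # `.files`, the sorted file list stored in the changeset text
        def __init__(self, touched):
            self.files = sorted(touched)


    decoded = metadata.decode_files_sidedata(
        _FakeRevision(files.touched), sidedata
    )
    assert decoded == files  # ChangingFiles.__eq__ compares all five fields
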
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,613 +1,616 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)

from .revlogutils import sidedata as sidedatamod

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)


class _divertopener(object):
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False, **kwargs):
        if name != target:
            return opener(name, mode, **kwargs)
        assert not kwargs
        return appender(opener, name, mode, buf)

    return _delay


@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')


class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
-        changes = metadata.ChangingFiles(
-            touched=self.files or (),
-            added=self.filesadded or (),
-            removed=self.filesremoved or (),
-            p1_copies=self.p1copies or {},
-            p2_copies=self.p2copies or {},
-        )
+        if self._cpsd:
+            changes = metadata.decode_files_sidedata(self, self._sidedata)
+        else:
+            changes = metadata.ChangingFiles(
+                touched=self.files or (),
+                added=self.filesadded or (),
+                removed=self.filesremoved or (),
+                p1_copies=self.p1copies or {},
+                p2_copies=self.p2copies or {},
+            )
        self._changes = changes
        return changes

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesadded')
        if rawindices is None:
            return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesremoved')
        if rawindices is None:
            return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p1copies')
        if rawcopies is None:
            return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p2copies')
        if rawcopies is None:
            return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. an unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source
        # bundle.
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
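
The practical effect of the changelog.py hunk above: when copies are stored in changeset sidedata (`self._cpsd` is true), the `changes` property now builds its `ChangingFiles` in a single call to the new helper instead of assembling it property by property. A hedged usage sketch, assuming `repo` is a local repository whose `copies-storage` option is `changeset-sidedata`:

    cl = repo.changelog
    crev = cl.changelogrevision(cl.tiprev())
    # With changeset-sidedata storage this resolves through the new
    # metadata.decode_files_sidedata() path and is cached on the instance.
    changes = crev.changes
    print(sorted(changes.added), sorted(changes.removed))
    print(changes.copied_from_p1)
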
@@ -1,488 +1,514
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
25 class ChangingFiles(object):
25 class ChangingFiles(object):
26 """A class recording the changes made to a file by a changeset
26 """A class recording the changes made to a file by a changeset
27
27
28 Actions performed on files are gathered into 3 sets:
28 Actions performed on files are gathered into 3 sets:
29
29
30 - added: files actively added in the changeset.
30 - added: files actively added in the changeset.
31 - removed: files removed in the revision
31 - removed: files removed in the revision
32 - touched: files affected by the merge
32 - touched: files affected by the merge
33
33
34 and copies information is held by 2 mappings
34 and copies information is held by 2 mappings
35
35
36 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
36 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
37 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
37 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies
38
38
39 See their inline help for details.
39 See their inline help for details.
40 """
40 """
41
41
42 def __init__(
42 def __init__(
43 self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
43 self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
44 ):
44 ):
45 self._added = set(added)
45 self._added = set(added)
46 self._removed = set(removed)
46 self._removed = set(removed)
47 self._touched = set(touched)
47 self._touched = set(touched)
48 self._touched.update(self._added)
48 self._touched.update(self._added)
49 self._touched.update(self._removed)
49 self._touched.update(self._removed)
50 self._p1_copies = dict(p1_copies)
50 self._p1_copies = dict(p1_copies)
51 self._p2_copies = dict(p2_copies)
51 self._p2_copies = dict(p2_copies)
52
52
53 def __eq__(self, other):
53 def __eq__(self, other):
54 return (
54 return (
55 self.added == other.added
55 self.added == other.added
56 and self.removed == other.removed
56 and self.removed == other.removed
57 and self.touched == other.touched
57 and self.touched == other.touched
58 and self.copied_from_p1 == other.copied_from_p1
58 and self.copied_from_p1 == other.copied_from_p1
59 and self.copied_from_p2 == other.copied_from_p2
59 and self.copied_from_p2 == other.copied_from_p2
60 )
60 )
61
61
62 @property
62 @property
63 def added(self):
63 def added(self):
64 """files actively added in the changeset
64 """files actively added in the changeset
65
65
66 Any file present in that revision that was absent in all the changeset's
66 Any file present in that revision that was absent in all the changeset's
67 parents.
67 parents.
68
68
69 In case of merge, this means a file absent in one of the parents but
69 In case of merge, this means a file absent in one of the parents but
70 existing in the other will *not* be contained in this set. (They were
70 existing in the other will *not* be contained in this set. (They were
71 added by an ancestor)
71 added by an ancestor)
72 """
72 """
73 return frozenset(self._added)
73 return frozenset(self._added)
74
74
75 def mark_added(self, filename):
75 def mark_added(self, filename):
76 self._added.add(filename)
76 self._added.add(filename)
77 self._touched.add(filename)
77 self._touched.add(filename)
78
78
79 def update_added(self, filenames):
79 def update_added(self, filenames):
80 for f in filenames:
80 for f in filenames:
81 self.mark_added(f)
81 self.mark_added(f)
82
82
83 @property
83 @property
84 def removed(self):
84 def removed(self):
85 """files actively removed by the changeset
85 """files actively removed by the changeset
86
86
87 In case of merge this will only contain the set of files removing "new"
87 In case of merge this will only contain the set of files removing "new"
88 content. For any file absent in the current changeset:
88 content. For any file absent in the current changeset:
89
89
90 a) If the file exists in both parents, it is clearly "actively" removed
90 a) If the file exists in both parents, it is clearly "actively" removed
91 by this changeset.
91 by this changeset.
92
92
93 b) If a file exists in only one parent and in none of the common
93 b) If a file exists in only one parent and in none of the common
94 ancestors, then the file was newly added in one of the merged branches
94 ancestors, then the file was newly added in one of the merged branches
95 and then got "actively" removed.
95 and then got "actively" removed.
96
96
97 c) If a file exists in only one parent and at least one of the common
97 c) If a file exists in only one parent and at least one of the common
98 ancestors using the same filenode, then the file was unchanged on one
98 ancestors using the same filenode, then the file was unchanged on one
99 side and deleted on the other side. The merge "passively" propagated
99 side and deleted on the other side. The merge "passively" propagated
100 that deletion, but didn't "actively" remove the file. In this case the
100 that deletion, but didn't "actively" remove the file. In this case the
101 file is *not* included in the `removed` set.
101 file is *not* included in the `removed` set.
102
102
103 d) If a file exists in only one parent and at least one of the common
103 d) If a file exists in only one parent and at least one of the common
104 ancestors using a different filenode, then the file was changed on one
104 ancestors using a different filenode, then the file was changed on one
105 side and removed on the other side. The merge process "actively"
105 side and removed on the other side. The merge process "actively"
106 decided to drop the new change and delete the file. Unlike in the
106 decided to drop the new change and delete the file. Unlike in the
107 previous case, (c), the file included in the `removed` set.
107 previous case, (c), the file included in the `removed` set.
108
108
109 Summary table for merge:
109 Summary table for merge:
110
110
111 case | exists in parents | exists in gca || removed
111 case | exists in parents | exists in gca || removed
112 (a) | both | * || yes
112 (a) | both | * || yes
113 (b) | one | none || yes
113 (b) | one | none || yes
114 (c) | one | same filenode || no
114 (c) | one | same filenode || no
115 (d) | one | new filenode || yes
115 (d) | one | new filenode || yes
116 """
116 """
117 return frozenset(self._removed)
117 return frozenset(self._removed)
118
118
119 def mark_removed(self, filename):
119 def mark_removed(self, filename):
120 self._removed.add(filename)
120 self._removed.add(filename)
121 self._touched.add(filename)
121 self._touched.add(filename)
122
122
123 def update_removed(self, filenames):
123 def update_removed(self, filenames):
124 for f in filenames:
124 for f in filenames:
125 self.mark_removed(f)
125 self.mark_removed(f)
126
126
127 @property
127 @property
128 def touched(self):
128 def touched(self):
129 """files either actively modified, added or removed"""
129 """files either actively modified, added or removed"""
130 return frozenset(self._touched)
130 return frozenset(self._touched)
131
131
132 def mark_touched(self, filename):
132 def mark_touched(self, filename):
133 self._touched.add(filename)
133 self._touched.add(filename)
134
134
135 def update_touched(self, filenames):
135 def update_touched(self, filenames):
136 for f in filenames:
136 for f in filenames:
137 self.mark_touched(f)
137 self.mark_touched(f)
138
138
139 @property
139 @property
140 def copied_from_p1(self):
140 def copied_from_p1(self):
141 return self._p1_copies.copy()
141 return self._p1_copies.copy()
142
142
143 def mark_copied_from_p1(self, source, dest):
143 def mark_copied_from_p1(self, source, dest):
144 self._p1_copies[dest] = source
144 self._p1_copies[dest] = source
145
145
146 def update_copies_from_p1(self, copies):
146 def update_copies_from_p1(self, copies):
147 for dest, source in copies.items():
147 for dest, source in copies.items():
148 self.mark_copied_from_p1(source, dest)
148 self.mark_copied_from_p1(source, dest)
149
149
150 @property
150 @property
151 def copied_from_p2(self):
151 def copied_from_p2(self):
152 return self._p2_copies.copy()
152 return self._p2_copies.copy()
153
153
154 def mark_copied_from_p2(self, source, dest):
154 def mark_copied_from_p2(self, source, dest):
155 self._p2_copies[dest] = source
155 self._p2_copies[dest] = source
156
156
157 def update_copies_from_p2(self, copies):
157 def update_copies_from_p2(self, copies):
158 for dest, source in copies.items():
158 for dest, source in copies.items():
159 self.mark_copied_from_p2(source, dest)
159 self.mark_copied_from_p2(source, dest)
160
160
161
161
162 def computechangesetfilesadded(ctx):
162 def computechangesetfilesadded(ctx):
163 """return the list of files added in a changeset
163 """return the list of files added in a changeset
164 """
164 """
165 added = []
165 added = []
166 for f in ctx.files():
166 for f in ctx.files():
167 if not any(f in p for p in ctx.parents()):
167 if not any(f in p for p in ctx.parents()):
168 added.append(f)
168 added.append(f)
169 return added
169 return added
170
170
171
171
172 def get_removal_filter(ctx, x=None):
172 def get_removal_filter(ctx, x=None):
173 """return a function to detect files "wrongly" detected as `removed`
173 """return a function to detect files "wrongly" detected as `removed`
174
174
175 When a file is removed relative to p1 in a merge, this
175 When a file is removed relative to p1 in a merge, this
176 function determines whether the absence is due to a
176 function determines whether the absence is due to a
177 deletion from a parent, or whether the merge commit
177 deletion from a parent, or whether the merge commit
178 itself deletes the file. We decide this by doing a
178 itself deletes the file. We decide this by doing a
179 simplified three way merge of the manifest entry for
179 simplified three way merge of the manifest entry for
180 the file. There are two ways we decide the merge
180 the file. There are two ways we decide the merge
181 itself didn't delete a file:
181 itself didn't delete a file:
182 - neither parent (nor the merge) contain the file
182 - neither parent (nor the merge) contain the file
183 - exactly one parent contains the file, and that
183 - exactly one parent contains the file, and that
184 parent has the same filelog entry as the merge
184 parent has the same filelog entry as the merge
185 ancestor (or all of them if there two). In other
185 ancestor (or all of them if there two). In other
186 words, that parent left the file unchanged while the
186 words, that parent left the file unchanged while the
187 other one deleted it.
187 other one deleted it.
188 One way to think about this is that deleting a file is
188 One way to think about this is that deleting a file is
189 similar to emptying it, so the list of changed files
189 similar to emptying it, so the list of changed files
190 should be similar either way. The computation
190 should be similar either way. The computation
191 described above is not done directly in _filecommit
191 described above is not done directly in _filecommit
192 when creating the list of changed files, however
192 when creating the list of changed files, however
193 it does something very similar by comparing filelog
193 it does something very similar by comparing filelog
194 nodes.
194 nodes.
195 """
195 """
196
196
197 if x is not None:
197 if x is not None:
198 p1, p2, m1, m2 = x
198 p1, p2, m1, m2 = x
199 else:
199 else:
200 p1 = ctx.p1()
200 p1 = ctx.p1()
201 p2 = ctx.p2()
201 p2 = ctx.p2()
202 m1 = p1.manifest()
202 m1 = p1.manifest()
203 m2 = p2.manifest()
203 m2 = p2.manifest()
204
204
205 @util.cachefunc
205 @util.cachefunc
206 def mas():
206 def mas():
207 p1n = p1.node()
207 p1n = p1.node()
208 p2n = p2.node()
208 p2n = p2.node()
209 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
209 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
210 if not cahs:
210 if not cahs:
211 cahs = [node.nullrev]
211 cahs = [node.nullrev]
212 return [ctx.repo()[r].manifest() for r in cahs]
212 return [ctx.repo()[r].manifest() for r in cahs]
213
213
214 def deletionfromparent(f):
214 def deletionfromparent(f):
215 if f in m1:
215 if f in m1:
216 return f not in m2 and all(
216 return f not in m2 and all(
217 f in ma and ma.find(f) == m1.find(f) for ma in mas()
217 f in ma and ma.find(f) == m1.find(f) for ma in mas()
218 )
218 )
219 elif f in m2:
219 elif f in m2:
220 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
220 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
221 else:
221 else:
222 return True
222 return True
223
223
224 return deletionfromparent
224 return deletionfromparent
225
225
226
226
227 def computechangesetfilesremoved(ctx):
227 def computechangesetfilesremoved(ctx):
228 """return the list of files removed in a changeset
228 """return the list of files removed in a changeset
229 """
229 """
230 removed = []
230 removed = []
231 for f in ctx.files():
231 for f in ctx.files():
232 if f not in ctx:
232 if f not in ctx:
233 removed.append(f)
233 removed.append(f)
234 if removed:
234 if removed:
235 rf = get_removal_filter(ctx)
235 rf = get_removal_filter(ctx)
236 removed = [r for r in removed if not rf(r)]
236 removed = [r for r in removed if not rf(r)]
237 return removed
237 return removed
238
238
239
239
240 def computechangesetcopies(ctx):
240 def computechangesetcopies(ctx):
241 """return the copies data for a changeset
241 """return the copies data for a changeset
242
242
243 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
243 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
244
244
245 Each dictionnary are in the form: `{newname: oldname}`
245 Each dictionnary are in the form: `{newname: oldname}`
246 """
246 """
247 p1copies = {}
247 p1copies = {}
248 p2copies = {}
248 p2copies = {}
249 p1 = ctx.p1()
249 p1 = ctx.p1()
250 p2 = ctx.p2()
250 p2 = ctx.p2()
251 narrowmatch = ctx._repo.narrowmatch()
251 narrowmatch = ctx._repo.narrowmatch()
252 for dst in ctx.files():
252 for dst in ctx.files():
253 if not narrowmatch(dst) or dst not in ctx:
253 if not narrowmatch(dst) or dst not in ctx:
254 continue
254 continue
255 copied = ctx[dst].renamed()
255 copied = ctx[dst].renamed()
256 if not copied:
256 if not copied:
257 continue
257 continue
258 src, srcnode = copied
258 src, srcnode = copied
259 if src in p1 and p1[src].filenode() == srcnode:
259 if src in p1 and p1[src].filenode() == srcnode:
260 p1copies[dst] = src
260 p1copies[dst] = src
261 elif src in p2 and p2[src].filenode() == srcnode:
261 elif src in p2 and p2[src].filenode() == srcnode:
262 p2copies[dst] = src
262 p2copies[dst] = src
263 return p1copies, p2copies
263 return p1copies, p2copies
264
264
265
265
266 def encodecopies(files, copies):
266 def encodecopies(files, copies):
267 items = []
267 items = []
268 for i, dst in enumerate(files):
268 for i, dst in enumerate(files):
269 if dst in copies:
269 if dst in copies:
270 items.append(b'%d\0%s' % (i, copies[dst]))
270 items.append(b'%d\0%s' % (i, copies[dst]))
271 if len(items) != len(copies):
271 if len(items) != len(copies):
272 raise error.ProgrammingError(
272 raise error.ProgrammingError(
273 b'some copy targets missing from file list'
273 b'some copy targets missing from file list'
274 )
274 )
275 return b"\n".join(items)
275 return b"\n".join(items)
276
276
277
277
278 def decodecopies(files, data):
278 def decodecopies(files, data):
279 try:
279 try:
280 copies = {}
280 copies = {}
281 if not data:
281 if not data:
282 return copies
282 return copies
283 for l in data.split(b'\n'):
283 for l in data.split(b'\n'):
284 strindex, src = l.split(b'\0')
284 strindex, src = l.split(b'\0')
285 i = int(strindex)
285 i = int(strindex)
286 dst = files[i]
286 dst = files[i]
287 copies[dst] = src
287 copies[dst] = src
288 return copies
288 return copies
289 except (ValueError, IndexError):
289 except (ValueError, IndexError):
290 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
290 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
291 # used different syntax for the value.
291 # used different syntax for the value.
292 return None
292 return None
293
293
294
294
295 def encodefileindices(files, subset):
295 def encodefileindices(files, subset):
296 subset = set(subset)
296 subset = set(subset)
297 indices = []
297 indices = []
298 for i, f in enumerate(files):
298 for i, f in enumerate(files):
299 if f in subset:
299 if f in subset:
300 indices.append(b'%d' % i)
300 indices.append(b'%d' % i)
301 return b'\n'.join(indices)
301 return b'\n'.join(indices)
302
302
303
303
304 def decodefileindices(files, data):
304 def decodefileindices(files, data):
305 try:
305 try:
306 subset = []
306 subset = []
307 if not data:
307 if not data:
308 return subset
308 return subset
309 for strindex in data.split(b'\n'):
309 for strindex in data.split(b'\n'):
310 i = int(strindex)
310 i = int(strindex)
311 if i < 0 or i >= len(files):
311 if i < 0 or i >= len(files):
312 return None
312 return None
313 subset.append(files[i])
313 subset.append(files[i])
314 return subset
314 return subset
315 except (ValueError, IndexError):
315 except (ValueError, IndexError):
316 # Perhaps someone had chosen the same key name (e.g. "added") and
316 # Perhaps someone had chosen the same key name (e.g. "added") and
317 # used different syntax for the value.
317 # used different syntax for the value.
318 return None
318 return None
319
319
320
320
321 def encode_files_sidedata(files):
321 def encode_files_sidedata(files):
322 sortedfiles = sorted(files.touched)
322 sortedfiles = sorted(files.touched)
323 sidedata = {}
323 sidedata = {}
324 p1copies = files.copied_from_p1
324 p1copies = files.copied_from_p1
325 if p1copies:
325 if p1copies:
326 p1copies = encodecopies(sortedfiles, p1copies)
326 p1copies = encodecopies(sortedfiles, p1copies)
327 sidedata[sidedatamod.SD_P1COPIES] = p1copies
327 sidedata[sidedatamod.SD_P1COPIES] = p1copies
328 p2copies = files.copied_from_p2
328 p2copies = files.copied_from_p2
329 if p2copies:
329 if p2copies:
330 p2copies = encodecopies(sortedfiles, p2copies)
330 p2copies = encodecopies(sortedfiles, p2copies)
331 sidedata[sidedatamod.SD_P2COPIES] = p2copies
331 sidedata[sidedatamod.SD_P2COPIES] = p2copies
332 filesadded = files.added
332 filesadded = files.added
333 if filesadded:
333 if filesadded:
334 filesadded = encodefileindices(sortedfiles, filesadded)
334 filesadded = encodefileindices(sortedfiles, filesadded)
335 sidedata[sidedatamod.SD_FILESADDED] = filesadded
335 sidedata[sidedatamod.SD_FILESADDED] = filesadded
336 filesremoved = files.removed
336 filesremoved = files.removed
337 if filesremoved:
337 if filesremoved:
338 filesremoved = encodefileindices(sortedfiles, filesremoved)
338 filesremoved = encodefileindices(sortedfiles, filesremoved)
339 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
339 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
340 if not sidedata:
340 if not sidedata:
341 sidedata = None
341 sidedata = None
342 return sidedata
342 return sidedata
343
343
344
344
345 def decode_files_sidedata(changelogrevision, sidedata):
346 """Return a ChangingFiles instance from a changelogrevision using sidata
347 """
348 touched = changelogrevision.files
349
350 rawindices = sidedata.get(sidedatamod.SD_FILESADDED)
351 added = decodefileindices(touched, rawindices)
352
353 rawindices = sidedata.get(sidedatamod.SD_FILESREMOVED)
354 removed = decodefileindices(touched, rawindices)
355
356 rawcopies = sidedata.get(sidedatamod.SD_P1COPIES)
357 p1_copies = decodecopies(touched, rawcopies)
358
359 rawcopies = sidedata.get(sidedatamod.SD_P2COPIES)
360 p2_copies = decodecopies(touched, rawcopies)
361
362 return ChangingFiles(
363 touched=touched,
364 added=added,
365 removed=removed,
366 p1_copies=p1_copies,
367 p2_copies=p2_copies,
368 )
369
370
345 def _getsidedata(srcrepo, rev):
371 def _getsidedata(srcrepo, rev):
346 ctx = srcrepo[rev]
372 ctx = srcrepo[rev]
347 filescopies = computechangesetcopies(ctx)
373 filescopies = computechangesetcopies(ctx)
348 filesadded = computechangesetfilesadded(ctx)
374 filesadded = computechangesetfilesadded(ctx)
349 filesremoved = computechangesetfilesremoved(ctx)
375 filesremoved = computechangesetfilesremoved(ctx)
350 sidedata = {}
376 sidedata = {}
351 if any([filescopies, filesadded, filesremoved]):
377 if any([filescopies, filesadded, filesremoved]):
352 sortedfiles = sorted(ctx.files())
378 sortedfiles = sorted(ctx.files())
353 p1copies, p2copies = filescopies
379 p1copies, p2copies = filescopies
354 p1copies = encodecopies(sortedfiles, p1copies)
380 p1copies = encodecopies(sortedfiles, p1copies)
355 p2copies = encodecopies(sortedfiles, p2copies)
381 p2copies = encodecopies(sortedfiles, p2copies)
356 filesadded = encodefileindices(sortedfiles, filesadded)
382 filesadded = encodefileindices(sortedfiles, filesadded)
357 filesremoved = encodefileindices(sortedfiles, filesremoved)
383 filesremoved = encodefileindices(sortedfiles, filesremoved)
358 if p1copies:
384 if p1copies:
359 sidedata[sidedatamod.SD_P1COPIES] = p1copies
385 sidedata[sidedatamod.SD_P1COPIES] = p1copies
360 if p2copies:
386 if p2copies:
361 sidedata[sidedatamod.SD_P2COPIES] = p2copies
387 sidedata[sidedatamod.SD_P2COPIES] = p2copies
362 if filesadded:
388 if filesadded:
363 sidedata[sidedatamod.SD_FILESADDED] = filesadded
389 sidedata[sidedatamod.SD_FILESADDED] = filesadded
364 if filesremoved:
390 if filesremoved:
365 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
391 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
366 return sidedata
392 return sidedata
367
393
368
394
369 def getsidedataadder(srcrepo, destrepo):
395 def getsidedataadder(srcrepo, destrepo):
370 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
396 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
371 if pycompat.iswindows or not use_w:
397 if pycompat.iswindows or not use_w:
372 return _get_simple_sidedata_adder(srcrepo, destrepo)
398 return _get_simple_sidedata_adder(srcrepo, destrepo)
373 else:
399 else:
374 return _get_worker_sidedata_adder(srcrepo, destrepo)
400 return _get_worker_sidedata_adder(srcrepo, destrepo)
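
For reference, the configbool call above corresponds to an hgrc knob along
these lines (a sketch; only the section and option names are taken from the
code in this hunk):

    [experimental]
    worker.repository-upgrade = yes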
375
401
376
402
377 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
403 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
378 """The function used by worker precomputing sidedata
404 """The function used by worker precomputing sidedata
379
405
380 It reads revision numbers from an input queue
406 It reads revision numbers from an input queue
381 It writes (rev, <sidedata-map>) pairs to an output queue
407 It writes (rev, <sidedata-map>) pairs to an output queue
382
408
383 The `None` input value is used as a stop signal.
409 The `None` input value is used as a stop signal.
384
410
385 The `tokens` semaphore is used to avoid having too many unprocessed
411 The `tokens` semaphore is used to avoid having too many unprocessed
386 entries. Each worker needs to acquire one token before fetching a task.
412 entries. Each worker needs to acquire one token before fetching a task.
387 They will be released by the consumer of the produced data.
413 They will be released by the consumer of the produced data.
388 """
414 """
389 tokens.acquire()
415 tokens.acquire()
390 rev = revs_queue.get()
416 rev = revs_queue.get()
391 while rev is not None:
417 while rev is not None:
392 data = _getsidedata(srcrepo, rev)
418 data = _getsidedata(srcrepo, rev)
393 sidedata_queue.put((rev, data))
419 sidedata_queue.put((rev, data))
394 tokens.acquire()
420 tokens.acquire()
395 rev = revs_queue.get()
421 rev = revs_queue.get()
396 # the `None` stop signal has been processed, release the token.
422 # the `None` stop signal has been processed, release the token.
397 tokens.release()
423 tokens.release()
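
A self-contained toy of the token protocol the docstring describes, using
threads instead of processes for brevity; the names and the squaring stand-in
for _getsidedata() are invented for the example. Each worker acquires a token
before pulling a task, and the consumer releases tokens as it drains results,
which caps the number of unconsumed results in flight.

    import queue
    import threading

    TOKENS = threading.BoundedSemaphore(4)  # at most 4 unprocessed entries
    tasks = queue.Queue()
    results = queue.Queue()

    def worker():
        TOKENS.acquire()
        rev = tasks.get()
        while rev is not None:
            results.put((rev, rev * rev))  # stand-in for _getsidedata()
            TOKENS.acquire()
            rev = tasks.get()
        TOKENS.release()  # fetching the `None` marker consumed a token too

    for r in range(10):
        tasks.put(r)
    tasks.put(None)  # the "no more tasks" marker

    t = threading.Thread(target=worker)
    t.start()
    for _ in range(10):
        rev, data = results.get()
        TOKENS.release()  # let the worker fetch another task
    t.join()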
398
424
399
425
400 BUFF_PER_WORKER = 50
426 BUFF_PER_WORKER = 50
401
427
402
428
403 def _get_worker_sidedata_adder(srcrepo, destrepo):
429 def _get_worker_sidedata_adder(srcrepo, destrepo):
404 """The parallel version of the sidedata computation
430 """The parallel version of the sidedata computation
405
431
406 This code spawns a pool of workers that precompute a buffer of sidedata
432 This code spawns a pool of workers that precompute a buffer of sidedata
407 before we actually need them"""
433 before we actually need them"""
408 # avoid circular import copies -> scmutil -> worker -> copies
434 # avoid circular import copies -> scmutil -> worker -> copies
409 from . import worker
435 from . import worker
410
436
411 nbworkers = worker._numworkers(srcrepo.ui)
437 nbworkers = worker._numworkers(srcrepo.ui)
412
438
413 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
439 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
414 revsq = multiprocessing.Queue()
440 revsq = multiprocessing.Queue()
415 sidedataq = multiprocessing.Queue()
441 sidedataq = multiprocessing.Queue()
416
442
417 assert srcrepo.filtername is None
443 assert srcrepo.filtername is None
418 # queue all tasks beforehand; revision numbers are small and it makes
444 # queue all tasks beforehand; revision numbers are small and it makes
419 # synchronisation simpler
445 # synchronisation simpler
420 #
446 #
421 # Since the computation for each node can be quite expensive, the overhead
447 # Since the computation for each node can be quite expensive, the overhead
422 # of using a single queue is not relevant. In practice, most computations
448 # of using a single queue is not relevant. In practice, most computations
423 # are fast but some are very expensive and dominate all the other smaller
449 # are fast but some are very expensive and dominate all the other smaller
424 # costs.
450 # costs.
425 for r in srcrepo.changelog.revs():
451 for r in srcrepo.changelog.revs():
426 revsq.put(r)
452 revsq.put(r)
427 # queue the "no more tasks" markers
453 # queue the "no more tasks" markers
428 for i in range(nbworkers):
454 for i in range(nbworkers):
429 revsq.put(None)
455 revsq.put(None)
430
456
431 allworkers = []
457 allworkers = []
432 for i in range(nbworkers):
458 for i in range(nbworkers):
433 args = (srcrepo, revsq, sidedataq, tokens)
459 args = (srcrepo, revsq, sidedataq, tokens)
434 w = multiprocessing.Process(target=_sidedata_worker, args=args)
460 w = multiprocessing.Process(target=_sidedata_worker, args=args)
435 allworkers.append(w)
461 allworkers.append(w)
436 w.start()
462 w.start()
437
463
438 # dictionary to store results for revisions higher than the one we are
464 # dictionary to store results for revisions higher than the one we are
439 # looking for. For example, if we need the sidedata map for 42, and 43 is
465 # looking for. For example, if we need the sidedata map for 42, and 43 is
440 # received, we shelve 43 for later use.
466 # received, we shelve 43 for later use.
441 staging = {}
467 staging = {}
442
468
443 def sidedata_companion(revlog, rev):
469 def sidedata_companion(revlog, rev):
444 sidedata = {}
470 sidedata = {}
445 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
471 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
446 # Is the data previously shelved?
472 # Is the data previously shelved?
447 sidedata = staging.pop(rev, None)
473 sidedata = staging.pop(rev, None)
448 if sidedata is None:
474 if sidedata is None:
449 # look at the queued results until we find the one we are looking
475 # look at the queued results until we find the one we are looking
450 # for (shelve the other ones)
476 # for (shelve the other ones)
451 r, sidedata = sidedataq.get()
477 r, sidedata = sidedataq.get()
452 while r != rev:
478 while r != rev:
453 staging[r] = sidedata
479 staging[r] = sidedata
454 r, sidedata = sidedataq.get()
480 r, sidedata = sidedataq.get()
455 tokens.release()
481 tokens.release()
456 return False, (), sidedata
482 return False, (), sidedata
457
483
458 return sidedata_companion
484 return sidedata_companion
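
The shelving logic inside sidedata_companion can be isolated into a small,
runnable sketch (the `fetch` helper is invented for illustration): results
may arrive out of order, so anything that is not the revision we currently
need is parked in `staging` until its turn comes.

    import queue

    def fetch(rev, result_queue, staging):
        data = staging.pop(rev, None)
        if data is None:
            r, data = result_queue.get()
            while r != rev:
                staging[r] = data
                r, data = result_queue.get()
        return data

    q = queue.Queue()
    for item in [(2, b'two'), (1, b'one'), (0, b'zero')]:
        q.put(item)
    staging = {}
    assert fetch(0, q, staging) == b'zero'  # shelves 2 and 1 on the way
    assert fetch(1, q, staging) == b'one'   # served straight from staging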
459
485
460
486
461 def _get_simple_sidedata_adder(srcrepo, destrepo):
487 def _get_simple_sidedata_adder(srcrepo, destrepo):
462 """The simple version of the sidedata computation
488 """The simple version of the sidedata computation
463
489
464 It just computes the sidedata in the same thread on request"""
490 It just computes the sidedata in the same thread on request"""
465
491
466 def sidedatacompanion(revlog, rev):
492 def sidedatacompanion(revlog, rev):
467 sidedata = {}
493 sidedata = {}
468 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
494 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
469 sidedata = _getsidedata(srcrepo, rev)
495 sidedata = _getsidedata(srcrepo, rev)
470 return False, (), sidedata
496 return False, (), sidedata
471
497
472 return sidedatacompanion
498 return sidedatacompanion
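
Reading across the companions in this hunk, the returned triplet appears to
follow a single contract; a minimal sketch of a no-op companion, where the
slot names are guesses inferred from this patch (the authoritative
description lives with the revlog cloning code, not here):

    def example_companion(revlog, rev):
        drop_all = False  # first slot: presumably "drop every sidedata entry"
        filterout = ()    # second slot: keys to remove (cf. getsidedataremover)
        update = {}       # third slot: new key/value pairs (cf. the adders)
        return drop_all, filterout, update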
473
499
474
500
475 def getsidedataremover(srcrepo, destrepo):
501 def getsidedataremover(srcrepo, destrepo):
476 def sidedatacompanion(revlog, rev):
502 def sidedatacompanion(revlog, rev):
477 f = ()
503 f = ()
478 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
504 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
479 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
505 if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
480 f = (
506 f = (
481 sidedatamod.SD_P1COPIES,
507 sidedatamod.SD_P1COPIES,
482 sidedatamod.SD_P2COPIES,
508 sidedatamod.SD_P2COPIES,
483 sidedatamod.SD_FILESADDED,
509 sidedatamod.SD_FILESADDED,
484 sidedatamod.SD_FILESREMOVED,
510 sidedatamod.SD_FILESREMOVED,
485 )
511 )
486 return False, f, {}
512 return False, f, {}
487
513
488 return sidedatacompanion
514 return sidedatacompanion