##// END OF EJS Templates
commitctx: extract sidedata encoding inside its own function...
marmoute -
r45885:c6eea580 default
parent child Browse files
Show More
@@ -1,605 +1,585 b''
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)

from .revlogutils import sidedata as sidedatamod

# Default 'extra' mapping for changesets that carry none; extensions may
# mutate it, so readers copy it before modification.
_defaultextra = {b'branch': b'default'}
33
33
34
34
35 def _string_escape(text):
35 def _string_escape(text):
36 """
36 """
37 >>> from .pycompat import bytechr as chr
37 >>> from .pycompat import bytechr as chr
38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 >>> s
40 >>> s
41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 >>> res = _string_escape(s)
42 >>> res = _string_escape(s)
43 >>> s == _string_unescape(res)
43 >>> s == _string_unescape(res)
44 True
44 True
45 """
45 """
46 # subset of the string_escape codec
46 # subset of the string_escape codec
47 text = (
47 text = (
48 text.replace(b'\\', b'\\\\')
48 text.replace(b'\\', b'\\\\')
49 .replace(b'\n', b'\\n')
49 .replace(b'\n', b'\\n')
50 .replace(b'\r', b'\\r')
50 .replace(b'\r', b'\\r')
51 )
51 )
52 return text.replace(b'\0', b'\\0')
52 return text.replace(b'\0', b'\\0')
53
53
54
54
def _string_unescape(text):
    """Reverse the transformation performed by ``_string_escape``."""
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0: a temporary
        # newline marker protects escaped backslashes while \0 is restored.
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)
62
62
63
63
def decodeextra(text):
    """Decode a serialized 'extra' blob back into a dict.

    Entries are NUL-separated ``key:value`` pairs escaped with
    ``_string_escape``; missing keys fall back to ``_defaultextra``.

    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra
81
81
82
82
def encodeextra(d):
    """Serialize the 'extra' dict ``d`` into a NUL-separated escaped blob."""
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)
87
87
88
88
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92
92
93
93
class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        # buffered, not-yet-flushed data lives in 'buf'; the real file
        # only ever holds the data present when the appender was created.
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        # virtual end: real file size plus everything buffered so far
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            # only positions inside the real file need an actual seek
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            # remainder comes from the in-memory buffer; coalesce the
            # buffered chunks into a single bytes object first
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        # writes are only buffered, never hit the real file
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
162
162
163
163
164 class _divertopener(object):
164 class _divertopener(object):
165 def __init__(self, opener, target):
165 def __init__(self, opener, target):
166 self._opener = opener
166 self._opener = opener
167 self._target = target
167 self._target = target
168
168
169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 if name != self._target:
170 if name != self._target:
171 return self._opener(name, mode, **kwargs)
171 return self._opener(name, mode, **kwargs)
172 return self._opener(name + b".a", mode, **kwargs)
172 return self._opener(name + b".a", mode, **kwargs)
173
173
174 def __getattr__(self, attr):
174 def __getattr__(self, attr):
175 return getattr(self._opener, attr)
175 return getattr(self._opener, attr)
176
176
177
177
178 def _delayopener(opener, target, buf):
178 def _delayopener(opener, target, buf):
179 """build an opener that stores chunks in 'buf' instead of 'target'"""
179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180
180
181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 if name != target:
182 if name != target:
183 return opener(name, mode, **kwargs)
183 return opener(name, mode, **kwargs)
184 assert not kwargs
184 assert not kwargs
185 return appender(opener, name, mode, buf)
185 return appender(opener, name, mode, buf)
186
186
187 return _delay
187 return _delay
188
188
189
189
@attr.s
class _changelogrevision(object):
    """Plain data holder mirroring ``changelogrevision``'s attributes.

    Returned for the empty/null revision, where there is no text to parse.
    """

    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
204
204
205
205
class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
    )

    def __new__(cls, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd

        return self

    @property
    def manifest(self):
        # manifest node is stored as hex on the first line
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        # first two space-separated fields of the third line: time and tz
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        # optional third field of the date line holds encoded extra metadata
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            # no files: the date line runs straight into the double newline
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        # sidedata storage takes precedence over the legacy 'extra' encoding
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesadded')
        if rawindices is None:
            return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesremoved')
        if rawindices is None:
            return None
        return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p1copies')
        if rawcopies is None:
            return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p2copies')
        if rawcopies is None:
            return None
        return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        # +2 skips the double newline separating files from the description
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])
362
362
363
363
class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each. So disable delta
        # chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()
        self._copiesstorage = opener.options.get(b'copies-storage')

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                # empty changelog: safe to divert writes to a sibling file
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                # non-empty changelog: buffer new index data in memory
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        # while delayed, splitting the inline revlog must wait for _finalize
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        # skip manifest, user and date lines; the rest are files
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        """Add a new revision to the changelog and return its node.

        ``files`` is a ``metadata.ChangingFiles`` instance describing the
        files touched by the changeset.
        """
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                # the default branch is implicit and never stored
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            # copy/add/remove metadata is encoded as revlog sidedata
            sidedata = metadata.encode_copies_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
@@ -1,400 +1,424 b''
1 # metadata.py -- code related to various metadata computation and access.
1 # metadata.py -- code related to various metadata computation and access.
2 #
2 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
3 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import multiprocessing
10 import multiprocessing
11
11
12 from . import (
12 from . import (
13 error,
13 error,
14 node,
14 node,
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from .revlogutils import (
19 from .revlogutils import (
20 flagutil as sidedataflag,
20 flagutil as sidedataflag,
21 sidedata as sidedatamod,
21 sidedata as sidedatamod,
22 )
22 )
23
23
24
24
class ChangingFiles(object):
    """A class recording the changes made to files by a revision.

    Tracks touched/added/removed filenames and the copy information
    relative to each parent (``{destination: source}`` mappings).
    """

    def __init__(
        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
    ):
        self._added = set(added)
        self._removed = set(removed)
        # any added or removed file is, by definition, also touched
        self._touched = set(touched) | self._added | self._removed
        self._p1_copies = dict(p1_copies)
        self._p2_copies = dict(p2_copies)

    @property
    def added(self):
        return frozenset(self._added)

    def mark_added(self, filename):
        self._added.add(filename)
        self._touched.add(filename)

    def update_added(self, filenames):
        for name in filenames:
            self.mark_added(name)

    @property
    def removed(self):
        return frozenset(self._removed)

    def mark_removed(self, filename):
        self._removed.add(filename)
        self._touched.add(filename)

    def update_removed(self, filenames):
        for name in filenames:
            self.mark_removed(name)

    @property
    def touched(self):
        return frozenset(self._touched)

    def mark_touched(self, filename):
        self._touched.add(filename)

    def update_touched(self, filenames):
        for name in filenames:
            self.mark_touched(name)

    @property
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @property
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)
96
96
97
97
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when it is touched by the changeset and
    present in no parent.
    """
    return [
        f
        for f in ctx.files()
        if not any(f in p for p in ctx.parents())
    ]
106
106
107
107
def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contain the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """
    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        # manifests of the common ancestor heads (nullrev when unrelated)
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent
161
161
162
162
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A touched file absent from the changeset is a candidate; candidates
    that merely disappeared because a merge parent deleted them are
    filtered out.
    """
    candidates = [f for f in ctx.files() if f not in ctx]
    if candidates:
        keep_out = get_removal_filter(ctx)
        candidates = [f for f in candidates if not keep_out(f)]
    return candidates
174
174
175
175
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dest in ctx.files():
        # skip files outside the narrowspec and files absent from this rev
        if not narrowmatch(dest) or dest not in ctx:
            continue
        copied = ctx[dest].renamed()
        if not copied:
            continue
        source, srcnode = copied
        # attribute the copy to whichever parent holds the matching filenode
        if source in p1 and p1[source].filenode() == srcnode:
            p1copies[dest] = source
        elif source in p2 and p2[source].filenode() == srcnode:
            p2copies[dest] = source
    return p1copies, p2copies
200
200
201
201
def encodecopies(files, copies):
    """Encode *copies* as "<file-index> NUL <source>" records, one per line.

    Indices refer to positions in *files*.  Raises ProgrammingError when a
    copy destination is not present in *files*.
    """
    entries = [
        b'%d\0%s' % (idx, copies[dest])
        for idx, dest in enumerate(files)
        if dest in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
212
212
213
213
def decodecopies(files, data):
    """Decode data produced by encodecopies().

    Returns a ``{destination: source}`` dict, or None when the payload is
    not in the expected format.
    """
    try:
        copies = {}
        if data:
            for record in data.split(b'\n'):
                strindex, source = record.split(b'\0')
                copies[files[int(strindex)]] = source
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
229
229
230
230
def encodefileindices(files, subset):
    """Encode *subset* as newline-separated decimal indices into *files*."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in members
    )
238
238
239
239
def decodefileindices(files, data):
    """Decode data produced by encodefileindices().

    Returns the corresponding sublist of *files*, or None when the payload
    is malformed or an index is out of range.
    """
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            idx = int(strindex)
            if not 0 <= idx < len(files):
                return None
            subset.append(files[idx])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
255
255
256
256
def encode_copies_sidedata(files):
    """Build the sidedata mapping for a ChangingFiles instance.

    Returns None when there is nothing worth storing.
    """
    sortedfiles = sorted(files.touched)
    sidedata = {}
    p1copies = files.copied_from_p1
    if p1copies:
        sidedata[sidedatamod.SD_P1COPIES] = encodecopies(sortedfiles, p1copies)
    p2copies = files.copied_from_p2
    if p2copies:
        sidedata[sidedatamod.SD_P2COPIES] = encodecopies(sortedfiles, p2copies)
    filesadded = files.added
    if filesadded:
        sidedata[sidedatamod.SD_FILESADDED] = encodefileindices(
            sortedfiles, filesadded
        )
    filesremoved = files.removed
    if filesremoved:
        sidedata[sidedatamod.SD_FILESREMOVED] = encodefileindices(
            sortedfiles, filesremoved
        )
    return sidedata or None
279
280
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata map for revision *rev* of *srcrepo*."""
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        encoded = (
            (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
            (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
            (
                sidedatamod.SD_FILESADDED,
                encodefileindices(sortedfiles, filesadded),
            ),
            (
                sidedatamod.SD_FILESREMOVED,
                encodefileindices(sortedfiles, filesremoved),
            ),
        )
        # only keep the non-empty encoded payloads
        for key, value in encoded:
            if value:
                sidedata[key] = value
    return sidedata
279
303
280
304
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that adds copy data during an upgrade.

    Uses the parallel (worker-based) implementation when enabled and
    supported; Windows always falls back to the in-process version.
    """
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if use_w and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)
287
311
288
312
289 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
313 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
290 """The function used by worker precomputing sidedata
314 """The function used by worker precomputing sidedata
291
315
292 It read an input queue containing revision numbers
316 It read an input queue containing revision numbers
293 It write in an output queue containing (rev, <sidedata-map>)
317 It write in an output queue containing (rev, <sidedata-map>)
294
318
295 The `None` input value is used as a stop signal.
319 The `None` input value is used as a stop signal.
296
320
297 The `tokens` semaphore is user to avoid having too many unprocessed
321 The `tokens` semaphore is user to avoid having too many unprocessed
298 entries. The workers needs to acquire one token before fetching a task.
322 entries. The workers needs to acquire one token before fetching a task.
299 They will be released by the consumer of the produced data.
323 They will be released by the consumer of the produced data.
300 """
324 """
301 tokens.acquire()
325 tokens.acquire()
302 rev = revs_queue.get()
326 rev = revs_queue.get()
303 while rev is not None:
327 while rev is not None:
304 data = _getsidedata(srcrepo, rev)
328 data = _getsidedata(srcrepo, rev)
305 sidedata_queue.put((rev, data))
329 sidedata_queue.put((rev, data))
306 tokens.acquire()
330 tokens.acquire()
307 rev = revs_queue.get()
331 rev = revs_queue.get()
308 # processing of `None` is completed, release the token.
332 # processing of `None` is completed, release the token.
309 tokens.release()
333 tokens.release()
310
334
311
335
# maximum number of precomputed-but-unconsumed results buffered per worker
BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation.

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them."""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedatamap for 42 and 43 is
    # received first, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved ?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion
371
395
372
396
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation.

    It just computes it in the same thread, on request."""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        # only changelog revisions carry the copy-tracing sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
385
409
386
410
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy-tracing sidedata."""

    copy_keys = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        removed = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                removed = copy_keys
        return False, removed, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now