revlog: flag processor...
Remi Chaintron
r30745:c1b7b228 default
@@ -0,0 +1,137 b''
1 # coding=UTF-8
2
3 from __future__ import absolute_import
4
5 import base64
6 import zlib
7
8 from mercurial import (
9 changegroup,
10 extensions,
11 filelog,
12 revlog,
13 util,
14 )
15
16 # Test only: These flags are defined here only in the context of testing the
17 # behavior of the flag processor. The canonical way to add flags is to get in
18 # touch with the community and make them known in revlog.
19 REVIDX_NOOP = (1 << 3)
20 REVIDX_BASE64 = (1 << 2)
21 REVIDX_GZIP = (1 << 1)
22 REVIDX_FAIL = 1
23
24 def validatehash(self, text):
25 return True
26
27 def bypass(self, text):
28 return False
29
30 def noopdonothing(self, text):
31 return (text, True)
32
33 def b64encode(self, text):
34 return (base64.b64encode(text), False)
35
36 def b64decode(self, text):
37 return (base64.b64decode(text), True)
38
39 def gzipcompress(self, text):
40 return (zlib.compress(text), False)
41
42 def gzipdecompress(self, text):
43 return (zlib.decompress(text), True)
44
45 def supportedoutgoingversions(orig, repo):
46 versions = orig(repo)
47 versions.discard('01')
48 versions.discard('02')
49 versions.add('03')
50 return versions
51
52 def allsupportedversions(orig, ui):
53 versions = orig(ui)
54 versions.add('03')
55 return versions
56
57 def noopaddrevision(orig, self, text, transaction, link, p1, p2,
58 cachedelta=None, node=None,
59 flags=revlog.REVIDX_DEFAULT_FLAGS):
60 if '[NOOP]' in text:
61 flags |= REVIDX_NOOP
62 return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
63 node=node, flags=flags)
64
65 def b64addrevision(orig, self, text, transaction, link, p1, p2,
66 cachedelta=None, node=None,
67 flags=revlog.REVIDX_DEFAULT_FLAGS):
68 if '[BASE64]' in text:
69 flags |= REVIDX_BASE64
70 return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
71 node=node, flags=flags)
72
73 def gzipaddrevision(orig, self, text, transaction, link, p1, p2,
74 cachedelta=None, node=None,
75 flags=revlog.REVIDX_DEFAULT_FLAGS):
76 if '[GZIP]' in text:
77 flags |= REVIDX_GZIP
78 return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
79 node=node, flags=flags)
80
81 def failaddrevision(orig, self, text, transaction, link, p1, p2,
82 cachedelta=None, node=None,
83 flags=revlog.REVIDX_DEFAULT_FLAGS):
84 # This addrevision wrapper is meant to add a flag we will not have
85 # transforms registered for, ensuring we handle this error case.
86 if '[FAIL]' in text:
87 flags |= REVIDX_FAIL
88 return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
89 node=node, flags=flags)
90
91 def extsetup(ui):
92 # Enable changegroup3 for flags to be sent over the wire
93 wrapfunction = extensions.wrapfunction
94 wrapfunction(changegroup,
95 'supportedoutgoingversions',
96 supportedoutgoingversions)
97 wrapfunction(changegroup,
98 'allsupportedversions',
99 allsupportedversions)
100
101 # Teach revlog about our test flags
102 flags = [REVIDX_NOOP, REVIDX_BASE64, REVIDX_GZIP, REVIDX_FAIL]
103 revlog.REVIDX_KNOWN_FLAGS |= util.bitsfrom(flags)
104 revlog.REVIDX_FLAGS_ORDER.extend(flags)
105
106 # Add wrappers for addrevision, responsible to set flags depending on the
107 # revision data contents.
108 wrapfunction(filelog.filelog, 'addrevision', noopaddrevision)
109 wrapfunction(filelog.filelog, 'addrevision', b64addrevision)
110 wrapfunction(filelog.filelog, 'addrevision', gzipaddrevision)
111 wrapfunction(filelog.filelog, 'addrevision', failaddrevision)
112
113 # Register flag processors for each extension
114 revlog.addflagprocessor(
115 REVIDX_NOOP,
116 (
117 noopdonothing,
118 noopdonothing,
119 validatehash,
120 )
121 )
122 revlog.addflagprocessor(
123 REVIDX_BASE64,
124 (
125 b64decode,
126 b64encode,
127 bypass,
128 ),
129 )
130 revlog.addflagprocessor(
131 REVIDX_GZIP,
132 (
133 gzipdecompress,
134 gzipcompress,
135 bypass
136 )
137 )
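Side note (not part of the commit): the extension above wires everything through the (read, write, raw) processor tuples documented in revlog.addflagprocessor. The base64 pair round-trips as sketched below; the False returned by the write transform means the stored, encoded text must not be hash-checked (the node hash keeps referring to the original fulltext), while the True from the read transform says the decoded text is the original again.

    # Illustrative sketch only; reuses the b64encode/b64decode pair from the
    # test extension outside of Mercurial.
    import base64

    def b64encode(self, text):
        return (base64.b64encode(text), False)   # stored form, not hash-checkable

    def b64decode(self, text):
        return (base64.b64decode(text), True)    # original fulltext, hash-checkable

    original = b'[BASE64]\n'
    stored, _ = b64encode(None, original)        # what 'hg debugdata' prints below
    restored, canvalidate = b64decode(None, stored)
    assert restored == original and canvalidate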
@@ -0,0 +1,165 b''
1 # Create server
2 $ hg init server
3 $ cd server
4 $ cat >> .hg/hgrc << EOF
5 > [extensions]
6 > extension=$TESTDIR/flagprocessorext.py
7 > EOF
8 $ cd ../
9
10 # Clone server and enable extensions
11 $ hg clone -q server client
12 $ cd client
13 $ cat >> .hg/hgrc << EOF
14 > [extensions]
15 > extension=$TESTDIR/flagprocessorext.py
16 > EOF
17
18 # Commit file that will trigger the noop extension
19 $ echo '[NOOP]' > noop
20 $ hg commit -Aqm "noop"
21
22 # Commit file that will trigger the base64 extension
23 $ echo '[BASE64]' > base64
24 $ hg commit -Aqm 'base64'
25
26 # Commit file that will trigger the gzip extension
27 $ echo '[GZIP]' > gzip
28 $ hg commit -Aqm 'gzip'
29
30 # Commit file that will trigger noop and base64
31 $ echo '[NOOP][BASE64]' > noop-base64
32 $ hg commit -Aqm 'noop+base64'
33
34 # Commit file that will trigger noop and gzip
35 $ echo '[NOOP][GZIP]' > noop-gzip
36 $ hg commit -Aqm 'noop+gzip'
37
38 # Commit file that will trigger base64 and gzip
39 $ echo '[BASE64][GZIP]' > base64-gzip
40 $ hg commit -Aqm 'base64+gzip'
41
42 # Commit file that will trigger base64, gzip and noop
43 $ echo '[BASE64][GZIP][NOOP]' > base64-gzip-noop
44 $ hg commit -Aqm 'base64+gzip+noop'
45
46 # TEST: ensure the revision data is consistent
47 $ hg cat noop
48 [NOOP]
49 $ hg debugdata noop 0
50 [NOOP]
51
52 $ hg cat -r . base64
53 [BASE64]
54 $ hg debugdata base64 0
55 W0JBU0U2NF0K (no-eol)
56
57 $ hg cat -r . gzip
58 [GZIP]
59 $ hg debugdata gzip 0
60 x\x9c\x8bv\x8f\xf2\x0c\x88\xe5\x02\x00\x08\xc8\x01\xfd (no-eol) (esc)
61
62 $ hg cat -r . noop-base64
63 [NOOP][BASE64]
64 $ hg debugdata noop-base64 0
65 W05PT1BdW0JBU0U2NF0K (no-eol)
66
67 $ hg cat -r . noop-gzip
68 [NOOP][GZIP]
69 $ hg debugdata noop-gzip 0
70 x\x9c\x8b\xf6\xf3\xf7\x0f\x88\x8dv\x8f\xf2\x0c\x88\xe5\x02\x00\x1dH\x03\xf1 (no-eol) (esc)
71
72 $ hg cat -r . base64-gzip
73 [BASE64][GZIP]
74 $ hg debugdata base64-gzip 0
75 eJyLdnIMdjUziY12j/IMiOUCACLBBDo= (no-eol)
76
77 $ hg cat -r . base64-gzip-noop
78 [BASE64][GZIP][NOOP]
79 $ hg debugdata base64-gzip-noop 0
80 eJyLdnIMdjUziY12j/IMiI328/cPiOUCAESjBi4= (no-eol)
81
82 # Push to the server
83 $ hg push
84 pushing to $TESTTMP/server (glob)
85 searching for changes
86 adding changesets
87 adding manifests
88 adding file changes
89 added 7 changesets with 7 changes to 7 files
90
91 # Initialize new client (not cloning) and setup extension
92 $ cd ..
93 $ hg init client2
94 $ cd client2
95 $ cat >> .hg/hgrc << EOF
96 > [paths]
97 > default = $TESTTMP/server
98 > [extensions]
99 > extension=$TESTDIR/flagprocessorext.py
100 > EOF
101
102 # Pull from server and update to latest revision
103 $ hg pull default
104 pulling from $TESTTMP/server (glob)
105 requesting all changes
106 adding changesets
107 adding manifests
108 adding file changes
109 added 7 changesets with 7 changes to 7 files
110 (run 'hg update' to get a working copy)
111 $ hg update
112 7 files updated, 0 files merged, 0 files removed, 0 files unresolved
113
114 # TEST: ensure the revision data is consistent
115 $ hg cat noop
116 [NOOP]
117 $ hg debugdata noop 0
118 [NOOP]
119
120 $ hg cat -r . base64
121 [BASE64]
122 $ hg debugdata base64 0
123 W0JBU0U2NF0K (no-eol)
124
125 $ hg cat -r . gzip
126 [GZIP]
127 $ hg debugdata gzip 0
128 x\x9c\x8bv\x8f\xf2\x0c\x88\xe5\x02\x00\x08\xc8\x01\xfd (no-eol) (esc)
129
130 $ hg cat -r . noop-base64
131 [NOOP][BASE64]
132 $ hg debugdata noop-base64 0
133 W05PT1BdW0JBU0U2NF0K (no-eol)
134
135 $ hg cat -r . noop-gzip
136 [NOOP][GZIP]
137 $ hg debugdata noop-gzip 0
138 x\x9c\x8b\xf6\xf3\xf7\x0f\x88\x8dv\x8f\xf2\x0c\x88\xe5\x02\x00\x1dH\x03\xf1 (no-eol) (esc)
139
140 $ hg cat -r . base64-gzip
141 [BASE64][GZIP]
142 $ hg debugdata base64-gzip 0
143 eJyLdnIMdjUziY12j/IMiOUCACLBBDo= (no-eol)
144
145 $ hg cat -r . base64-gzip-noop
146 [BASE64][GZIP][NOOP]
147 $ hg debugdata base64-gzip-noop 0
148 eJyLdnIMdjUziY12j/IMiI328/cPiOUCAESjBi4= (no-eol)
149
150 # TEST: ensure a missing processor is handled
151 $ echo '[FAIL][BASE64][GZIP][NOOP]' > fail-base64-gzip-noop
152 $ hg commit -Aqm 'fail+base64+gzip+noop'
153 abort: missing processor for flag '0x1'!
154 [255]
155
156 # TEST: ensure we cannot register several flag processors on the same flag
157 $ cat >> .hg/hgrc << EOF
158 > [extensions]
159 > extension=$TESTDIR/flagprocessorext.py
160 > duplicate=$TESTDIR/flagprocessorext.py
161 > EOF
162 $ echo 'this should fail' > file
163 $ hg commit -Aqm 'add file'
164 abort: cannot register multiple processors on flag '0x8'.
165 [255]
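Recap (illustrative, not part of the test): the two abort messages above come straight from the test-only flag bits defined in flagprocessorext.py.

    REVIDX_NOOP   = 1 << 3   # 0x8: the first flag extsetup() registers, so the
                             # duplicate-extension case aborts on flag '0x8'
    REVIDX_BASE64 = 1 << 2   # 0x4
    REVIDX_GZIP   = 1 << 1   # 0x2
    REVIDX_FAIL   = 1        # 0x1: added to the known flags but given no
                             # processor, hence "missing processor for flag '0x1'!"

    assert hex(REVIDX_NOOP) == '0x8' and hex(REVIDX_FAIL) == '0x1'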
@@ -1,554 +1,557 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import os
17 17 import shutil
18 18 import tempfile
19 19
20 20 from .i18n import _
21 21 from .node import nullid
22 22
23 23 from . import (
24 24 bundle2,
25 25 changegroup,
26 26 changelog,
27 27 cmdutil,
28 28 discovery,
29 29 error,
30 30 exchange,
31 31 filelog,
32 32 localrepo,
33 33 manifest,
34 34 mdiff,
35 35 node as nodemod,
36 36 pathutil,
37 37 phases,
38 38 pycompat,
39 39 revlog,
40 40 scmutil,
41 41 util,
42 42 )
43 43
44 44 class bundlerevlog(revlog.revlog):
45 45 def __init__(self, opener, indexfile, bundle, linkmapper):
46 46 # How it works:
47 47 # To retrieve a revision, we need to know the offset of the revision in
48 48 # the bundle (an unbundle object). We store this offset in the index
49 49 # (start). The base of the delta is stored in the base field.
50 50 #
51 51 # To differentiate a rev in the bundle from a rev in the revlog, we
52 52 # check revision against repotiprev.
53 53 opener = scmutil.readonlyvfs(opener)
54 54 revlog.revlog.__init__(self, opener, indexfile)
55 55 self.bundle = bundle
56 56 n = len(self)
57 57 self.repotiprev = n - 1
58 58 chain = None
59 59 self.bundlerevs = set() # used by 'bundle()' revset expression
60 60 getchunk = lambda: bundle.deltachunk(chain)
61 61 for chunkdata in iter(getchunk, {}):
62 62 node = chunkdata['node']
63 63 p1 = chunkdata['p1']
64 64 p2 = chunkdata['p2']
65 65 cs = chunkdata['cs']
66 66 deltabase = chunkdata['deltabase']
67 67 delta = chunkdata['delta']
68 68
69 69 size = len(delta)
70 70 start = bundle.tell() - size
71 71
72 72 link = linkmapper(cs)
73 73 if node in self.nodemap:
74 74 # this can happen if two branches make the same change
75 75 chain = node
76 76 self.bundlerevs.add(self.nodemap[node])
77 77 continue
78 78
79 79 for p in (p1, p2):
80 80 if p not in self.nodemap:
81 81 raise error.LookupError(p, self.indexfile,
82 82 _("unknown parent"))
83 83
84 84 if deltabase not in self.nodemap:
85 85 raise LookupError(deltabase, self.indexfile,
86 86 _('unknown delta base'))
87 87
88 88 baserev = self.rev(deltabase)
89 89 # start, size, full unc. size, base (unused), link, p1, p2, node
90 90 e = (revlog.offset_type(start, 0), size, -1, baserev, link,
91 91 self.rev(p1), self.rev(p2), node)
92 92 self.index.insert(-1, e)
93 93 self.nodemap[node] = n
94 94 self.bundlerevs.add(n)
95 95 chain = node
96 96 n += 1
97 97
98 98 def _chunk(self, rev):
99 99 # Warning: in case of bundle, the diff is against what we stored as
100 100 # delta base, not against rev - 1
101 101 # XXX: could use some caching
102 102 if rev <= self.repotiprev:
103 103 return revlog.revlog._chunk(self, rev)
104 104 self.bundle.seek(self.start(rev))
105 105 return self.bundle.read(self.length(rev))
106 106
107 107 def revdiff(self, rev1, rev2):
108 108 """return or calculate a delta between two revisions"""
109 109 if rev1 > self.repotiprev and rev2 > self.repotiprev:
110 110 # hot path for bundle
111 111 revb = self.index[rev2][3]
112 112 if revb == rev1:
113 113 return self._chunk(rev2)
114 114 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
115 115 return revlog.revlog.revdiff(self, rev1, rev2)
116 116
117 117 return mdiff.textdiff(self.revision(self.node(rev1)),
118 118 self.revision(self.node(rev2)))
119 119
120 120 def revision(self, nodeorrev, raw=False):
121 121 """return an uncompressed revision of a given node or revision
122 122 number.
123 123 """
124 124 if isinstance(nodeorrev, int):
125 125 rev = nodeorrev
126 126 node = self.node(rev)
127 127 else:
128 128 node = nodeorrev
129 129 rev = self.rev(node)
130 130
131 131 if node == nullid:
132 132 return ""
133 133
134 134 text = None
135 135 chain = []
136 136 iterrev = rev
137 137 # reconstruct the revision if it is from a changegroup
138 138 while iterrev > self.repotiprev:
139 139 if self._cache and self._cache[1] == iterrev:
140 140 text = self._cache[2]
141 141 break
142 142 chain.append(iterrev)
143 143 iterrev = self.index[iterrev][3]
144 144 if text is None:
145 145 text = self.baserevision(iterrev)
146 146
147 147 while chain:
148 148 delta = self._chunk(chain.pop())
149 149 text = mdiff.patches(text, [delta])
150 150
151 self.checkhash(text, node, rev=rev)
151 text, validatehash = self._processflags(text, self.flags(rev),
152 'read', raw=raw)
153 if validatehash:
154 self.checkhash(text, node, rev=rev)
152 155 self._cache = (node, rev, text)
153 156 return text
154 157
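The hunk above is the behavioural core of this file's change: instead of always hash-checking the reconstructed text, revision() now routes it through the revlog-level _processflags() helper introduced by this commit in revlog.py, and only calls checkhash() when the processors report that the resulting text is the original fulltext. A minimal sketch of what that read path amounts to, written against the registry names shown in the revlog.py part of this diff (REVIDX_FLAGS_ORDER, _flagprocessors); this is an approximation, not the literal implementation:

    def _processflags_sketch(self, text, flags, operation, raw=False):
        validatehash = True
        for flag in REVIDX_FLAGS_ORDER:            # stable, documented order
            if not (flag & flags):
                continue
            if flag not in _flagprocessors:
                raise RevlogError("missing processor for flag '%#x'!" % flag)
            processor = _flagprocessors[flag]
            if processor is None:                   # known flag, nothing to run
                continue
            readtransform, writetransform, rawtransform = processor
            if raw:
                vhash = rawtransform(self, text)    # text untouched, only a verdict
            elif operation == 'read':
                text, vhash = readtransform(self, text)
            else:
                text, vhash = writetransform(self, text)
            validatehash = validatehash and vhash
        return text, validatehash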
155 158 def baserevision(self, nodeorrev):
156 159 # Revlog subclasses may override 'revision' method to modify format of
157 160 # content retrieved from revlog. To use bundlerevlog with such class one
158 161 # needs to override 'baserevision' and make more specific call here.
159 162 return revlog.revlog.revision(self, nodeorrev)
160 163
161 164 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
162 165 raise NotImplementedError
163 166 def addgroup(self, revs, linkmapper, transaction):
164 167 raise NotImplementedError
165 168 def strip(self, rev, minlink):
166 169 raise NotImplementedError
167 170 def checksize(self):
168 171 raise NotImplementedError
169 172
170 173 class bundlechangelog(bundlerevlog, changelog.changelog):
171 174 def __init__(self, opener, bundle):
172 175 changelog.changelog.__init__(self, opener)
173 176 linkmapper = lambda x: x
174 177 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
175 178 linkmapper)
176 179
177 180 def baserevision(self, nodeorrev):
178 181 # Although changelog doesn't override 'revision' method, some extensions
179 182 # may replace this class with another that does. Same story with
180 183 # manifest and filelog classes.
181 184
182 185 # This bypasses filtering on changelog.node() and rev() because we need
183 186 # revision text of the bundle base even if it is hidden.
184 187 oldfilter = self.filteredrevs
185 188 try:
186 189 self.filteredrevs = ()
187 190 return changelog.changelog.revision(self, nodeorrev)
188 191 finally:
189 192 self.filteredrevs = oldfilter
190 193
191 194 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
192 195 def __init__(self, opener, bundle, linkmapper, dirlogstarts=None, dir=''):
193 196 manifest.manifestrevlog.__init__(self, opener, dir=dir)
194 197 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
195 198 linkmapper)
196 199 if dirlogstarts is None:
197 200 dirlogstarts = {}
198 201 if self.bundle.version == "03":
199 202 dirlogstarts = _getfilestarts(self.bundle)
200 203 self._dirlogstarts = dirlogstarts
201 204 self._linkmapper = linkmapper
202 205
203 206 def baserevision(self, nodeorrev):
204 207 node = nodeorrev
205 208 if isinstance(node, int):
206 209 node = self.node(node)
207 210
208 211 if node in self.fulltextcache:
209 212 result = self.fulltextcache[node].tostring()
210 213 else:
211 214 result = manifest.manifestrevlog.revision(self, nodeorrev)
212 215 return result
213 216
214 217 def dirlog(self, d):
215 218 if d in self._dirlogstarts:
216 219 self.bundle.seek(self._dirlogstarts[d])
217 220 return bundlemanifest(
218 221 self.opener, self.bundle, self._linkmapper,
219 222 self._dirlogstarts, dir=d)
220 223 return super(bundlemanifest, self).dirlog(d)
221 224
222 225 class bundlefilelog(bundlerevlog, filelog.filelog):
223 226 def __init__(self, opener, path, bundle, linkmapper):
224 227 filelog.filelog.__init__(self, opener, path)
225 228 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
226 229 linkmapper)
227 230
228 231 def baserevision(self, nodeorrev):
229 232 return filelog.filelog.revision(self, nodeorrev)
230 233
231 234 class bundlepeer(localrepo.localpeer):
232 235 def canpush(self):
233 236 return False
234 237
235 238 class bundlephasecache(phases.phasecache):
236 239 def __init__(self, *args, **kwargs):
237 240 super(bundlephasecache, self).__init__(*args, **kwargs)
238 241 if util.safehasattr(self, 'opener'):
239 242 self.opener = scmutil.readonlyvfs(self.opener)
240 243
241 244 def write(self):
242 245 raise NotImplementedError
243 246
244 247 def _write(self, fp):
245 248 raise NotImplementedError
246 249
247 250 def _updateroots(self, phase, newroots, tr):
248 251 self.phaseroots[phase] = newroots
249 252 self.invalidate()
250 253 self.dirty = True
251 254
252 255 def _getfilestarts(bundle):
253 256 bundlefilespos = {}
254 257 for chunkdata in iter(bundle.filelogheader, {}):
255 258 fname = chunkdata['filename']
256 259 bundlefilespos[fname] = bundle.tell()
257 260 for chunk in iter(lambda: bundle.deltachunk(None), {}):
258 261 pass
259 262 return bundlefilespos
260 263
261 264 class bundlerepository(localrepo.localrepository):
262 265 def __init__(self, ui, path, bundlename):
263 266 def _writetempbundle(read, suffix, header=''):
264 267 """Write a temporary file to disk
265 268
266 269 This is a closure because we need to make sure this is tracked by
267 270 self.tempfile for cleanup purposes."""
268 271 fdtemp, temp = self.vfs.mkstemp(prefix="hg-bundle-",
269 272 suffix=".hg10un")
270 273 self.tempfile = temp
271 274
272 275 with os.fdopen(fdtemp, 'wb') as fptemp:
273 276 fptemp.write(header)
274 277 while True:
275 278 chunk = read(2**18)
276 279 if not chunk:
277 280 break
278 281 fptemp.write(chunk)
279 282
280 283 return self.vfs.open(self.tempfile, mode="rb")
281 284 self._tempparent = None
282 285 try:
283 286 localrepo.localrepository.__init__(self, ui, path)
284 287 except error.RepoError:
285 288 self._tempparent = tempfile.mkdtemp()
286 289 localrepo.instance(ui, self._tempparent, 1)
287 290 localrepo.localrepository.__init__(self, ui, self._tempparent)
288 291 self.ui.setconfig('phases', 'publish', False, 'bundlerepo')
289 292
290 293 if path:
291 294 self._url = 'bundle:' + util.expandpath(path) + '+' + bundlename
292 295 else:
293 296 self._url = 'bundle:' + bundlename
294 297
295 298 self.tempfile = None
296 299 f = util.posixfile(bundlename, "rb")
297 300 self.bundlefile = self.bundle = exchange.readbundle(ui, f, bundlename)
298 301
299 302 if isinstance(self.bundle, bundle2.unbundle20):
300 303 cgstream = None
301 304 for part in self.bundle.iterparts():
302 305 if part.type == 'changegroup':
303 306 if cgstream is not None:
304 307 raise NotImplementedError("can't process "
305 308 "multiple changegroups")
306 309 cgstream = part
307 310 version = part.params.get('version', '01')
308 311 legalcgvers = changegroup.supportedincomingversions(self)
309 312 if version not in legalcgvers:
310 313 msg = _('Unsupported changegroup version: %s')
311 314 raise error.Abort(msg % version)
312 315 if self.bundle.compressed():
313 316 cgstream = _writetempbundle(part.read,
314 317 ".cg%sun" % version)
315 318
316 319 if cgstream is None:
317 320 raise error.Abort(_('No changegroups found'))
318 321 cgstream.seek(0)
319 322
320 323 self.bundle = changegroup.getunbundler(version, cgstream, 'UN')
321 324
322 325 elif self.bundle.compressed():
323 326 f = _writetempbundle(self.bundle.read, '.hg10un', header='HG10UN')
324 327 self.bundlefile = self.bundle = exchange.readbundle(ui, f,
325 328 bundlename,
326 329 self.vfs)
327 330
328 331 # dict with the mapping 'filename' -> position in the bundle
329 332 self.bundlefilespos = {}
330 333
331 334 self.firstnewrev = self.changelog.repotiprev + 1
332 335 phases.retractboundary(self, None, phases.draft,
333 336 [ctx.node() for ctx in self[self.firstnewrev:]])
334 337
335 338 @localrepo.unfilteredpropertycache
336 339 def _phasecache(self):
337 340 return bundlephasecache(self, self._phasedefaults)
338 341
339 342 @localrepo.unfilteredpropertycache
340 343 def changelog(self):
341 344 # consume the header if it exists
342 345 self.bundle.changelogheader()
343 346 c = bundlechangelog(self.svfs, self.bundle)
344 347 self.manstart = self.bundle.tell()
345 348 return c
346 349
347 350 def _constructmanifest(self):
348 351 self.bundle.seek(self.manstart)
349 352 # consume the header if it exists
350 353 self.bundle.manifestheader()
351 354 linkmapper = self.unfiltered().changelog.rev
352 355 m = bundlemanifest(self.svfs, self.bundle, linkmapper)
353 356 self.filestart = self.bundle.tell()
354 357 return m
355 358
356 359 @localrepo.unfilteredpropertycache
357 360 def manstart(self):
358 361 self.changelog
359 362 return self.manstart
360 363
361 364 @localrepo.unfilteredpropertycache
362 365 def filestart(self):
363 366 self.manifestlog
364 367 return self.filestart
365 368
366 369 def url(self):
367 370 return self._url
368 371
369 372 def file(self, f):
370 373 if not self.bundlefilespos:
371 374 self.bundle.seek(self.filestart)
372 375 self.bundlefilespos = _getfilestarts(self.bundle)
373 376
374 377 if f in self.bundlefilespos:
375 378 self.bundle.seek(self.bundlefilespos[f])
376 379 linkmapper = self.unfiltered().changelog.rev
377 380 return bundlefilelog(self.svfs, f, self.bundle, linkmapper)
378 381 else:
379 382 return filelog.filelog(self.svfs, f)
380 383
381 384 def close(self):
382 385 """Close assigned bundle file immediately."""
383 386 self.bundlefile.close()
384 387 if self.tempfile is not None:
385 388 self.vfs.unlink(self.tempfile)
386 389 if self._tempparent:
387 390 shutil.rmtree(self._tempparent, True)
388 391
389 392 def cancopy(self):
390 393 return False
391 394
392 395 def peer(self):
393 396 return bundlepeer(self)
394 397
395 398 def getcwd(self):
396 399 return pycompat.getcwd() # always outside the repo
397 400
398 401 # Check if parents exist in localrepo before setting
399 402 def setparents(self, p1, p2=nullid):
400 403 p1rev = self.changelog.rev(p1)
401 404 p2rev = self.changelog.rev(p2)
402 405 msg = _("setting parent to node %s that only exists in the bundle\n")
403 406 if self.changelog.repotiprev < p1rev:
404 407 self.ui.warn(msg % nodemod.hex(p1))
405 408 if self.changelog.repotiprev < p2rev:
406 409 self.ui.warn(msg % nodemod.hex(p2))
407 410 return super(bundlerepository, self).setparents(p1, p2)
408 411
409 412 def instance(ui, path, create):
410 413 if create:
411 414 raise error.Abort(_('cannot create new bundle repository'))
412 415 # internal config: bundle.mainreporoot
413 416 parentpath = ui.config("bundle", "mainreporoot", "")
414 417 if not parentpath:
415 418 # try to find the correct path to the working directory repo
416 419 parentpath = cmdutil.findrepo(pycompat.getcwd())
417 420 if parentpath is None:
418 421 parentpath = ''
419 422 if parentpath:
420 423 # Try to make the full path relative so we get a nice, short URL.
421 424 # In particular, we don't want temp dir names in test outputs.
422 425 cwd = pycompat.getcwd()
423 426 if parentpath == cwd:
424 427 parentpath = ''
425 428 else:
426 429 cwd = pathutil.normasprefix(cwd)
427 430 if parentpath.startswith(cwd):
428 431 parentpath = parentpath[len(cwd):]
429 432 u = util.url(path)
430 433 path = u.localpath()
431 434 if u.scheme == 'bundle':
432 435 s = path.split("+", 1)
433 436 if len(s) == 1:
434 437 repopath, bundlename = parentpath, s[0]
435 438 else:
436 439 repopath, bundlename = s
437 440 else:
438 441 repopath, bundlename = parentpath, path
439 442 return bundlerepository(ui, repopath, bundlename)
440 443
441 444 class bundletransactionmanager(object):
442 445 def transaction(self):
443 446 return None
444 447
445 448 def close(self):
446 449 raise NotImplementedError
447 450
448 451 def release(self):
449 452 raise NotImplementedError
450 453
451 454 def getremotechanges(ui, repo, other, onlyheads=None, bundlename=None,
452 455 force=False):
453 456 '''obtains a bundle of changes incoming from other
454 457
455 458 "onlyheads" restricts the returned changes to those reachable from the
456 459 specified heads.
457 460 "bundlename", if given, stores the bundle to this file path permanently;
458 461 otherwise it's stored to a temp file and gets deleted again when you call
459 462 the returned "cleanupfn".
460 463 "force" indicates whether to proceed on unrelated repos.
461 464
462 465 Returns a tuple (local, csets, cleanupfn):
463 466
464 467 "local" is a local repo from which to obtain the actual incoming
465 468 changesets; it is a bundlerepo for the obtained bundle when the
466 469 original "other" is remote.
467 470 "csets" lists the incoming changeset node ids.
468 471 "cleanupfn" must be called without arguments when you're done processing
469 472 the changes; it closes both the original "other" and the one returned
470 473 here.
471 474 '''
472 475 tmp = discovery.findcommonincoming(repo, other, heads=onlyheads,
473 476 force=force)
474 477 common, incoming, rheads = tmp
475 478 if not incoming:
476 479 try:
477 480 if bundlename:
478 481 os.unlink(bundlename)
479 482 except OSError:
480 483 pass
481 484 return repo, [], other.close
482 485
483 486 commonset = set(common)
484 487 rheads = [x for x in rheads if x not in commonset]
485 488
486 489 bundle = None
487 490 bundlerepo = None
488 491 localrepo = other.local()
489 492 if bundlename or not localrepo:
490 493 # create a bundle (uncompressed if other repo is not local)
491 494
492 495 # developer config: devel.legacy.exchange
493 496 legexc = ui.configlist('devel', 'legacy.exchange')
494 497 forcebundle1 = 'bundle2' not in legexc and 'bundle1' in legexc
495 498 canbundle2 = (not forcebundle1
496 499 and other.capable('getbundle')
497 500 and other.capable('bundle2'))
498 501 if canbundle2:
499 502 kwargs = {}
500 503 kwargs['common'] = common
501 504 kwargs['heads'] = rheads
502 505 kwargs['bundlecaps'] = exchange.caps20to10(repo)
503 506 kwargs['cg'] = True
504 507 b2 = other.getbundle('incoming', **kwargs)
505 508 fname = bundle = changegroup.writechunks(ui, b2._forwardchunks(),
506 509 bundlename)
507 510 else:
508 511 if other.capable('getbundle'):
509 512 cg = other.getbundle('incoming', common=common, heads=rheads)
510 513 elif onlyheads is None and not other.capable('changegroupsubset'):
511 514 # compat with older servers when pulling all remote heads
512 515 cg = other.changegroup(incoming, "incoming")
513 516 rheads = None
514 517 else:
515 518 cg = other.changegroupsubset(incoming, rheads, 'incoming')
516 519 if localrepo:
517 520 bundletype = "HG10BZ"
518 521 else:
519 522 bundletype = "HG10UN"
520 523 fname = bundle = bundle2.writebundle(ui, cg, bundlename,
521 524 bundletype)
522 525 # keep written bundle?
523 526 if bundlename:
524 527 bundle = None
525 528 if not localrepo:
526 529 # use the created uncompressed bundlerepo
527 530 localrepo = bundlerepo = bundlerepository(repo.baseui, repo.root,
528 531 fname)
529 532 # this repo contains local and other now, so filter out local again
530 533 common = repo.heads()
531 534 if localrepo:
532 535 # Part of common may be remotely filtered
533 536 # So use an unfiltered version
534 537 # The discovery process probably need cleanup to avoid that
535 538 localrepo = localrepo.unfiltered()
536 539
537 540 csets = localrepo.changelog.findmissing(common, rheads)
538 541
539 542 if bundlerepo:
540 543 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev:]]
541 544 remotephases = other.listkeys('phases')
542 545
543 546 pullop = exchange.pulloperation(bundlerepo, other, heads=reponodes)
544 547 pullop.trmanager = bundletransactionmanager()
545 548 exchange._pullapplyphases(pullop, remotephases)
546 549
547 550 def cleanup():
548 551 if bundlerepo:
549 552 bundlerepo.close()
550 553 if bundle:
551 554 os.unlink(bundle)
552 555 other.close()
553 556
554 557 return (localrepo, csets, cleanup)
@@ -1,1833 +1,1948 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
58 # stable order in which flags need to be processed and their processors applied
59 REVIDX_FLAGS_ORDER = [
60 REVIDX_ISCENSORED,
61 ]
62 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
59 63
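REVIDX_KNOWN_FLAGS is now derived from the order list instead of being spelled out by hand, which keeps the two in sync when extensions append flags. util.bitsfrom is assumed here to simply OR the listed bits together; a sketch of that behaviour (an assumption, not copied from util.py):

    def bitsfrom(container):
        bits = 0
        for bit in container:
            bits |= bit
        return bits

    # e.g. bitsfrom([REVIDX_ISCENSORED]) == 1 << 15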
60 64 # max size of revlog with inline data
61 65 _maxinline = 131072
62 66 _chunksize = 1048576
63 67
64 68 RevlogError = error.RevlogError
65 69 LookupError = error.LookupError
66 70 CensoredNodeError = error.CensoredNodeError
71 ProgrammingError = error.ProgrammingError
72
73 # Store flag processors (cf. 'addflagprocessor()' to register)
74 _flagprocessors = {
75 REVIDX_ISCENSORED: None,
76 }
77
78 def addflagprocessor(flag, processor):
79 """Register a flag processor on a revision data flag.
80
81 Invariant:
82 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
83 - Only one flag processor can be registered on a specific flag.
84 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
85 following signatures:
86 - (read) f(self, text) -> newtext, bool
87 - (write) f(self, text) -> newtext, bool
88 - (raw) f(self, text) -> bool
89 The boolean returned by these transforms is used to determine whether
90 'newtext' can be used for hash integrity checking.
91
92 Note: The 'raw' transform is used for changegroup generation and in some
93 debug commands. In this case the transform only indicates whether the
94 contents can be used for hash integrity checks.
95 """
96 if not flag & REVIDX_KNOWN_FLAGS:
97 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
98 raise ProgrammingError(msg)
99 if flag not in REVIDX_FLAGS_ORDER:
100 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
101 raise ProgrammingError(msg)
102 if flag in _flagprocessors:
103 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
104 raise error.Abort(msg)
105 _flagprocessors[flag] = processor
67 106
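The two ProgrammingError guards plus the duplicate check above define the full registration contract: a flag must already be part of REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER, and it may carry at most one processor. A hypothetical registration following that contract (the flag name, bit value and transform names below are made up for illustration):

    REVIDX_MYFLAG = (1 << 4)                       # pick a bit not used above
    REVIDX_FLAGS_ORDER.append(REVIDX_MYFLAG)
    REVIDX_KNOWN_FLAGS |= REVIDX_MYFLAG

    def myread(self, text):
        return text, True                          # unchanged text, hash-checkable
    def mywrite(self, text):
        return text, True
    def myraw(self, text):
        return True

    addflagprocessor(REVIDX_MYFLAG, (myread, mywrite, myraw))
    # A second addflagprocessor(REVIDX_MYFLAG, ...) call raises error.Abort:
    # "cannot register multiple processors on flag '0x10'."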
68 107 def getoffset(q):
69 108 return int(q >> 16)
70 109
71 110 def gettype(q):
72 111 return int(q & 0xFFFF)
73 112
74 113 def offset_type(offset, type):
75 114 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
76 115 raise ValueError('unknown revlog index flags')
77 116 return long(long(offset) << 16 | type)
78 117
79 118 _nullhash = hashlib.sha1(nullid)
80 119
81 120 def hash(text, p1, p2):
82 121 """generate a hash from the given text and its parent hashes
83 122
84 123 This hash combines both the current file contents and its history
85 124 in a manner that makes it easy to distinguish nodes with the same
86 125 content in the revision graph.
87 126 """
88 127 # As of now, if one of the parent nodes is null, p2 is null
89 128 if p2 == nullid:
90 129 # deep copy of a hash is faster than creating one
91 130 s = _nullhash.copy()
92 131 s.update(p1)
93 132 else:
94 133 # none of the parent nodes are nullid
95 134 l = [p1, p2]
96 135 l.sort()
97 136 s = hashlib.sha1(l[0])
98 137 s.update(l[1])
99 138 s.update(text)
100 139 return s.digest()
101 140
102 141 def decompress(bin):
103 142 """ decompress the given input """
104 143 if not bin:
105 144 return bin
106 145 t = bin[0]
107 146 if t == '\0':
108 147 return bin
109 148 if t == 'x':
110 149 try:
111 150 return _decompress(bin)
112 151 except zlib.error as e:
113 152 raise RevlogError(_("revlog decompress error: %s") % str(e))
114 153 if t == 'u':
115 154 return util.buffer(bin, 1)
116 155 raise RevlogError(_("unknown compression type %r") % t)
117 156
118 157 # index v0:
119 158 # 4 bytes: offset
120 159 # 4 bytes: compressed length
121 160 # 4 bytes: base rev
122 161 # 4 bytes: link rev
123 162 # 20 bytes: parent 1 nodeid
124 163 # 20 bytes: parent 2 nodeid
125 164 # 20 bytes: nodeid
126 165 indexformatv0 = ">4l20s20s20s"
127 166
128 167 class revlogoldio(object):
129 168 def __init__(self):
130 169 self.size = struct.calcsize(indexformatv0)
131 170
132 171 def parseindex(self, data, inline):
133 172 s = self.size
134 173 index = []
135 174 nodemap = {nullid: nullrev}
136 175 n = off = 0
137 176 l = len(data)
138 177 while off + s <= l:
139 178 cur = data[off:off + s]
140 179 off += s
141 180 e = _unpack(indexformatv0, cur)
142 181 # transform to revlogv1 format
143 182 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
144 183 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
145 184 index.append(e2)
146 185 nodemap[e[6]] = n
147 186 n += 1
148 187
149 188 # add the magic null revision at -1
150 189 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
151 190
152 191 return index, nodemap, None
153 192
154 193 def packentry(self, entry, node, version, rev):
155 194 if gettype(entry[0]):
156 195 raise RevlogError(_("index entry flags need RevlogNG"))
157 196 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
158 197 node(entry[5]), node(entry[6]), entry[7])
159 198 return _pack(indexformatv0, *e2)
160 199
161 200 # index ng:
162 201 # 6 bytes: offset
163 202 # 2 bytes: flags
164 203 # 4 bytes: compressed length
165 204 # 4 bytes: uncompressed length
166 205 # 4 bytes: base rev
167 206 # 4 bytes: link rev
168 207 # 4 bytes: parent 1 rev
169 208 # 4 bytes: parent 2 rev
170 209 # 32 bytes: nodeid
171 210 indexformatng = ">Qiiiiii20s12x"
172 211 versionformat = ">I"
173 212
174 213 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
175 214 # signed integer)
176 215 _maxentrysize = 0x7fffffff
177 216
178 217 class revlogio(object):
179 218 def __init__(self):
180 219 self.size = struct.calcsize(indexformatng)
181 220
182 221 def parseindex(self, data, inline):
183 222 # call the C implementation to parse the index data
184 223 index, cache = parsers.parse_index2(data, inline)
185 224 return index, getattr(index, 'nodemap', None), cache
186 225
187 226 def packentry(self, entry, node, version, rev):
188 227 p = _pack(indexformatng, *entry)
189 228 if rev == 0:
190 229 p = _pack(versionformat, version) + p[4:]
191 230 return p
192 231
193 232 class revlog(object):
194 233 """
195 234 the underlying revision storage object
196 235
197 236 A revlog consists of two parts, an index and the revision data.
198 237
199 238 The index is a file with a fixed record size containing
200 239 information on each revision, including its nodeid (hash), the
201 240 nodeids of its parents, the position and offset of its data within
202 241 the data file, and the revision it's based on. Finally, each entry
203 242 contains a linkrev entry that can serve as a pointer to external
204 243 data.
205 244
206 245 The revision data itself is a linear collection of data chunks.
207 246 Each chunk represents a revision and is usually represented as a
208 247 delta against the previous chunk. To bound lookup time, runs of
209 248 deltas are limited to about 2 times the length of the original
210 249 version data. This makes retrieval of a version proportional to
211 250 its size, or O(1) relative to the number of revisions.
212 251
213 252 Both pieces of the revlog are written to in an append-only
214 253 fashion, which means we never need to rewrite a file to insert or
215 254 remove data, and can use some simple techniques to avoid the need
216 255 for locking while reading.
217 256
218 257 If checkambig, indexfile is opened with checkambig=True at
219 258 writing, to avoid file stat ambiguity.
220 259 """
221 260 def __init__(self, opener, indexfile, checkambig=False):
222 261 """
223 262 create a revlog object
224 263
225 264 opener is a function that abstracts the file opening operation
226 265 and can be used to implement COW semantics or the like.
227 266 """
228 267 self.indexfile = indexfile
229 268 self.datafile = indexfile[:-2] + ".d"
230 269 self.opener = opener
231 270 # When True, indexfile is opened with checkambig=True at writing, to
232 271 # avoid file stat ambiguity.
233 272 self._checkambig = checkambig
234 273 # 3-tuple of (node, rev, text) for a raw revision.
235 274 self._cache = None
236 275 # Maps rev to chain base rev.
237 276 self._chainbasecache = util.lrucachedict(100)
238 277 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
239 278 self._chunkcache = (0, '')
240 279 # How much data to read and cache into the raw revlog data cache.
241 280 self._chunkcachesize = 65536
242 281 self._maxchainlen = None
243 282 self._aggressivemergedeltas = False
244 283 self.index = []
245 284 # Mapping of partial identifiers to full nodes.
246 285 self._pcache = {}
247 286 # Mapping of revision integer to full node.
248 287 self._nodecache = {nullid: nullrev}
249 288 self._nodepos = None
250 289
251 290 v = REVLOG_DEFAULT_VERSION
252 291 opts = getattr(opener, 'options', None)
253 292 if opts is not None:
254 293 if 'revlogv1' in opts:
255 294 if 'generaldelta' in opts:
256 295 v |= REVLOGGENERALDELTA
257 296 else:
258 297 v = 0
259 298 if 'chunkcachesize' in opts:
260 299 self._chunkcachesize = opts['chunkcachesize']
261 300 if 'maxchainlen' in opts:
262 301 self._maxchainlen = opts['maxchainlen']
263 302 if 'aggressivemergedeltas' in opts:
264 303 self._aggressivemergedeltas = opts['aggressivemergedeltas']
265 304 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
266 305
267 306 if self._chunkcachesize <= 0:
268 307 raise RevlogError(_('revlog chunk cache size %r is not greater '
269 308 'than 0') % self._chunkcachesize)
270 309 elif self._chunkcachesize & (self._chunkcachesize - 1):
271 310 raise RevlogError(_('revlog chunk cache size %r is not a power '
272 311 'of 2') % self._chunkcachesize)
273 312
274 313 indexdata = ''
275 314 self._initempty = True
276 315 try:
277 316 f = self.opener(self.indexfile)
278 317 indexdata = f.read()
279 318 f.close()
280 319 if len(indexdata) > 0:
281 320 v = struct.unpack(versionformat, indexdata[:4])[0]
282 321 self._initempty = False
283 322 except IOError as inst:
284 323 if inst.errno != errno.ENOENT:
285 324 raise
286 325
287 326 self.version = v
288 327 self._inline = v & REVLOGNGINLINEDATA
289 328 self._generaldelta = v & REVLOGGENERALDELTA
290 329 flags = v & ~0xFFFF
291 330 fmt = v & 0xFFFF
292 331 if fmt == REVLOGV0 and flags:
293 332 raise RevlogError(_("index %s unknown flags %#04x for format v0")
294 333 % (self.indexfile, flags >> 16))
295 334 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
296 335 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
297 336 % (self.indexfile, flags >> 16))
298 337 elif fmt > REVLOGNG:
299 338 raise RevlogError(_("index %s unknown format %d")
300 339 % (self.indexfile, fmt))
301 340
302 341 self.storedeltachains = True
303 342
304 343 self._io = revlogio()
305 344 if self.version == REVLOGV0:
306 345 self._io = revlogoldio()
307 346 try:
308 347 d = self._io.parseindex(indexdata, self._inline)
309 348 except (ValueError, IndexError):
310 349 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
311 350 self.index, nodemap, self._chunkcache = d
312 351 if nodemap is not None:
313 352 self.nodemap = self._nodecache = nodemap
314 353 if not self._chunkcache:
315 354 self._chunkclear()
316 355 # revnum -> (chain-length, sum-delta-length)
317 356 self._chaininfocache = {}
318 357
319 358 def tip(self):
320 359 return self.node(len(self.index) - 2)
321 360 def __contains__(self, rev):
322 361 return 0 <= rev < len(self)
323 362 def __len__(self):
324 363 return len(self.index) - 1
325 364 def __iter__(self):
326 365 return iter(xrange(len(self)))
327 366 def revs(self, start=0, stop=None):
328 367 """iterate over all rev in this revlog (from start to stop)"""
329 368 step = 1
330 369 if stop is not None:
331 370 if start > stop:
332 371 step = -1
333 372 stop += step
334 373 else:
335 374 stop = len(self)
336 375 return xrange(start, stop, step)
337 376
338 377 @util.propertycache
339 378 def nodemap(self):
340 379 self.rev(self.node(0))
341 380 return self._nodecache
342 381
343 382 def hasnode(self, node):
344 383 try:
345 384 self.rev(node)
346 385 return True
347 386 except KeyError:
348 387 return False
349 388
350 389 def clearcaches(self):
351 390 self._cache = None
352 391 self._chainbasecache.clear()
353 392 self._chunkcache = (0, '')
354 393 self._pcache = {}
355 394
356 395 try:
357 396 self._nodecache.clearcaches()
358 397 except AttributeError:
359 398 self._nodecache = {nullid: nullrev}
360 399 self._nodepos = None
361 400
362 401 def rev(self, node):
363 402 try:
364 403 return self._nodecache[node]
365 404 except TypeError:
366 405 raise
367 406 except RevlogError:
368 407 # parsers.c radix tree lookup failed
369 408 raise LookupError(node, self.indexfile, _('no node'))
370 409 except KeyError:
371 410 # pure python cache lookup failed
372 411 n = self._nodecache
373 412 i = self.index
374 413 p = self._nodepos
375 414 if p is None:
376 415 p = len(i) - 2
377 416 for r in xrange(p, -1, -1):
378 417 v = i[r][7]
379 418 n[v] = r
380 419 if v == node:
381 420 self._nodepos = r - 1
382 421 return r
383 422 raise LookupError(node, self.indexfile, _('no node'))
384 423
385 424 # Accessors for index entries.
386 425
387 426 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
388 427 # are flags.
389 428 def start(self, rev):
390 429 return int(self.index[rev][0] >> 16)
391 430
392 431 def flags(self, rev):
393 432 return self.index[rev][0] & 0xFFFF
394 433
395 434 def length(self, rev):
396 435 return self.index[rev][1]
397 436
398 437 def rawsize(self, rev):
399 438 """return the length of the uncompressed text for a given revision"""
400 439 l = self.index[rev][2]
401 440 if l >= 0:
402 441 return l
403 442
404 443 t = self.revision(self.node(rev))
405 444 return len(t)
406 445 size = rawsize
407 446
408 447 def chainbase(self, rev):
409 448 base = self._chainbasecache.get(rev)
410 449 if base is not None:
411 450 return base
412 451
413 452 index = self.index
414 453 base = index[rev][3]
415 454 while base != rev:
416 455 rev = base
417 456 base = index[rev][3]
418 457
419 458 self._chainbasecache[rev] = base
420 459 return base
421 460
422 461 def linkrev(self, rev):
423 462 return self.index[rev][4]
424 463
425 464 def parentrevs(self, rev):
426 465 return self.index[rev][5:7]
427 466
428 467 def node(self, rev):
429 468 return self.index[rev][7]
430 469
431 470 # Derived from index values.
432 471
433 472 def end(self, rev):
434 473 return self.start(rev) + self.length(rev)
435 474
436 475 def parents(self, node):
437 476 i = self.index
438 477 d = i[self.rev(node)]
439 478 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
440 479
441 480 def chainlen(self, rev):
442 481 return self._chaininfo(rev)[0]
443 482
444 483 def _chaininfo(self, rev):
445 484 chaininfocache = self._chaininfocache
446 485 if rev in chaininfocache:
447 486 return chaininfocache[rev]
448 487 index = self.index
449 488 generaldelta = self._generaldelta
450 489 iterrev = rev
451 490 e = index[iterrev]
452 491 clen = 0
453 492 compresseddeltalen = 0
454 493 while iterrev != e[3]:
455 494 clen += 1
456 495 compresseddeltalen += e[1]
457 496 if generaldelta:
458 497 iterrev = e[3]
459 498 else:
460 499 iterrev -= 1
461 500 if iterrev in chaininfocache:
462 501 t = chaininfocache[iterrev]
463 502 clen += t[0]
464 503 compresseddeltalen += t[1]
465 504 break
466 505 e = index[iterrev]
467 506 else:
468 507 # Add text length of base since decompressing that also takes
469 508 # work. For cache hits the length is already included.
470 509 compresseddeltalen += e[1]
471 510 r = (clen, compresseddeltalen)
472 511 chaininfocache[rev] = r
473 512 return r
474 513
475 514 def _deltachain(self, rev, stoprev=None):
476 515 """Obtain the delta chain for a revision.
477 516
478 517 ``stoprev`` specifies a revision to stop at. If not specified, we
479 518 stop at the base of the chain.
480 519
481 520 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
482 521 revs in ascending order and ``stopped`` is a bool indicating whether
483 522 ``stoprev`` was hit.
484 523 """
485 524 chain = []
486 525
487 526 # Alias to prevent attribute lookup in tight loop.
488 527 index = self.index
489 528 generaldelta = self._generaldelta
490 529
491 530 iterrev = rev
492 531 e = index[iterrev]
493 532 while iterrev != e[3] and iterrev != stoprev:
494 533 chain.append(iterrev)
495 534 if generaldelta:
496 535 iterrev = e[3]
497 536 else:
498 537 iterrev -= 1
499 538 e = index[iterrev]
500 539
501 540 if iterrev == stoprev:
502 541 stopped = True
503 542 else:
504 543 chain.append(iterrev)
505 544 stopped = False
506 545
507 546 chain.reverse()
508 547 return chain, stopped
509 548
510 549 def ancestors(self, revs, stoprev=0, inclusive=False):
511 550 """Generate the ancestors of 'revs' in reverse topological order.
512 551 Does not generate revs lower than stoprev.
513 552
514 553 See the documentation for ancestor.lazyancestors for more details."""
515 554
516 555 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
517 556 inclusive=inclusive)
518 557
519 558 def descendants(self, revs):
520 559 """Generate the descendants of 'revs' in revision order.
521 560
522 561 Yield a sequence of revision numbers starting with a child of
523 562 some rev in revs, i.e., each revision is *not* considered a
524 563 descendant of itself. Results are ordered by revision number (a
525 564 topological sort)."""
526 565 first = min(revs)
527 566 if first == nullrev:
528 567 for i in self:
529 568 yield i
530 569 return
531 570
532 571 seen = set(revs)
533 572 for i in self.revs(start=first + 1):
534 573 for x in self.parentrevs(i):
535 574 if x != nullrev and x in seen:
536 575 seen.add(i)
537 576 yield i
538 577 break
539 578
540 579 def findcommonmissing(self, common=None, heads=None):
541 580 """Return a tuple of the ancestors of common and the ancestors of heads
542 581 that are not ancestors of common. In revset terminology, we return the
543 582 tuple:
544 583
545 584 ::common, (::heads) - (::common)
546 585
547 586 The list is sorted by revision number, meaning it is
548 587 topologically sorted.
549 588
550 589 'heads' and 'common' are both lists of node IDs. If heads is
551 590 not supplied, uses all of the revlog's heads. If common is not
552 591 supplied, uses nullid."""
553 592 if common is None:
554 593 common = [nullid]
555 594 if heads is None:
556 595 heads = self.heads()
557 596
558 597 common = [self.rev(n) for n in common]
559 598 heads = [self.rev(n) for n in heads]
560 599
561 600 # we want the ancestors, but inclusive
562 601 class lazyset(object):
563 602 def __init__(self, lazyvalues):
564 603 self.addedvalues = set()
565 604 self.lazyvalues = lazyvalues
566 605
567 606 def __contains__(self, value):
568 607 return value in self.addedvalues or value in self.lazyvalues
569 608
570 609 def __iter__(self):
571 610 added = self.addedvalues
572 611 for r in added:
573 612 yield r
574 613 for r in self.lazyvalues:
575 614 if not r in added:
576 615 yield r
577 616
578 617 def add(self, value):
579 618 self.addedvalues.add(value)
580 619
581 620 def update(self, values):
582 621 self.addedvalues.update(values)
583 622
584 623 has = lazyset(self.ancestors(common))
585 624 has.add(nullrev)
586 625 has.update(common)
587 626
588 627 # take all ancestors from heads that aren't in has
589 628 missing = set()
590 629 visit = collections.deque(r for r in heads if r not in has)
591 630 while visit:
592 631 r = visit.popleft()
593 632 if r in missing:
594 633 continue
595 634 else:
596 635 missing.add(r)
597 636 for p in self.parentrevs(r):
598 637 if p not in has:
599 638 visit.append(p)
600 639 missing = list(missing)
601 640 missing.sort()
602 641 return has, [self.node(miss) for miss in missing]
603 642
604 643 def incrementalmissingrevs(self, common=None):
605 644 """Return an object that can be used to incrementally compute the
606 645 revision numbers of the ancestors of arbitrary sets that are not
607 646 ancestors of common. This is an ancestor.incrementalmissingancestors
608 647 object.
609 648
610 649 'common' is a list of revision numbers. If common is not supplied, uses
611 650 nullrev.
612 651 """
613 652 if common is None:
614 653 common = [nullrev]
615 654
616 655 return ancestor.incrementalmissingancestors(self.parentrevs, common)
617 656
618 657 def findmissingrevs(self, common=None, heads=None):
619 658 """Return the revision numbers of the ancestors of heads that
620 659 are not ancestors of common.
621 660
622 661 More specifically, return a list of revision numbers corresponding to
623 662 nodes N such that every N satisfies the following constraints:
624 663
625 664 1. N is an ancestor of some node in 'heads'
626 665 2. N is not an ancestor of any node in 'common'
627 666
628 667 The list is sorted by revision number, meaning it is
629 668 topologically sorted.
630 669
631 670 'heads' and 'common' are both lists of revision numbers. If heads is
632 671 not supplied, uses all of the revlog's heads. If common is not
633 672 supplied, uses nullid."""
634 673 if common is None:
635 674 common = [nullrev]
636 675 if heads is None:
637 676 heads = self.headrevs()
638 677
639 678 inc = self.incrementalmissingrevs(common=common)
640 679 return inc.missingancestors(heads)
641 680
642 681 def findmissing(self, common=None, heads=None):
643 682 """Return the ancestors of heads that are not ancestors of common.
644 683
645 684 More specifically, return a list of nodes N such that every N
646 685 satisfies the following constraints:
647 686
648 687 1. N is an ancestor of some node in 'heads'
649 688 2. N is not an ancestor of any node in 'common'
650 689
651 690 The list is sorted by revision number, meaning it is
652 691 topologically sorted.
653 692
654 693 'heads' and 'common' are both lists of node IDs. If heads is
655 694 not supplied, uses all of the revlog's heads. If common is not
656 695 supplied, uses nullid."""
657 696 if common is None:
658 697 common = [nullid]
659 698 if heads is None:
660 699 heads = self.heads()
661 700
662 701 common = [self.rev(n) for n in common]
663 702 heads = [self.rev(n) for n in heads]
664 703
665 704 inc = self.incrementalmissingrevs(common=common)
666 705 return [self.node(r) for r in inc.missingancestors(heads)]
667 706
668 707 def nodesbetween(self, roots=None, heads=None):
669 708 """Return a topological path from 'roots' to 'heads'.
670 709
671 710 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
672 711 topologically sorted list of all nodes N that satisfy both of
673 712 these constraints:
674 713
675 714 1. N is a descendant of some node in 'roots'
676 715 2. N is an ancestor of some node in 'heads'
677 716
678 717 Every node is considered to be both a descendant and an ancestor
679 718 of itself, so every reachable node in 'roots' and 'heads' will be
680 719 included in 'nodes'.
681 720
682 721 'outroots' is the list of reachable nodes in 'roots', i.e., the
683 722 subset of 'roots' that is returned in 'nodes'. Likewise,
684 723 'outheads' is the subset of 'heads' that is also in 'nodes'.
685 724
686 725 'roots' and 'heads' are both lists of node IDs. If 'roots' is
687 726 unspecified, uses nullid as the only root. If 'heads' is
688 727 unspecified, uses list of all of the revlog's heads."""
689 728 nonodes = ([], [], [])
690 729 if roots is not None:
691 730 roots = list(roots)
692 731 if not roots:
693 732 return nonodes
694 733 lowestrev = min([self.rev(n) for n in roots])
695 734 else:
696 735 roots = [nullid] # Everybody's a descendant of nullid
697 736 lowestrev = nullrev
698 737 if (lowestrev == nullrev) and (heads is None):
699 738 # We want _all_ the nodes!
700 739 return ([self.node(r) for r in self], [nullid], list(self.heads()))
701 740 if heads is None:
702 741 # All nodes are ancestors, so the latest ancestor is the last
703 742 # node.
704 743 highestrev = len(self) - 1
705 744 # Set ancestors to None to signal that every node is an ancestor.
706 745 ancestors = None
707 746 # Set heads to an empty dictionary for later discovery of heads
708 747 heads = {}
709 748 else:
710 749 heads = list(heads)
711 750 if not heads:
712 751 return nonodes
713 752 ancestors = set()
714 753 # Turn heads into a dictionary so we can remove 'fake' heads.
715 754 # Also, later we will be using it to filter out the heads we can't
716 755 # find from roots.
717 756 heads = dict.fromkeys(heads, False)
718 757 # Start at the top and keep marking parents until we're done.
719 758 nodestotag = set(heads)
720 759 # Remember where the top was so we can use it as a limit later.
721 760 highestrev = max([self.rev(n) for n in nodestotag])
722 761 while nodestotag:
723 762 # grab a node to tag
724 763 n = nodestotag.pop()
725 764 # Never tag nullid
726 765 if n == nullid:
727 766 continue
728 767 # A node's revision number represents its place in a
729 768 # topologically sorted list of nodes.
730 769 r = self.rev(n)
731 770 if r >= lowestrev:
732 771 if n not in ancestors:
733 772 # If we are possibly a descendant of one of the roots
734 773 # and we haven't already been marked as an ancestor
735 774 ancestors.add(n) # Mark as ancestor
736 775 # Add non-nullid parents to list of nodes to tag.
737 776 nodestotag.update([p for p in self.parents(n) if
738 777 p != nullid])
739 778 elif n in heads: # We've seen it before, is it a fake head?
740 779 # So it is, real heads should not be the ancestors of
741 780 # any other heads.
742 781 heads.pop(n)
743 782 if not ancestors:
744 783 return nonodes
745 784 # Now that we have our set of ancestors, we want to remove any
746 785 # roots that are not ancestors.
747 786
748 787 # If one of the roots was nullid, everything is included anyway.
749 788 if lowestrev > nullrev:
750 789 # But, since we weren't, let's recompute the lowest rev to not
751 790 # include roots that aren't ancestors.
752 791
753 792 # Filter out roots that aren't ancestors of heads
754 793 roots = [root for root in roots if root in ancestors]
755 794 # Recompute the lowest revision
756 795 if roots:
757 796 lowestrev = min([self.rev(root) for root in roots])
758 797 else:
759 798 # No more roots? Return empty list
760 799 return nonodes
761 800 else:
762 801 # We are descending from nullid, and don't need to care about
763 802 # any other roots.
764 803 lowestrev = nullrev
765 804 roots = [nullid]
766 805 # Transform our roots list into a set.
767 806 descendants = set(roots)
768 807 # Also, keep the original roots so we can filter out roots that aren't
769 808 # 'real' roots (i.e. are descended from other roots).
770 809 roots = descendants.copy()
771 810 # Our topologically sorted list of output nodes.
772 811 orderedout = []
773 812 # Don't start at nullid since we don't want nullid in our output list,
774 813 # and if nullid shows up in descendants, empty parents will look like
775 814 # they're descendants.
776 815 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
777 816 n = self.node(r)
778 817 isdescendant = False
779 818 if lowestrev == nullrev: # Everybody is a descendant of nullid
780 819 isdescendant = True
781 820 elif n in descendants:
782 821 # n is already a descendant
783 822 isdescendant = True
784 823 # This check only needs to be done here because all the roots
785 824                 # will start being marked as descendants before the loop.
786 825 if n in roots:
787 826 # If n was a root, check if it's a 'real' root.
788 827 p = tuple(self.parents(n))
789 828 # If any of its parents are descendants, it's not a root.
790 829 if (p[0] in descendants) or (p[1] in descendants):
791 830 roots.remove(n)
792 831 else:
793 832 p = tuple(self.parents(n))
794 833                 # A node is a descendant if either of its parents is a
795 834                 # descendant. (We seeded the descendants set with the roots
796 835 # up there, remember?)
797 836 if (p[0] in descendants) or (p[1] in descendants):
798 837 descendants.add(n)
799 838 isdescendant = True
800 839 if isdescendant and ((ancestors is None) or (n in ancestors)):
801 840 # Only include nodes that are both descendants and ancestors.
802 841 orderedout.append(n)
803 842 if (ancestors is not None) and (n in heads):
804 843 # We're trying to figure out which heads are reachable
805 844 # from roots.
806 845 # Mark this head as having been reached
807 846 heads[n] = True
808 847 elif ancestors is None:
809 848 # Otherwise, we're trying to discover the heads.
810 849 # Assume this is a head because if it isn't, the next step
811 850 # will eventually remove it.
812 851 heads[n] = True
813 852 # But, obviously its parents aren't.
814 853 for p in self.parents(n):
815 854 heads.pop(p, None)
816 855 heads = [head for head, flag in heads.iteritems() if flag]
817 856 roots = list(roots)
818 857 assert orderedout
819 858 assert roots
820 859 assert heads
821 860 return (orderedout, roots, heads)
822 861
823 862 def headrevs(self):
824 863 try:
825 864 return self.index.headrevs()
826 865 except AttributeError:
827 866 return self._headrevs()
828 867
829 868 def computephases(self, roots):
830 869 return self.index.computephasesmapsets(roots)
831 870
832 871 def _headrevs(self):
833 872 count = len(self)
834 873 if not count:
835 874 return [nullrev]
836 875         # we won't iterate over filtered revs, so nobody is a head at the start
837 876 ishead = [0] * (count + 1)
838 877 index = self.index
839 878 for r in self:
840 879             ishead[r] = 1  # I may be a head
841 880             e = index[r]
842 881             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
843 882 return [r for r, val in enumerate(ishead) if val]
844 883
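The pure-Python fallback above boils down to one pass: every rev starts out as a possible head, and visiting a rev clears the head bit of both of its parents. A minimal standalone sketch of that pass, assuming a plain list of (p1, p2) parent pairs with -1 standing in for nullrev:

def findheads(parentpairs):
    # parentpairs[r] == (p1, p2); -1 means "no parent" (nullrev)
    ishead = [1] * len(parentpairs)     # everybody may be a head...
    for r, (p1, p2) in enumerate(parentpairs):
        if p1 >= 0:
            ishead[p1] = 0              # ...but my parents are not
        if p2 >= 0:
            ishead[p2] = 0
    return [r for r, val in enumerate(ishead) if val]

# linear history 0 <- 1 <- 2 plus a branch 1 <- 3 leaves heads 2 and 3
assert findheads([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]
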
845 884 def heads(self, start=None, stop=None):
846 885 """return the list of all nodes that have no children
847 886
848 887 if start is specified, only heads that are descendants of
849 888 start will be returned
850 889 if stop is specified, it will consider all the revs from stop
851 890 as if they had no children
852 891 """
853 892 if start is None and stop is None:
854 893 if not len(self):
855 894 return [nullid]
856 895 return [self.node(r) for r in self.headrevs()]
857 896
858 897 if start is None:
859 898 start = nullid
860 899 if stop is None:
861 900 stop = []
862 901 stoprevs = set([self.rev(n) for n in stop])
863 902 startrev = self.rev(start)
864 903 reachable = set((startrev,))
865 904 heads = set((startrev,))
866 905
867 906 parentrevs = self.parentrevs
868 907 for r in self.revs(start=startrev + 1):
869 908 for p in parentrevs(r):
870 909 if p in reachable:
871 910 if r not in stoprevs:
872 911 reachable.add(r)
873 912 heads.add(r)
874 913 if p in heads and p not in stoprevs:
875 914 heads.remove(p)
876 915
877 916 return [self.node(r) for r in heads]
878 917
879 918 def children(self, node):
880 919 """find the children of a given node"""
881 920 c = []
882 921 p = self.rev(node)
883 922 for r in self.revs(start=p + 1):
884 923 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
885 924 if prevs:
886 925 for pr in prevs:
887 926 if pr == p:
888 927 c.append(self.node(r))
889 928 elif p == nullrev:
890 929 c.append(self.node(r))
891 930 return c
892 931
893 932 def descendant(self, start, end):
894 933 if start == nullrev:
895 934 return True
896 935 for i in self.descendants([start]):
897 936 if i == end:
898 937 return True
899 938 elif i > end:
900 939 break
901 940 return False
902 941
903 942 def commonancestorsheads(self, a, b):
904 943 """calculate all the heads of the common ancestors of nodes a and b"""
905 944 a, b = self.rev(a), self.rev(b)
906 945 try:
907 946 ancs = self.index.commonancestorsheads(a, b)
908 947 except (AttributeError, OverflowError): # C implementation failed
909 948 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
910 949 return map(self.node, ancs)
911 950
912 951 def isancestor(self, a, b):
913 952 """return True if node a is an ancestor of node b
914 953
915 954 The implementation of this is trivial but the use of
916 955 commonancestorsheads is not."""
917 956 return a in self.commonancestorsheads(a, b)
918 957
919 958 def ancestor(self, a, b):
920 959 """calculate the "best" common ancestor of nodes a and b"""
921 960
922 961 a, b = self.rev(a), self.rev(b)
923 962 try:
924 963 ancs = self.index.ancestors(a, b)
925 964 except (AttributeError, OverflowError):
926 965 ancs = ancestor.ancestors(self.parentrevs, a, b)
927 966 if ancs:
928 967 # choose a consistent winner when there's a tie
929 968 return min(map(self.node, ancs))
930 969 return nullid
931 970
932 971 def _match(self, id):
933 972 if isinstance(id, int):
934 973 # rev
935 974 return self.node(id)
936 975 if len(id) == 20:
937 976 # possibly a binary node
938 977 # odds of a binary node being all hex in ASCII are 1 in 10**25
939 978 try:
940 979 node = id
941 980 self.rev(node) # quick search the index
942 981 return node
943 982 except LookupError:
944 983 pass # may be partial hex id
945 984 try:
946 985 # str(rev)
947 986 rev = int(id)
948 987 if str(rev) != id:
949 988 raise ValueError
950 989 if rev < 0:
951 990 rev = len(self) + rev
952 991 if rev < 0 or rev >= len(self):
953 992 raise ValueError
954 993 return self.node(rev)
955 994 except (ValueError, OverflowError):
956 995 pass
957 996 if len(id) == 40:
958 997 try:
959 998 # a full hex nodeid?
960 999 node = bin(id)
961 1000 self.rev(node)
962 1001 return node
963 1002 except (TypeError, LookupError):
964 1003 pass
965 1004
966 1005 def _partialmatch(self, id):
967 1006 try:
968 1007 partial = self.index.partialmatch(id)
969 1008 if partial and self.hasnode(partial):
970 1009 return partial
971 1010 return None
972 1011 except RevlogError:
973 1012 # parsers.c radix tree lookup gave multiple matches
974 1013 # fast path: for unfiltered changelog, radix tree is accurate
975 1014 if not getattr(self, 'filteredrevs', None):
976 1015 raise LookupError(id, self.indexfile,
977 1016 _('ambiguous identifier'))
978 1017 # fall through to slow path that filters hidden revisions
979 1018 except (AttributeError, ValueError):
980 1019 # we are pure python, or key was too short to search radix tree
981 1020 pass
982 1021
983 1022 if id in self._pcache:
984 1023 return self._pcache[id]
985 1024
986 1025 if len(id) < 40:
987 1026 try:
988 1027 # hex(node)[:...]
989 1028 l = len(id) // 2 # grab an even number of digits
990 1029 prefix = bin(id[:l * 2])
991 1030 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
992 1031 nl = [n for n in nl if hex(n).startswith(id) and
993 1032 self.hasnode(n)]
994 1033 if len(nl) > 0:
995 1034 if len(nl) == 1:
996 1035 self._pcache[id] = nl[0]
997 1036 return nl[0]
998 1037 raise LookupError(id, self.indexfile,
999 1038 _('ambiguous identifier'))
1000 1039 return None
1001 1040 except TypeError:
1002 1041 pass
1003 1042
1004 1043 def lookup(self, id):
1005 1044 """locate a node based on:
1006 1045 - revision number or str(revision number)
1007 1046 - nodeid or subset of hex nodeid
1008 1047 """
1009 1048 n = self._match(id)
1010 1049 if n is not None:
1011 1050 return n
1012 1051 n = self._partialmatch(id)
1013 1052 if n:
1014 1053 return n
1015 1054
1016 1055 raise LookupError(id, self.indexfile, _('no match found'))
1017 1056
1018 1057 def cmp(self, node, text):
1019 1058 """compare text with a given file revision
1020 1059
1021 1060 returns True if text is different than what is stored.
1022 1061 """
1023 1062 p1, p2 = self.parents(node)
1024 1063 return hash(text, p1, p2) != node
1025 1064
1026 1065 def _addchunk(self, offset, data):
1027 1066 """Add a segment to the revlog cache.
1028 1067
1029 1068 Accepts an absolute offset and the data that is at that location.
1030 1069 """
1031 1070 o, d = self._chunkcache
1032 1071 # try to add to existing cache
1033 1072 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1034 1073 self._chunkcache = o, d + data
1035 1074 else:
1036 1075 self._chunkcache = offset, data
1037 1076
1038 1077 def _loadchunk(self, offset, length, df=None):
1039 1078 """Load a segment of raw data from the revlog.
1040 1079
1041 1080 Accepts an absolute offset, length to read, and an optional existing
1042 1081 file handle to read from.
1043 1082
1044 1083 If an existing file handle is passed, it will be seeked and the
1045 1084 original seek position will NOT be restored.
1046 1085
1047 1086 Returns a str or buffer of raw byte data.
1048 1087 """
1049 1088 if df is not None:
1050 1089 closehandle = False
1051 1090 else:
1052 1091 if self._inline:
1053 1092 df = self.opener(self.indexfile)
1054 1093 else:
1055 1094 df = self.opener(self.datafile)
1056 1095 closehandle = True
1057 1096
1058 1097 # Cache data both forward and backward around the requested
1059 1098 # data, in a fixed size window. This helps speed up operations
1060 1099 # involving reading the revlog backwards.
1061 1100 cachesize = self._chunkcachesize
1062 1101 realoffset = offset & ~(cachesize - 1)
1063 1102 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1064 1103 - realoffset)
1065 1104 df.seek(realoffset)
1066 1105 d = df.read(reallength)
1067 1106 if closehandle:
1068 1107 df.close()
1069 1108 self._addchunk(realoffset, d)
1070 1109 if offset != realoffset or reallength != length:
1071 1110 return util.buffer(d, offset - realoffset, length)
1072 1111 return d
1073 1112
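The window arithmetic above rounds every request out to chunk-cache-size boundaries so that neighbouring reads hit the cache. A small worked example, assuming a 64 KiB cache size (the request values are purely illustrative):

cachesize = 65536                        # assumed _chunkcachesize, a power of two
offset, length = 100000, 300             # hypothetical request
realoffset = offset & ~(cachesize - 1)   # start rounded down -> 65536
reallength = (((offset + length + cachesize) & ~(cachesize - 1))
              - realoffset)              # end rounded up -> 65536 bytes read
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset
assert realoffset + reallength >= offset + length
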
1074 1113 def _getchunk(self, offset, length, df=None):
1075 1114 """Obtain a segment of raw data from the revlog.
1076 1115
1077 1116 Accepts an absolute offset, length of bytes to obtain, and an
1078 1117 optional file handle to the already-opened revlog. If the file
1079 1118         handle is used, its original seek position will not be preserved.
1080 1119
1081 1120 Requests for data may be returned from a cache.
1082 1121
1083 1122 Returns a str or a buffer instance of raw byte data.
1084 1123 """
1085 1124 o, d = self._chunkcache
1086 1125 l = len(d)
1087 1126
1088 1127 # is it in the cache?
1089 1128 cachestart = offset - o
1090 1129 cacheend = cachestart + length
1091 1130 if cachestart >= 0 and cacheend <= l:
1092 1131 if cachestart == 0 and cacheend == l:
1093 1132 return d # avoid a copy
1094 1133 return util.buffer(d, cachestart, cacheend - cachestart)
1095 1134
1096 1135 return self._loadchunk(offset, length, df=df)
1097 1136
1098 1137 def _chunkraw(self, startrev, endrev, df=None):
1099 1138 """Obtain a segment of raw data corresponding to a range of revisions.
1100 1139
1101 1140 Accepts the start and end revisions and an optional already-open
1102 1141         file handle to be used for reading. If the file handle is used, its
1103 1142 seek position will not be preserved.
1104 1143
1105 1144 Requests for data may be satisfied by a cache.
1106 1145
1107 1146 Returns a 2-tuple of (offset, data) for the requested range of
1108 1147 revisions. Offset is the integer offset from the beginning of the
1109 1148 revlog and data is a str or buffer of the raw byte data.
1110 1149
1111 1150 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1112 1151 to determine where each revision's data begins and ends.
1113 1152 """
1114 1153 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1115 1154 # (functions are expensive).
1116 1155 index = self.index
1117 1156 istart = index[startrev]
1118 1157 start = int(istart[0] >> 16)
1119 1158 if startrev == endrev:
1120 1159 end = start + istart[1]
1121 1160 else:
1122 1161 iend = index[endrev]
1123 1162 end = int(iend[0] >> 16) + iend[1]
1124 1163
1125 1164 if self._inline:
1126 1165 start += (startrev + 1) * self._io.size
1127 1166 end += (endrev + 1) * self._io.size
1128 1167 length = end - start
1129 1168
1130 1169 return start, self._getchunk(start, length, df=df)
1131 1170
1132 1171 def _chunk(self, rev, df=None):
1133 1172 """Obtain a single decompressed chunk for a revision.
1134 1173
1135 1174 Accepts an integer revision and an optional already-open file handle
1136 1175 to be used for reading. If used, the seek position of the file will not
1137 1176 be preserved.
1138 1177
1139 1178 Returns a str holding uncompressed data for the requested revision.
1140 1179 """
1141 1180 return decompress(self._chunkraw(rev, rev, df=df)[1])
1142 1181
1143 1182 def _chunks(self, revs, df=None):
1144 1183 """Obtain decompressed chunks for the specified revisions.
1145 1184
1146 1185 Accepts an iterable of numeric revisions that are assumed to be in
1147 1186 ascending order. Also accepts an optional already-open file handle
1148 1187 to be used for reading. If used, the seek position of the file will
1149 1188 not be preserved.
1150 1189
1151 1190 This function is similar to calling ``self._chunk()`` multiple times,
1152 1191 but is faster.
1153 1192
1154 1193 Returns a list with decompressed data for each requested revision.
1155 1194 """
1156 1195 if not revs:
1157 1196 return []
1158 1197 start = self.start
1159 1198 length = self.length
1160 1199 inline = self._inline
1161 1200 iosize = self._io.size
1162 1201 buffer = util.buffer
1163 1202
1164 1203 l = []
1165 1204 ladd = l.append
1166 1205
1167 1206 try:
1168 1207 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1169 1208 except OverflowError:
1170 1209 # issue4215 - we can't cache a run of chunks greater than
1171 1210 # 2G on Windows
1172 1211 return [self._chunk(rev, df=df) for rev in revs]
1173 1212
1174 1213 for rev in revs:
1175 1214 chunkstart = start(rev)
1176 1215 if inline:
1177 1216 chunkstart += (rev + 1) * iosize
1178 1217 chunklength = length(rev)
1179 1218 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1180 1219
1181 1220 return l
1182 1221
1183 1222 def _chunkclear(self):
1184 1223 """Clear the raw chunk cache."""
1185 1224 self._chunkcache = (0, '')
1186 1225
1187 1226 def deltaparent(self, rev):
1188 1227 """return deltaparent of the given revision"""
1189 1228 base = self.index[rev][3]
1190 1229 if base == rev:
1191 1230 return nullrev
1192 1231 elif self._generaldelta:
1193 1232 return base
1194 1233 else:
1195 1234 return rev - 1
1196 1235
1197 1236 def revdiff(self, rev1, rev2):
1198 1237 """return or calculate a delta between two revisions"""
1199 1238 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1200 1239 return str(self._chunk(rev2))
1201 1240
1202 1241 return mdiff.textdiff(self.revision(rev1),
1203 1242 self.revision(rev2))
1204 1243
1205 1244 def revision(self, nodeorrev, _df=None, raw=False):
1206 1245 """return an uncompressed revision of a given node or revision
1207 1246 number.
1208 1247
1209 1248 _df - an existing file handle to read from. (internal-only)
1210 1249 raw - an optional argument specifying if the revision data is to be
1211 1250 treated as raw data when applying flag transforms. 'raw' should be set
1212 1251 to True when generating changegroups or in debug commands.
1213 1252 """
1214 1253 if isinstance(nodeorrev, int):
1215 1254 rev = nodeorrev
1216 1255 node = self.node(rev)
1217 1256 else:
1218 1257 node = nodeorrev
1219 1258 rev = None
1220 1259
1221 1260 cachedrev = None
1222 1261 if node == nullid:
1223 1262 return ""
1224 1263 if self._cache:
1225 1264 if self._cache[0] == node:
1226 1265 return self._cache[2]
1227 1266 cachedrev = self._cache[1]
1228 1267
1229 1268 # look up what we need to read
1230 1269 text = None
1231 1270 if rev is None:
1232 1271 rev = self.rev(node)
1233 1272
1234 # check rev flags
1235 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1236 raise RevlogError(_('incompatible revision flag %x') %
1237 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1238
1239 1273 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1240 1274 if stopped:
1241 1275 text = self._cache[2]
1242 1276
1243 1277 # drop cache to save memory
1244 1278 self._cache = None
1245 1279
1246 1280 bins = self._chunks(chain, df=_df)
1247 1281 if text is None:
1248 1282 text = str(bins[0])
1249 1283 bins = bins[1:]
1250 1284
1251 1285 text = mdiff.patches(text, bins)
1252 self.checkhash(text, node, rev=rev)
1286
1287 text, validatehash = self._processflags(text, self.flags(rev), 'read',
1288 raw=raw)
1289 if validatehash:
1290 self.checkhash(text, node, rev=rev)
1291
1253 1292 self._cache = (node, rev, text)
1254 1293 return text
1255 1294
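From a caller's point of view the new ``raw`` argument is the whole story: the default read path applies the registered read transforms and only then, if requested, validates the hash, while raw reads hand back the stored bytes for changegroup generation and debugging. A hedged usage sketch, assuming ``rl`` is an open revlog and ``node`` one of its nodes:

text = rl.revision(node)                 # read transforms applied, hash checked if asked for
rawtext = rl.revision(node, raw=True)    # stored bytes, only the raw transforms consulted
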
1256 1295 def hash(self, text, p1, p2):
1257 1296 """Compute a node hash.
1258 1297
1259 1298 Available as a function so that subclasses can replace the hash
1260 1299 as needed.
1261 1300 """
1262 1301 return hash(text, p1, p2)
1263 1302
1303 def _processflags(self, text, flags, operation, raw=False):
1304         """Inspect revision data flags and apply transforms defined by
1305 registered flag processors.
1306
1307 ``text`` - the revision data to process
1308 ``flags`` - the revision flags
1309 ``operation`` - the operation being performed (read or write)
1310 ``raw`` - an optional argument describing if the raw transform should be
1311 applied.
1312
1313 This method processes the flags in the order (or reverse order if
1314 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1315         flag processors registered for present flags. Because the transforms are
1316         not commutative, the order defined in REVIDX_FLAGS_ORDER needs to be stable.
1317
1318 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1319 processed text and ``validatehash`` is a bool indicating whether the
1320 returned text should be checked for hash integrity.
1321
1322 Note: If the ``raw`` argument is set, it has precedence over the
1323 operation and will only update the value of ``validatehash``.
1324 """
1325         if operation not in ('read', 'write'):
1326             raise ProgrammingError(_("invalid '%s' operation") % operation)
1327 # Check all flags are known.
1328 if flags & ~REVIDX_KNOWN_FLAGS:
1329 raise RevlogError(_("incompatible revision flag '%#x'") %
1330 (flags & ~REVIDX_KNOWN_FLAGS))
1331 validatehash = True
1332 # Depending on the operation (read or write), the order might be
1333 # reversed due to non-commutative transforms.
1334 orderedflags = REVIDX_FLAGS_ORDER
1335 if operation == 'write':
1336 orderedflags = reversed(orderedflags)
1337
1338 for flag in orderedflags:
1339 # If a flagprocessor has been registered for a known flag, apply the
1340 # related operation transform and update result tuple.
1341 if flag & flags:
1342 vhash = True
1343
1344 if flag not in _flagprocessors:
1345 message = _("missing processor for flag '%#x'") % (flag)
1346 raise RevlogError(message)
1347
1348 processor = _flagprocessors[flag]
1349 if processor is not None:
1350 readtransform, writetransform, rawtransform = processor
1351
1352 if raw:
1353 vhash = rawtransform(self, text)
1354 elif operation == 'read':
1355 text, vhash = readtransform(self, text)
1356 else: # write operation
1357 text, vhash = writetransform(self, text)
1358 validatehash = validatehash and vhash
1359
1360 return text, validatehash
1361
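The processors consulted above live in the module-level ``_flagprocessors`` mapping, keyed by flag bit and holding ``(readtransform, writetransform, rawtransform)`` triples. The following is only an illustrative sketch of the expected shapes, using a made-up reversible transform and a hypothetical ``REVIDX_EXAMPLE`` bit; a real flag would have to be declared among revlog's known flags and in REVIDX_FLAGS_ORDER:

REVIDX_EXAMPLE = (1 << 4)   # hypothetical bit, for illustration only

def examplereadtransform(rl, text):
    # undo the stored transform; True asks the caller to validate the hash
    return (text[::-1], True)

def examplewritetransform(rl, text):
    # transform before storage; the stored bytes no longer match
    # hash(text, p1, p2), so skip validation on this side
    return (text[::-1], False)

def examplerawtransform(rl, text):
    # raw access leaves the bytes alone and only reports whether to validate
    return False

revlog._flagprocessors[REVIDX_EXAMPLE] = (examplereadtransform,
                                          examplewritetransform,
                                          examplerawtransform)
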
1264 1362 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1265 1363 """Check node hash integrity.
1266 1364
1267 1365 Available as a function so that subclasses can extend hash mismatch
1268 1366 behaviors as needed.
1269 1367 """
1270 1368 if p1 is None and p2 is None:
1271 1369 p1, p2 = self.parents(node)
1272 1370 if node != self.hash(text, p1, p2):
1273 1371 revornode = rev
1274 1372 if revornode is None:
1275 1373 revornode = templatefilters.short(hex(node))
1276 1374 raise RevlogError(_("integrity check failed on %s:%s")
1277 1375 % (self.indexfile, revornode))
1278 1376
1279 1377 def checkinlinesize(self, tr, fp=None):
1280 1378 """Check if the revlog is too big for inline and convert if so.
1281 1379
1282 1380 This should be called after revisions are added to the revlog. If the
1283 1381 revlog has grown too large to be an inline revlog, it will convert it
1284 1382 to use multiple index and data files.
1285 1383 """
1286 1384 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1287 1385 return
1288 1386
1289 1387 trinfo = tr.find(self.indexfile)
1290 1388 if trinfo is None:
1291 1389 raise RevlogError(_("%s not found in the transaction")
1292 1390 % self.indexfile)
1293 1391
1294 1392 trindex = trinfo[2]
1295 1393 if trindex is not None:
1296 1394 dataoff = self.start(trindex)
1297 1395 else:
1298 1396 # revlog was stripped at start of transaction, use all leftover data
1299 1397 trindex = len(self) - 1
1300 1398 dataoff = self.end(-2)
1301 1399
1302 1400 tr.add(self.datafile, dataoff)
1303 1401
1304 1402 if fp:
1305 1403 fp.flush()
1306 1404 fp.close()
1307 1405
1308 1406 df = self.opener(self.datafile, 'w')
1309 1407 try:
1310 1408 for r in self:
1311 1409 df.write(self._chunkraw(r, r)[1])
1312 1410 finally:
1313 1411 df.close()
1314 1412
1315 1413 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1316 1414 checkambig=self._checkambig)
1317 1415 self.version &= ~(REVLOGNGINLINEDATA)
1318 1416 self._inline = False
1319 1417 for i in self:
1320 1418 e = self._io.packentry(self.index[i], self.node, self.version, i)
1321 1419 fp.write(e)
1322 1420
1323 1421 # if we don't call close, the temp file will never replace the
1324 1422 # real index
1325 1423 fp.close()
1326 1424
1327 1425 tr.replace(self.indexfile, trindex * self._io.size)
1328 1426 self._chunkclear()
1329 1427
1330 1428 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1331 1429 node=None, flags=REVIDX_DEFAULT_FLAGS):
1332 1430 """add a revision to the log
1333 1431
1334 1432 text - the revision data to add
1335 1433 transaction - the transaction object used for rollback
1336 1434 link - the linkrev data to add
1337 1435 p1, p2 - the parent nodeids of the revision
1338 1436 cachedelta - an optional precomputed delta
1339 1437 node - nodeid of revision; typically node is not specified, and it is
1340 1438             computed by default as hash(text, p1, p2); however, subclasses might
1341 1439             use a different hashing method (and override checkhash() in that case)
1342 1440 flags - the known flags to set on the revision
1343 1441 """
1344 1442 if link == nullrev:
1345 1443 raise RevlogError(_("attempted to add linkrev -1 to %s")
1346 1444 % self.indexfile)
1347 1445
1446 if flags:
1447 node = node or self.hash(text, p1, p2)
1448
1449 newtext, validatehash = self._processflags(text, flags, 'write')
1450
1451 # If the flag processor modifies the revision data, ignore any provided
1452 # cachedelta.
1453 if newtext != text:
1454 cachedelta = None
1455 text = newtext
1456
1348 1457 if len(text) > _maxentrysize:
1349 1458 raise RevlogError(
1350 1459 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1351 1460 % (self.indexfile, len(text)))
1352 1461
1353 1462 node = node or self.hash(text, p1, p2)
1354 1463 if node in self.nodemap:
1355 1464 return node
1356 1465
1466 if validatehash:
1467 self.checkhash(text, node, p1=p1, p2=p2)
1468
1357 1469 dfh = None
1358 1470 if not self._inline:
1359 1471 dfh = self.opener(self.datafile, "a+")
1360 1472 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1361 1473 try:
1362 1474 return self._addrevision(node, text, transaction, link, p1, p2,
1363 1475 flags, cachedelta, ifh, dfh)
1364 1476 finally:
1365 1477 if dfh:
1366 1478 dfh.close()
1367 1479 ifh.close()
1368 1480
1369 1481 def compress(self, text):
1370 1482 """ generate a possibly-compressed representation of text """
1371 1483 if not text:
1372 1484 return ("", text)
1373 1485 l = len(text)
1374 1486 bin = None
1375 1487 if l < 44:
1376 1488 pass
1377 1489 elif l > 1000000:
1378 1490 # zlib makes an internal copy, thus doubling memory usage for
1379 1491             # large files, so let's do this in pieces
1380 1492 z = zlib.compressobj()
1381 1493 p = []
1382 1494 pos = 0
1383 1495 while pos < l:
1384 1496 pos2 = pos + 2**20
1385 1497 p.append(z.compress(text[pos:pos2]))
1386 1498 pos = pos2
1387 1499 p.append(z.flush())
1388 1500 if sum(map(len, p)) < l:
1389 1501 bin = "".join(p)
1390 1502 else:
1391 1503 bin = _compress(text)
1392 1504 if bin is None or len(bin) > l:
1393 1505 if text[0] == '\0':
1394 1506 return ("", text)
1395 1507 return ('u', text)
1396 1508 return ("", bin)
1397 1509
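compress() hands back a (header, data) pair rather than one string: the text is kept literally behind an empty header when it is empty or starts with a NUL byte and compression does not help, a 'u' marker is used when uncompressed storage wins otherwise, and zlib output is used when compression actually helps. A sketch of the consumer side, assuming the default zlib-based _compress; expand() is a hypothetical helper shown only to illustrate the convention:

import zlib

def expand(stored):
    # 'stored' is header + data as written to the revlog
    if not stored or stored.startswith('\0'):
        return stored                  # kept literally (empty or NUL-prefixed)
    if stored.startswith('u'):
        return stored[1:]              # stored uncompressed, minus the marker
    return zlib.decompress(stored)     # otherwise assumed to be zlib data

# intended invariant: expand(''.join(rl.compress(text))) == text
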
1398 1510 def _isgooddelta(self, d, textlen):
1399 1511 """Returns True if the given delta is good. Good means that it is within
1400 1512 the disk span, disk size, and chain length bounds that we know to be
1401 1513 performant."""
1402 1514 if d is None:
1403 1515 return False
1404 1516
1405 1517 # - 'dist' is the distance from the base revision -- bounding it limits
1406 1518 # the amount of I/O we need to do.
1407 1519 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1408 1520 # to apply -- bounding it limits the amount of CPU we consume.
1409 1521 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1410 1522 if (dist > textlen * 4 or l > textlen or
1411 1523 compresseddeltalen > textlen * 2 or
1412 1524 (self._maxchainlen and chainlen > self._maxchainlen)):
1413 1525 return False
1414 1526
1415 1527 return True
1416 1528
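To make the bounds concrete, here is an illustrative pair of candidate tuples in the layout produced by builddelta below (dist, deltalen, data, base, chainbase, chainlen, compresseddeltalen), for a 1000-byte fulltext:

textlen = 1000
# compact delta, close to its chain base, short chain: accepted
good = (2000, 200, ('', '<delta bytes>'), 5, 3, 4, 600)
# same delta but more than 4x the text size away from its chain base:
# rejected, since reading it back would mean scanning too much data
toofar = (5001, 200, ('', '<delta bytes>'), 5, 3, 4, 600)
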
1417 1529 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1418 1530 cachedelta, ifh, dfh, alwayscache=False, raw=False):
1419 1531 """internal function to add revisions to the log
1420 1532
1421 1533 see addrevision for argument descriptions.
1422 1534 invariants:
1423 1535 - text is optional (can be None); if not set, cachedelta must be set.
1424 1536 if both are set, they must correspond to each other.
1425 1537 - raw is optional; if set to True, it indicates the revision data is to
1426 1538 be treated by _processflags() as raw. It is usually set by changegroup
1427 1539 generation and debug commands.
1428 1540 """
1429 1541 btext = [text]
1430 1542 def buildtext():
1431 1543 if btext[0] is not None:
1432 1544 return btext[0]
1433 1545 baserev = cachedelta[0]
1434 1546 delta = cachedelta[1]
1435 1547 # special case deltas which replace entire base; no need to decode
1436 1548 # base revision. this neatly avoids censored bases, which throw when
1437 1549 # they're decoded.
1438 1550 hlen = struct.calcsize(">lll")
1439 1551 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1440 1552 len(delta) - hlen):
1441 1553 btext[0] = delta[hlen:]
1442 1554 else:
1443 1555 if self._inline:
1444 1556 fh = ifh
1445 1557 else:
1446 1558 fh = dfh
1447 1559 basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
1448 1560 btext[0] = mdiff.patch(basetext, delta)
1449 1561
1450 1562 try:
1451 self.checkhash(btext[0], node, p1=p1, p2=p2)
1563 res = self._processflags(btext[0], flags, 'read', raw=raw)
1564 btext[0], validatehash = res
1565 if validatehash:
1566 self.checkhash(btext[0], node, p1=p1, p2=p2)
1452 1567 if flags & REVIDX_ISCENSORED:
1453 1568 raise RevlogError(_('node %s is not censored') % node)
1454 1569 except CensoredNodeError:
1455 1570 # must pass the censored index flag to add censored revisions
1456 1571 if not flags & REVIDX_ISCENSORED:
1457 1572 raise
1458 1573 return btext[0]
1459 1574
1460 1575 def builddelta(rev):
1461 1576 # can we use the cached delta?
1462 1577 if cachedelta and cachedelta[0] == rev:
1463 1578 delta = cachedelta[1]
1464 1579 else:
1465 1580 t = buildtext()
1466 1581 if self.iscensored(rev):
1467 1582 # deltas based on a censored revision must replace the
1468 1583 # full content in one patch, so delta works everywhere
1469 1584 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1470 1585 delta = header + t
1471 1586 else:
1472 1587 if self._inline:
1473 1588 fh = ifh
1474 1589 else:
1475 1590 fh = dfh
1476 1591 ptext = self.revision(self.node(rev), _df=fh)
1477 1592 delta = mdiff.textdiff(ptext, t)
1478 1593 header, data = self.compress(delta)
1479 1594 deltalen = len(header) + len(data)
1480 1595 chainbase = self.chainbase(rev)
1481 1596 dist = deltalen + offset - self.start(chainbase)
1482 1597 if self._generaldelta:
1483 1598 base = rev
1484 1599 else:
1485 1600 base = chainbase
1486 1601 chainlen, compresseddeltalen = self._chaininfo(rev)
1487 1602 chainlen += 1
1488 1603 compresseddeltalen += deltalen
1489 1604 return (dist, deltalen, (header, data), base,
1490 1605 chainbase, chainlen, compresseddeltalen)
1491 1606
1492 1607 curr = len(self)
1493 1608 prev = curr - 1
1494 1609 offset = self.end(prev)
1495 1610 delta = None
1496 1611 p1r, p2r = self.rev(p1), self.rev(p2)
1497 1612
1498 1613 # full versions are inserted when the needed deltas
1499 1614 # become comparable to the uncompressed text
1500 1615 if text is None:
1501 1616 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1502 1617 cachedelta[1])
1503 1618 else:
1504 1619 textlen = len(text)
1505 1620
1506 1621 # should we try to build a delta?
1507 1622 if prev != nullrev and self.storedeltachains:
1508 1623 tested = set()
1509 1624 # This condition is true most of the time when processing
1510 1625 # changegroup data into a generaldelta repo. The only time it
1511 1626 # isn't true is if this is the first revision in a delta chain
1512 1627 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1513 1628 if cachedelta and self._generaldelta and self._lazydeltabase:
1514 1629 # Assume what we received from the server is a good choice
1515 1630 # build delta will reuse the cache
1516 1631 candidatedelta = builddelta(cachedelta[0])
1517 1632 tested.add(cachedelta[0])
1518 1633 if self._isgooddelta(candidatedelta, textlen):
1519 1634 delta = candidatedelta
1520 1635 if delta is None and self._generaldelta:
1521 1636 # exclude already lazy tested base if any
1522 1637 parents = [p for p in (p1r, p2r)
1523 1638 if p != nullrev and p not in tested]
1524 1639 if parents and not self._aggressivemergedeltas:
1525 1640 # Pick whichever parent is closer to us (to minimize the
1526 1641 # chance of having to build a fulltext).
1527 1642 parents = [max(parents)]
1528 1643 tested.update(parents)
1529 1644 pdeltas = []
1530 1645 for p in parents:
1531 1646 pd = builddelta(p)
1532 1647 if self._isgooddelta(pd, textlen):
1533 1648 pdeltas.append(pd)
1534 1649 if pdeltas:
1535 1650 delta = min(pdeltas, key=lambda x: x[1])
1536 1651 if delta is None and prev not in tested:
1537 1652             # other approaches failed, so try against prev to hopefully save us a
1538 1653 # fulltext.
1539 1654 candidatedelta = builddelta(prev)
1540 1655 if self._isgooddelta(candidatedelta, textlen):
1541 1656 delta = candidatedelta
1542 1657 if delta is not None:
1543 1658 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1544 1659 else:
1545 1660 text = buildtext()
1546 1661 data = self.compress(text)
1547 1662 l = len(data[1]) + len(data[0])
1548 1663 base = chainbase = curr
1549 1664
1550 1665 e = (offset_type(offset, flags), l, textlen,
1551 1666 base, link, p1r, p2r, node)
1552 1667 self.index.insert(-1, e)
1553 1668 self.nodemap[node] = curr
1554 1669
1555 1670 entry = self._io.packentry(e, self.node, self.version, curr)
1556 1671 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1557 1672
1558 1673 if alwayscache and text is None:
1559 1674 text = buildtext()
1560 1675
1561 1676 if type(text) == str: # only accept immutable objects
1562 1677 self._cache = (node, curr, text)
1563 1678 self._chainbasecache[curr] = chainbase
1564 1679 return node
1565 1680
1566 1681 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1567 1682 # Files opened in a+ mode have inconsistent behavior on various
1568 1683 # platforms. Windows requires that a file positioning call be made
1569 1684 # when the file handle transitions between reads and writes. See
1570 1685 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1571 1686 # platforms, Python or the platform itself can be buggy. Some versions
1572 1687 # of Solaris have been observed to not append at the end of the file
1573 1688 # if the file was seeked to before the end. See issue4943 for more.
1574 1689 #
1575 1690 # We work around this issue by inserting a seek() before writing.
1576 1691 # Note: This is likely not necessary on Python 3.
1577 1692 ifh.seek(0, os.SEEK_END)
1578 1693 if dfh:
1579 1694 dfh.seek(0, os.SEEK_END)
1580 1695
1581 1696 curr = len(self) - 1
1582 1697 if not self._inline:
1583 1698 transaction.add(self.datafile, offset)
1584 1699 transaction.add(self.indexfile, curr * len(entry))
1585 1700 if data[0]:
1586 1701 dfh.write(data[0])
1587 1702 dfh.write(data[1])
1588 1703 ifh.write(entry)
1589 1704 else:
1590 1705 offset += curr * self._io.size
1591 1706 transaction.add(self.indexfile, offset, curr)
1592 1707 ifh.write(entry)
1593 1708 ifh.write(data[0])
1594 1709 ifh.write(data[1])
1595 1710 self.checkinlinesize(transaction, ifh)
1596 1711
1597 1712 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1598 1713 """
1599 1714 add a delta group
1600 1715
1601 1716 given a set of deltas, add them to the revision log. the
1602 1717 first delta is against its parent, which should be in our
1603 1718 log, the rest are against the previous delta.
1604 1719
1605 1720 If ``addrevisioncb`` is defined, it will be called with arguments of
1606 1721 this revlog and the node that was added.
1607 1722 """
1608 1723
1609 1724 # track the base of the current delta log
1610 1725 content = []
1611 1726 node = None
1612 1727
1613 1728 r = len(self)
1614 1729 end = 0
1615 1730 if r:
1616 1731 end = self.end(r - 1)
1617 1732 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1618 1733 isize = r * self._io.size
1619 1734 if self._inline:
1620 1735 transaction.add(self.indexfile, end + isize, r)
1621 1736 dfh = None
1622 1737 else:
1623 1738 transaction.add(self.indexfile, isize, r)
1624 1739 transaction.add(self.datafile, end)
1625 1740 dfh = self.opener(self.datafile, "a+")
1626 1741 def flush():
1627 1742 if dfh:
1628 1743 dfh.flush()
1629 1744 ifh.flush()
1630 1745 try:
1631 1746 # loop through our set of deltas
1632 1747 chain = None
1633 1748 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1634 1749 node = chunkdata['node']
1635 1750 p1 = chunkdata['p1']
1636 1751 p2 = chunkdata['p2']
1637 1752 cs = chunkdata['cs']
1638 1753 deltabase = chunkdata['deltabase']
1639 1754 delta = chunkdata['delta']
1640 1755 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1641 1756
1642 1757 content.append(node)
1643 1758
1644 1759 link = linkmapper(cs)
1645 1760 if node in self.nodemap:
1646 1761 # this can happen if two branches make the same change
1647 1762 chain = node
1648 1763 continue
1649 1764
1650 1765 for p in (p1, p2):
1651 1766 if p not in self.nodemap:
1652 1767 raise LookupError(p, self.indexfile,
1653 1768 _('unknown parent'))
1654 1769
1655 1770 if deltabase not in self.nodemap:
1656 1771 raise LookupError(deltabase, self.indexfile,
1657 1772 _('unknown delta base'))
1658 1773
1659 1774 baserev = self.rev(deltabase)
1660 1775
1661 1776 if baserev != nullrev and self.iscensored(baserev):
1662 1777 # if base is censored, delta must be full replacement in a
1663 1778 # single patch operation
1664 1779 hlen = struct.calcsize(">lll")
1665 1780 oldlen = self.rawsize(baserev)
1666 1781 newlen = len(delta) - hlen
1667 1782 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1668 1783 raise error.CensoredBaseError(self.indexfile,
1669 1784 self.node(baserev))
1670 1785
1671 1786 if not flags and self._peek_iscensored(baserev, delta, flush):
1672 1787 flags |= REVIDX_ISCENSORED
1673 1788
1674 1789 # We assume consumers of addrevisioncb will want to retrieve
1675 1790 # the added revision, which will require a call to
1676 1791 # revision(). revision() will fast path if there is a cache
1677 1792 # hit. So, we tell _addrevision() to always cache in this case.
1678 1793 # We're only using addgroup() in the context of changegroup
1679 1794 # generation so the revision data can always be handled as raw
1680 1795 # by the flagprocessor.
1681 1796 chain = self._addrevision(node, None, transaction, link,
1682 1797 p1, p2, flags, (baserev, delta),
1683 1798 ifh, dfh,
1684 1799 alwayscache=bool(addrevisioncb),
1685 1800 raw=True)
1686 1801
1687 1802 if addrevisioncb:
1688 1803 addrevisioncb(self, chain)
1689 1804
1690 1805 if not dfh and not self._inline:
1691 1806 # addrevision switched from inline to conventional
1692 1807 # reopen the index
1693 1808 ifh.close()
1694 1809 dfh = self.opener(self.datafile, "a+")
1695 1810 ifh = self.opener(self.indexfile, "a+",
1696 1811 checkambig=self._checkambig)
1697 1812 finally:
1698 1813 if dfh:
1699 1814 dfh.close()
1700 1815 ifh.close()
1701 1816
1702 1817 return content
1703 1818
1704 1819 def iscensored(self, rev):
1705 1820 """Check if a file revision is censored."""
1706 1821 return False
1707 1822
1708 1823 def _peek_iscensored(self, baserev, delta, flush):
1709 1824 """Quickly check if a delta produces a censored revision."""
1710 1825 return False
1711 1826
1712 1827 def getstrippoint(self, minlink):
1713 1828 """find the minimum rev that must be stripped to strip the linkrev
1714 1829
1715 1830 Returns a tuple containing the minimum rev and a set of all revs that
1716 1831 have linkrevs that will be broken by this strip.
1717 1832 """
1718 1833 brokenrevs = set()
1719 1834 strippoint = len(self)
1720 1835
1721 1836 heads = {}
1722 1837 futurelargelinkrevs = set()
1723 1838 for head in self.headrevs():
1724 1839 headlinkrev = self.linkrev(head)
1725 1840 heads[head] = headlinkrev
1726 1841 if headlinkrev >= minlink:
1727 1842 futurelargelinkrevs.add(headlinkrev)
1728 1843
1729 1844 # This algorithm involves walking down the rev graph, starting at the
1730 1845 # heads. Since the revs are topologically sorted according to linkrev,
1731 1846 # once all head linkrevs are below the minlink, we know there are
1732 1847 # no more revs that could have a linkrev greater than minlink.
1733 1848 # So we can stop walking.
1734 1849 while futurelargelinkrevs:
1735 1850 strippoint -= 1
1736 1851 linkrev = heads.pop(strippoint)
1737 1852
1738 1853 if linkrev < minlink:
1739 1854 brokenrevs.add(strippoint)
1740 1855 else:
1741 1856 futurelargelinkrevs.remove(linkrev)
1742 1857
1743 1858 for p in self.parentrevs(strippoint):
1744 1859 if p != nullrev:
1745 1860 plinkrev = self.linkrev(p)
1746 1861 heads[p] = plinkrev
1747 1862 if plinkrev >= minlink:
1748 1863 futurelargelinkrevs.add(plinkrev)
1749 1864
1750 1865 return strippoint, brokenrevs
1751 1866
1752 1867 def strip(self, minlink, transaction):
1753 1868 """truncate the revlog on the first revision with a linkrev >= minlink
1754 1869
1755 1870 This function is called when we're stripping revision minlink and
1756 1871 its descendants from the repository.
1757 1872
1758 1873 We have to remove all revisions with linkrev >= minlink, because
1759 1874 the equivalent changelog revisions will be renumbered after the
1760 1875 strip.
1761 1876
1762 1877 So we truncate the revlog on the first of these revisions, and
1763 1878 trust that the caller has saved the revisions that shouldn't be
1764 1879 removed and that it'll re-add them after this truncation.
1765 1880 """
1766 1881 if len(self) == 0:
1767 1882 return
1768 1883
1769 1884 rev, _ = self.getstrippoint(minlink)
1770 1885 if rev == len(self):
1771 1886 return
1772 1887
1773 1888 # first truncate the files on disk
1774 1889 end = self.start(rev)
1775 1890 if not self._inline:
1776 1891 transaction.add(self.datafile, end)
1777 1892 end = rev * self._io.size
1778 1893 else:
1779 1894 end += rev * self._io.size
1780 1895
1781 1896 transaction.add(self.indexfile, end)
1782 1897
1783 1898 # then reset internal state in memory to forget those revisions
1784 1899 self._cache = None
1785 1900 self._chaininfocache = {}
1786 1901 self._chunkclear()
1787 1902 for x in xrange(rev, len(self)):
1788 1903 del self.nodemap[self.node(x)]
1789 1904
1790 1905 del self.index[rev:-1]
1791 1906
1792 1907 def checksize(self):
1793 1908 expected = 0
1794 1909 if len(self):
1795 1910 expected = max(0, self.end(len(self) - 1))
1796 1911
1797 1912 try:
1798 1913 f = self.opener(self.datafile)
1799 1914 f.seek(0, 2)
1800 1915 actual = f.tell()
1801 1916 f.close()
1802 1917 dd = actual - expected
1803 1918 except IOError as inst:
1804 1919 if inst.errno != errno.ENOENT:
1805 1920 raise
1806 1921 dd = 0
1807 1922
1808 1923 try:
1809 1924 f = self.opener(self.indexfile)
1810 1925 f.seek(0, 2)
1811 1926 actual = f.tell()
1812 1927 f.close()
1813 1928 s = self._io.size
1814 1929 i = max(0, actual // s)
1815 1930 di = actual - (i * s)
1816 1931 if self._inline:
1817 1932 databytes = 0
1818 1933 for r in self:
1819 1934 databytes += max(0, self.length(r))
1820 1935 dd = 0
1821 1936 di = actual - len(self) * s - databytes
1822 1937 except IOError as inst:
1823 1938 if inst.errno != errno.ENOENT:
1824 1939 raise
1825 1940 di = 0
1826 1941
1827 1942 return (dd, di)
1828 1943
1829 1944 def files(self):
1830 1945 res = [self.indexfile]
1831 1946 if not self._inline:
1832 1947 res.append(self.datafile)
1833 1948 return res
@@ -1,3250 +1,3256 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import signal
31 31 import socket
32 32 import stat
33 33 import string
34 34 import subprocess
35 35 import sys
36 36 import tempfile
37 37 import textwrap
38 38 import time
39 39 import traceback
40 40 import zlib
41 41
42 42 from . import (
43 43 encoding,
44 44 error,
45 45 i18n,
46 46 osutil,
47 47 parsers,
48 48 pycompat,
49 49 )
50 50
51 51 empty = pycompat.empty
52 52 httplib = pycompat.httplib
53 53 httpserver = pycompat.httpserver
54 54 pickle = pycompat.pickle
55 55 queue = pycompat.queue
56 56 socketserver = pycompat.socketserver
57 57 stderr = pycompat.stderr
58 58 stdin = pycompat.stdin
59 59 stdout = pycompat.stdout
60 60 stringio = pycompat.stringio
61 61 urlerr = pycompat.urlerr
62 62 urlparse = pycompat.urlparse
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
66 66 if pycompat.osname == 'nt':
67 67 from . import windows as platform
68 68 stdout = platform.winstdout(pycompat.stdout)
69 69 else:
70 70 from . import posix as platform
71 71
72 72 _ = i18n._
73 73
74 74 bindunixsocket = platform.bindunixsocket
75 75 cachestat = platform.cachestat
76 76 checkexec = platform.checkexec
77 77 checklink = platform.checklink
78 78 copymode = platform.copymode
79 79 executablepath = platform.executablepath
80 80 expandglobs = platform.expandglobs
81 81 explainexit = platform.explainexit
82 82 findexe = platform.findexe
83 83 gethgcmd = platform.gethgcmd
84 84 getuser = platform.getuser
85 85 getpid = os.getpid
86 86 groupmembers = platform.groupmembers
87 87 groupname = platform.groupname
88 88 hidewindow = platform.hidewindow
89 89 isexec = platform.isexec
90 90 isowner = platform.isowner
91 91 localpath = platform.localpath
92 92 lookupreg = platform.lookupreg
93 93 makedir = platform.makedir
94 94 nlinks = platform.nlinks
95 95 normpath = platform.normpath
96 96 normcase = platform.normcase
97 97 normcasespec = platform.normcasespec
98 98 normcasefallback = platform.normcasefallback
99 99 openhardlinks = platform.openhardlinks
100 100 oslink = platform.oslink
101 101 parsepatchoutput = platform.parsepatchoutput
102 102 pconvert = platform.pconvert
103 103 poll = platform.poll
104 104 popen = platform.popen
105 105 posixfile = platform.posixfile
106 106 quotecommand = platform.quotecommand
107 107 readpipe = platform.readpipe
108 108 rename = platform.rename
109 109 removedirs = platform.removedirs
110 110 samedevice = platform.samedevice
111 111 samefile = platform.samefile
112 112 samestat = platform.samestat
113 113 setbinary = platform.setbinary
114 114 setflags = platform.setflags
115 115 setsignalhandler = platform.setsignalhandler
116 116 shellquote = platform.shellquote
117 117 spawndetached = platform.spawndetached
118 118 split = platform.split
119 119 sshargs = platform.sshargs
120 120 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
121 121 statisexec = platform.statisexec
122 122 statislink = platform.statislink
123 123 testpid = platform.testpid
124 124 umask = platform.umask
125 125 unlink = platform.unlink
126 126 unlinkpath = platform.unlinkpath
127 127 username = platform.username
128 128
129 129 # Python compatibility
130 130
131 131 _notset = object()
132 132
133 133 # disable Python's problematic floating point timestamps (issue4836)
134 134 # (Python hypocritically says you shouldn't change this behavior in
135 135 # libraries, and sure enough Mercurial is not a library.)
136 136 os.stat_float_times(False)
137 137
138 138 def safehasattr(thing, attr):
139 139 return getattr(thing, attr, _notset) is not _notset
140 140
141 def bitsfrom(container):
142 bits = 0
143 for bit in container:
144 bits |= bit
145 return bits
146
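bitsfrom just ORs an iterable of single-bit flags into one mask, which is convenient when combining things like revision index flags. For instance:

assert bitsfrom([1 << 1, 1 << 3]) == 0b1010
assert bitsfrom([]) == 0
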
141 147 DIGESTS = {
142 148 'md5': hashlib.md5,
143 149 'sha1': hashlib.sha1,
144 150 'sha512': hashlib.sha512,
145 151 }
146 152 # List of digest types from strongest to weakest
147 153 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
148 154
149 155 for k in DIGESTS_BY_STRENGTH:
150 156 assert k in DIGESTS
151 157
152 158 class digester(object):
153 159 """helper to compute digests.
154 160
155 161 This helper can be used to compute one or more digests given their name.
156 162
157 163 >>> d = digester(['md5', 'sha1'])
158 164 >>> d.update('foo')
159 165 >>> [k for k in sorted(d)]
160 166 ['md5', 'sha1']
161 167 >>> d['md5']
162 168 'acbd18db4cc2f85cedef654fccc4a4d8'
163 169 >>> d['sha1']
164 170 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
165 171 >>> digester.preferred(['md5', 'sha1'])
166 172 'sha1'
167 173 """
168 174
169 175 def __init__(self, digests, s=''):
170 176 self._hashes = {}
171 177 for k in digests:
172 178 if k not in DIGESTS:
173 179 raise Abort(_('unknown digest type: %s') % k)
174 180 self._hashes[k] = DIGESTS[k]()
175 181 if s:
176 182 self.update(s)
177 183
178 184 def update(self, data):
179 185 for h in self._hashes.values():
180 186 h.update(data)
181 187
182 188 def __getitem__(self, key):
183 189 if key not in DIGESTS:
184 190 raise Abort(_('unknown digest type: %s') % k)
185 191 return self._hashes[key].hexdigest()
186 192
187 193 def __iter__(self):
188 194 return iter(self._hashes)
189 195
190 196 @staticmethod
191 197 def preferred(supported):
192 198 """returns the strongest digest type in both supported and DIGESTS."""
193 199
194 200 for k in DIGESTS_BY_STRENGTH:
195 201 if k in supported:
196 202 return k
197 203 return None
198 204
199 205 class digestchecker(object):
200 206 """file handle wrapper that additionally checks content against a given
201 207 size and digests.
202 208
203 209 d = digestchecker(fh, size, {'md5': '...'})
204 210
205 211 When multiple digests are given, all of them are validated.
206 212 """
207 213
208 214 def __init__(self, fh, size, digests):
209 215 self._fh = fh
210 216 self._size = size
211 217 self._got = 0
212 218 self._digests = dict(digests)
213 219 self._digester = digester(self._digests.keys())
214 220
215 221 def read(self, length=-1):
216 222 content = self._fh.read(length)
217 223 self._digester.update(content)
218 224 self._got += len(content)
219 225 return content
220 226
221 227 def validate(self):
222 228 if self._size != self._got:
223 229 raise Abort(_('size mismatch: expected %d, got %d') %
224 230 (self._size, self._got))
225 231 for k, v in self._digests.items():
226 232 if v != self._digester[k]:
227 233 # i18n: first parameter is a digest name
228 234 raise Abort(_('%s mismatch: expected %s, got %s') %
229 235 (k, v, self._digester[k]))
230 236
231 237 try:
232 238 buffer = buffer
233 239 except NameError:
234 240 if not pycompat.ispy3:
235 241 def buffer(sliceable, offset=0):
236 242 return sliceable[offset:]
237 243 else:
238 244 def buffer(sliceable, offset=0):
239 245 return memoryview(sliceable)[offset:]
240 246
241 247 closefds = pycompat.osname == 'posix'
242 248
243 249 _chunksize = 4096
244 250
245 251 class bufferedinputpipe(object):
246 252 """a manually buffered input pipe
247 253
248 254 Python will not let us use buffered IO and lazy reading with 'polling' at
249 255 the same time. We cannot probe the buffer state and select will not detect
250 256 that data are ready to read if they are already buffered.
251 257
252 258     This class lets us work around that by implementing its own buffering
253 259 (allowing efficient readline) while offering a way to know if the buffer is
254 260 empty from the output (allowing collaboration of the buffer with polling).
255 261
256 262 This class lives in the 'util' module because it makes use of the 'os'
257 263 module from the python stdlib.
258 264 """
259 265
260 266 def __init__(self, input):
261 267 self._input = input
262 268 self._buffer = []
263 269 self._eof = False
264 270 self._lenbuf = 0
265 271
266 272 @property
267 273 def hasbuffer(self):
268 274         """True if any data is currently buffered
269 275
270 276         This will be used externally as a pre-step for polling IO. If there is
271 277         already data, then no polling should be set in place."""
272 278 return bool(self._buffer)
273 279
274 280 @property
275 281 def closed(self):
276 282 return self._input.closed
277 283
278 284 def fileno(self):
279 285 return self._input.fileno()
280 286
281 287 def close(self):
282 288 return self._input.close()
283 289
284 290 def read(self, size):
285 291 while (not self._eof) and (self._lenbuf < size):
286 292 self._fillbuffer()
287 293 return self._frombuffer(size)
288 294
289 295 def readline(self, *args, **kwargs):
290 296 if 1 < len(self._buffer):
291 297 # this should not happen because both read and readline end with a
292 298             # _frombuffer call that collapses it.
293 299 self._buffer = [''.join(self._buffer)]
294 300 self._lenbuf = len(self._buffer[0])
295 301 lfi = -1
296 302 if self._buffer:
297 303 lfi = self._buffer[-1].find('\n')
298 304 while (not self._eof) and lfi < 0:
299 305 self._fillbuffer()
300 306 if self._buffer:
301 307 lfi = self._buffer[-1].find('\n')
302 308 size = lfi + 1
303 309 if lfi < 0: # end of file
304 310 size = self._lenbuf
305 311 elif 1 < len(self._buffer):
306 312 # we need to take previous chunks into account
307 313 size += self._lenbuf - len(self._buffer[-1])
308 314 return self._frombuffer(size)
309 315
310 316 def _frombuffer(self, size):
311 317 """return at most 'size' data from the buffer
312 318
313 319 The data are removed from the buffer."""
314 320 if size == 0 or not self._buffer:
315 321 return ''
316 322 buf = self._buffer[0]
317 323 if 1 < len(self._buffer):
318 324 buf = ''.join(self._buffer)
319 325
320 326 data = buf[:size]
321 327 buf = buf[len(data):]
322 328 if buf:
323 329 self._buffer = [buf]
324 330 self._lenbuf = len(buf)
325 331 else:
326 332 self._buffer = []
327 333 self._lenbuf = 0
328 334 return data
329 335
330 336 def _fillbuffer(self):
331 337 """read data to the buffer"""
332 338 data = os.read(self._input.fileno(), _chunksize)
333 339 if not data:
334 340 self._eof = True
335 341 else:
336 342 self._lenbuf += len(data)
337 343 self._buffer.append(data)
338 344
339 345 def popen2(cmd, env=None, newlines=False):
340 346 # Setting bufsize to -1 lets the system decide the buffer size.
341 347 # The default for bufsize is 0, meaning unbuffered. This leads to
342 348 # poor performance on Mac OS X: http://bugs.python.org/issue4194
343 349 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
344 350 close_fds=closefds,
345 351 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
346 352 universal_newlines=newlines,
347 353 env=env)
348 354 return p.stdin, p.stdout
349 355
350 356 def popen3(cmd, env=None, newlines=False):
351 357 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
352 358 return stdin, stdout, stderr
353 359
354 360 def popen4(cmd, env=None, newlines=False, bufsize=-1):
355 361 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
356 362 close_fds=closefds,
357 363 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
358 364 stderr=subprocess.PIPE,
359 365 universal_newlines=newlines,
360 366 env=env)
361 367 return p.stdin, p.stdout, p.stderr, p
362 368
363 369 def version():
364 370 """Return version information if available."""
365 371 try:
366 372 from . import __version__
367 373 return __version__.version
368 374 except ImportError:
369 375 return 'unknown'
370 376
371 377 def versiontuple(v=None, n=4):
372 378 """Parses a Mercurial version string into an N-tuple.
373 379
374 380 The version string to be parsed is specified with the ``v`` argument.
375 381 If it isn't defined, the current Mercurial version string will be parsed.
376 382
377 383 ``n`` can be 2, 3, or 4. Here is how some version strings map to
378 384 returned values:
379 385
380 386 >>> v = '3.6.1+190-df9b73d2d444'
381 387 >>> versiontuple(v, 2)
382 388 (3, 6)
383 389 >>> versiontuple(v, 3)
384 390 (3, 6, 1)
385 391 >>> versiontuple(v, 4)
386 392 (3, 6, 1, '190-df9b73d2d444')
387 393
388 394 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
389 395 (3, 6, 1, '190-df9b73d2d444+20151118')
390 396
391 397 >>> v = '3.6'
392 398 >>> versiontuple(v, 2)
393 399 (3, 6)
394 400 >>> versiontuple(v, 3)
395 401 (3, 6, None)
396 402 >>> versiontuple(v, 4)
397 403 (3, 6, None, None)
398 404
399 405 >>> v = '3.9-rc'
400 406 >>> versiontuple(v, 2)
401 407 (3, 9)
402 408 >>> versiontuple(v, 3)
403 409 (3, 9, None)
404 410 >>> versiontuple(v, 4)
405 411 (3, 9, None, 'rc')
406 412
407 413 >>> v = '3.9-rc+2-02a8fea4289b'
408 414 >>> versiontuple(v, 2)
409 415 (3, 9)
410 416 >>> versiontuple(v, 3)
411 417 (3, 9, None)
412 418 >>> versiontuple(v, 4)
413 419 (3, 9, None, 'rc+2-02a8fea4289b')
414 420 """
415 421 if not v:
416 422 v = version()
417 423 parts = remod.split('[\+-]', v, 1)
418 424 if len(parts) == 1:
419 425 vparts, extra = parts[0], None
420 426 else:
421 427 vparts, extra = parts
422 428
423 429 vints = []
424 430 for i in vparts.split('.'):
425 431 try:
426 432 vints.append(int(i))
427 433 except ValueError:
428 434 break
429 435 # (3, 6) -> (3, 6, None)
430 436 while len(vints) < 3:
431 437 vints.append(None)
432 438
433 439 if n == 2:
434 440 return (vints[0], vints[1])
435 441 if n == 3:
436 442 return (vints[0], vints[1], vints[2])
437 443 if n == 4:
438 444 return (vints[0], vints[1], vints[2], extra)
439 445
440 446 # used by parsedate
441 447 defaultdateformats = (
442 448 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
443 449 '%Y-%m-%dT%H:%M', # without seconds
444 450 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
445 451 '%Y-%m-%dT%H%M', # without seconds
446 452 '%Y-%m-%d %H:%M:%S', # our common legal variant
447 453 '%Y-%m-%d %H:%M', # without seconds
448 454 '%Y-%m-%d %H%M%S', # without :
449 455 '%Y-%m-%d %H%M', # without seconds
450 456 '%Y-%m-%d %I:%M:%S%p',
451 457 '%Y-%m-%d %H:%M',
452 458 '%Y-%m-%d %I:%M%p',
453 459 '%Y-%m-%d',
454 460 '%m-%d',
455 461 '%m/%d',
456 462 '%m/%d/%y',
457 463 '%m/%d/%Y',
458 464 '%a %b %d %H:%M:%S %Y',
459 465 '%a %b %d %I:%M:%S%p %Y',
460 466 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
461 467 '%b %d %H:%M:%S %Y',
462 468 '%b %d %I:%M:%S%p %Y',
463 469 '%b %d %H:%M:%S',
464 470 '%b %d %I:%M:%S%p',
465 471 '%b %d %H:%M',
466 472 '%b %d %I:%M%p',
467 473 '%b %d %Y',
468 474 '%b %d',
469 475 '%H:%M:%S',
470 476 '%I:%M:%S%p',
471 477 '%H:%M',
472 478 '%I:%M%p',
473 479 )
474 480
475 481 extendeddateformats = defaultdateformats + (
476 482 "%Y",
477 483 "%Y-%m",
478 484 "%b",
479 485 "%b %Y",
480 486 )
481 487
482 488 def cachefunc(func):
483 489 '''cache the result of function calls'''
484 490 # XXX doesn't handle keywords args
485 491 if func.__code__.co_argcount == 0:
486 492 cache = []
487 493 def f():
488 494 if len(cache) == 0:
489 495 cache.append(func())
490 496 return cache[0]
491 497 return f
492 498 cache = {}
493 499 if func.__code__.co_argcount == 1:
494 500 # we gain a small amount of time because
495 501 # we don't need to pack/unpack the list
496 502 def f(arg):
497 503 if arg not in cache:
498 504 cache[arg] = func(arg)
499 505 return cache[arg]
500 506 else:
501 507 def f(*args):
502 508 if args not in cache:
503 509 cache[args] = func(*args)
504 510 return cache[args]
505 511
506 512 return f
507 513
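# A minimal usage sketch for cachefunc (illustrative only; it assumes this
# module is importable as mercurial.util, and the helper name ``square`` is
# hypothetical). Results are memoized per positional argument and never
# evicted, so only cache misses reach the wrapped function:
#
#   >>> from mercurial import util
#   >>> calls = []
#   >>> def square(x):
#   ...     calls.append(x)
#   ...     return x * x
#   >>> square = util.cachefunc(square)
#   >>> square(3), square(3), square(4)
#   (9, 9, 16)
#   >>> calls
#   [3, 4]
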
508 514 class sortdict(dict):
509 515 '''a simple sorted dictionary'''
510 516 def __init__(self, data=None):
511 517 self._list = []
512 518 if data:
513 519 self.update(data)
514 520 def copy(self):
515 521 return sortdict(self)
516 522 def __setitem__(self, key, val):
517 523 if key in self:
518 524 self._list.remove(key)
519 525 self._list.append(key)
520 526 dict.__setitem__(self, key, val)
521 527 def __iter__(self):
522 528 return self._list.__iter__()
523 529 def update(self, src):
524 530 if isinstance(src, dict):
525 531 src = src.iteritems()
526 532 for k, v in src:
527 533 self[k] = v
528 534 def clear(self):
529 535 dict.clear(self)
530 536 self._list = []
531 537 def items(self):
532 538 return [(k, self[k]) for k in self._list]
533 539 def __delitem__(self, key):
534 540 dict.__delitem__(self, key)
535 541 self._list.remove(key)
536 542 def pop(self, key, *args, **kwargs):
537 543 try:
538 544 self._list.remove(key)
539 545 except ValueError:
540 546 pass
541 547 return dict.pop(self, key, *args, **kwargs)
542 548 def keys(self):
543 549 return self._list
544 550 def iterkeys(self):
545 551 return self._list.__iter__()
546 552 def iteritems(self):
547 553 for k in self._list:
548 554 yield k, self[k]
549 555 def insert(self, index, key, val):
550 556 self._list.insert(index, key)
551 557 dict.__setitem__(self, key, val)
552 558 def __repr__(self):
553 559 if not self:
554 560 return '%s()' % self.__class__.__name__
555 561 return '%s(%r)' % (self.__class__.__name__, self.items())
556 562
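# A small sketch of sortdict behavior (illustrative only; assumes this module
# is importable as mercurial.util): iteration follows insertion order, and
# re-setting an existing key moves it to the end of that order.
#
#   >>> from mercurial import util
#   >>> d = util.sortdict()
#   >>> d['b'] = 1
#   >>> d['a'] = 2
#   >>> d.keys()
#   ['b', 'a']
#   >>> d['b'] = 3
#   >>> d.keys()
#   ['a', 'b']
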
557 563 class _lrucachenode(object):
558 564 """A node in a doubly linked list.
559 565
560 566 Holds a reference to nodes on either side as well as a key-value
561 567 pair for the dictionary entry.
562 568 """
563 569 __slots__ = (u'next', u'prev', u'key', u'value')
564 570
565 571 def __init__(self):
566 572 self.next = None
567 573 self.prev = None
568 574
569 575 self.key = _notset
570 576 self.value = None
571 577
572 578 def markempty(self):
573 579 """Mark the node as emptied."""
574 580 self.key = _notset
575 581
576 582 class lrucachedict(object):
577 583 """Dict that caches most recent accesses and sets.
578 584
579 585 The dict consists of an actual backing dict - indexed by original
580 586 key - and a doubly linked circular list defining the order of entries in
581 587 the cache.
582 588
583 589 The head node is the newest entry in the cache. If the cache is full,
584 590 we recycle head.prev and make it the new head. Cache accesses result in
585 591 the node being moved to before the existing head and being marked as the
586 592 new head node.
587 593 """
588 594 def __init__(self, max):
589 595 self._cache = {}
590 596
591 597 self._head = head = _lrucachenode()
592 598 head.prev = head
593 599 head.next = head
594 600 self._size = 1
595 601 self._capacity = max
596 602
597 603 def __len__(self):
598 604 return len(self._cache)
599 605
600 606 def __contains__(self, k):
601 607 return k in self._cache
602 608
603 609 def __iter__(self):
604 610 # We don't have to iterate in cache order, but why not.
605 611 n = self._head
606 612 for i in range(len(self._cache)):
607 613 yield n.key
608 614 n = n.next
609 615
610 616 def __getitem__(self, k):
611 617 node = self._cache[k]
612 618 self._movetohead(node)
613 619 return node.value
614 620
615 621 def __setitem__(self, k, v):
616 622 node = self._cache.get(k)
617 623 # Replace existing value and mark as newest.
618 624 if node is not None:
619 625 node.value = v
620 626 self._movetohead(node)
621 627 return
622 628
623 629 if self._size < self._capacity:
624 630 node = self._addcapacity()
625 631 else:
626 632 # Grab the last/oldest item.
627 633 node = self._head.prev
628 634
629 635 # At capacity. Kill the old entry.
630 636 if node.key is not _notset:
631 637 del self._cache[node.key]
632 638
633 639 node.key = k
634 640 node.value = v
635 641 self._cache[k] = node
636 642 # And mark it as newest entry. No need to adjust order since it
637 643 # is already self._head.prev.
638 644 self._head = node
639 645
640 646 def __delitem__(self, k):
641 647 node = self._cache.pop(k)
642 648 node.markempty()
643 649
644 650 # Temporarily mark as newest item before re-adjusting head to make
645 651 # this node the oldest item.
646 652 self._movetohead(node)
647 653 self._head = node.next
648 654
649 655 # Additional dict methods.
650 656
651 657 def get(self, k, default=None):
652 658 try:
653 659 return self._cache[k].value
654 660 except KeyError:
655 661 return default
656 662
657 663 def clear(self):
658 664 n = self._head
659 665 while n.key is not _notset:
660 666 n.markempty()
661 667 n = n.next
662 668
663 669 self._cache.clear()
664 670
665 671 def copy(self):
666 672 result = lrucachedict(self._capacity)
667 673 n = self._head.prev
668 674 # Iterate in oldest-to-newest order, so the copy has the right ordering
669 675 for i in range(len(self._cache)):
670 676 result[n.key] = n.value
671 677 n = n.prev
672 678 return result
673 679
674 680 def _movetohead(self, node):
675 681 """Mark a node as the newest, making it the new head.
676 682
677 683 When a node is accessed, it becomes the freshest entry in the LRU
678 684 list, which is denoted by self._head.
679 685
680 686 Visually, let's make ``N`` the new head node (* denotes head):
681 687
682 688 previous/oldest <-> head <-> next/next newest
683 689
684 690 ----<->--- A* ---<->-----
685 691 | |
686 692 E <-> D <-> N <-> C <-> B
687 693
688 694 To:
689 695
690 696 ----<->--- N* ---<->-----
691 697 | |
692 698 E <-> D <-> C <-> B <-> A
693 699
694 700 This requires the following moves:
695 701
696 702 C.next = D (node.prev.next = node.next)
697 703 D.prev = C (node.next.prev = node.prev)
698 704 E.next = N (head.prev.next = node)
699 705 N.prev = E (node.prev = head.prev)
700 706 N.next = A (node.next = head)
701 707 A.prev = N (head.prev = node)
702 708 """
703 709 head = self._head
704 710 # C.next = D
705 711 node.prev.next = node.next
706 712 # D.prev = C
707 713 node.next.prev = node.prev
708 714 # N.prev = E
709 715 node.prev = head.prev
710 716 # N.next = A
711 717 # It is tempting to do just "head" here, however if node is
712 718 # adjacent to head, this will do bad things.
713 719 node.next = head.prev.next
714 720 # E.next = N
715 721 node.next.prev = node
716 722 # A.prev = N
717 723 node.prev.next = node
718 724
719 725 self._head = node
720 726
721 727 def _addcapacity(self):
722 728 """Add a node to the circular linked list.
723 729
724 730 The new node is inserted before the head node.
725 731 """
726 732 head = self._head
727 733 node = _lrucachenode()
728 734 head.prev.next = node
729 735 node.prev = head.prev
730 736 node.next = head
731 737 head.prev = node
732 738 self._size += 1
733 739 return node
734 740
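# A minimal lrucachedict sketch (illustrative only; assumes mercurial.util is
# importable): reads count as "use", so the least recently used entry is the
# one recycled once capacity is exceeded.
#
#   >>> from mercurial import util
#   >>> cache = util.lrucachedict(2)
#   >>> cache['a'] = 1
#   >>> cache['b'] = 2
#   >>> cache['a']
#   1
#   >>> cache['c'] = 3
#   >>> 'b' in cache
#   False
#   >>> 'a' in cache and 'c' in cache
#   True
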
735 741 def lrucachefunc(func):
736 742 '''cache most recent results of function calls'''
737 743 cache = {}
738 744 order = collections.deque()
739 745 if func.__code__.co_argcount == 1:
740 746 def f(arg):
741 747 if arg not in cache:
742 748 if len(cache) > 20:
743 749 del cache[order.popleft()]
744 750 cache[arg] = func(arg)
745 751 else:
746 752 order.remove(arg)
747 753 order.append(arg)
748 754 return cache[arg]
749 755 else:
750 756 def f(*args):
751 757 if args not in cache:
752 758 if len(cache) > 20:
753 759 del cache[order.popleft()]
754 760 cache[args] = func(*args)
755 761 else:
756 762 order.remove(args)
757 763 order.append(args)
758 764 return cache[args]
759 765
760 766 return f
761 767
762 768 class propertycache(object):
763 769 def __init__(self, func):
764 770 self.func = func
765 771 self.name = func.__name__
766 772 def __get__(self, obj, type=None):
767 773 result = self.func(obj)
768 774 self.cachevalue(obj, result)
769 775 return result
770 776
771 777 def cachevalue(self, obj, value):
772 778 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
773 779 obj.__dict__[self.name] = value
774 780
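# propertycache computes the attribute once per instance and then stores the
# result in the instance __dict__, so later lookups bypass the descriptor. A
# sketch with a hypothetical class (illustrative only; assumes mercurial.util
# is importable):
#
#   >>> from mercurial import util
#   >>> class repoinfo(object):
#   ...     @util.propertycache
#   ...     def expensive(self):
#   ...         print('computing')
#   ...         return 42
#   >>> r = repoinfo()
#   >>> r.expensive
#   computing
#   42
#   >>> r.expensive
#   42
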
775 781 def pipefilter(s, cmd):
776 782 '''filter string S through command CMD, returning its output'''
777 783 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
778 784 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
779 785 pout, perr = p.communicate(s)
780 786 return pout
781 787
782 788 def tempfilter(s, cmd):
783 789 '''filter string S through a pair of temporary files with CMD.
784 790 CMD is used as a template to create the real command to be run,
785 791 with the strings INFILE and OUTFILE replaced by the real names of
786 792 the temporary files generated.'''
787 793 inname, outname = None, None
788 794 try:
789 795 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
790 796 fp = os.fdopen(infd, 'wb')
791 797 fp.write(s)
792 798 fp.close()
793 799 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
794 800 os.close(outfd)
795 801 cmd = cmd.replace('INFILE', inname)
796 802 cmd = cmd.replace('OUTFILE', outname)
797 803 code = os.system(cmd)
798 804 if pycompat.sysplatform == 'OpenVMS' and code & 1:
799 805 code = 0
800 806 if code:
801 807 raise Abort(_("command '%s' failed: %s") %
802 808 (cmd, explainexit(code)))
803 809 return readfile(outname)
804 810 finally:
805 811 try:
806 812 if inname:
807 813 os.unlink(inname)
808 814 except OSError:
809 815 pass
810 816 try:
811 817 if outname:
812 818 os.unlink(outname)
813 819 except OSError:
814 820 pass
815 821
816 822 filtertable = {
817 823 'tempfile:': tempfilter,
818 824 'pipe:': pipefilter,
819 825 }
820 826
821 827 def filter(s, cmd):
822 828 "filter a string through a command that transforms its input to its output"
823 829 for name, fn in filtertable.iteritems():
824 830 if cmd.startswith(name):
825 831 return fn(s, cmd[len(name):].lstrip())
826 832 return pipefilter(s, cmd)
827 833
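# filter() dispatches on the command prefix: 'pipe:' (or no prefix) streams
# the string through a shell pipe, while 'tempfile:' writes it to a temporary
# file, substitutes INFILE/OUTFILE into the command, and reads the result
# back. A sketch assuming a Unix-like system with 'tr' available:
#
#   >>> from mercurial import util
#   >>> util.filter('hello\n', 'pipe: tr a-z A-Z')
#   'HELLO\n'
#   >>> util.filter('hello\n', 'tempfile: tr a-z A-Z < INFILE > OUTFILE')
#   'HELLO\n'
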
828 834 def binary(s):
829 835 """return true if a string is binary data"""
830 836 return bool(s and '\0' in s)
831 837
832 838 def increasingchunks(source, min=1024, max=65536):
833 839 '''return no less than min bytes per chunk while data remains,
834 840 doubling min after each chunk until it reaches max'''
835 841 def log2(x):
836 842 if not x:
837 843 return 0
838 844 i = 0
839 845 while x:
840 846 x >>= 1
841 847 i += 1
842 848 return i - 1
843 849
844 850 buf = []
845 851 blen = 0
846 852 for chunk in source:
847 853 buf.append(chunk)
848 854 blen += len(chunk)
849 855 if blen >= min:
850 856 if min < max:
851 857 min = min << 1
852 858 nmin = 1 << log2(blen)
853 859 if nmin > min:
854 860 min = nmin
855 861 if min > max:
856 862 min = max
857 863 yield ''.join(buf)
858 864 blen = 0
859 865 buf = []
860 866 if buf:
861 867 yield ''.join(buf)
862 868
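# increasingchunks regroups an iterator of many small chunks into pieces that
# start near ``min`` and roughly double in size up to ``max``, which smooths
# out I/O. A worked sketch (illustrative only; assumes mercurial.util is
# importable) with one hundred 100-byte chunks:
#
#   >>> from mercurial import util
#   >>> source = ['x' * 100] * 100
#   >>> [len(c) for c in util.increasingchunks(source, min=1024, max=4096)]
#   [1100, 2100, 4100, 2700]
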
863 869 Abort = error.Abort
864 870
865 871 def always(fn):
866 872 return True
867 873
868 874 def never(fn):
869 875 return False
870 876
871 877 def nogc(func):
872 878 """disable garbage collector
873 879
874 880 Python's garbage collector triggers a GC each time a certain number of
875 881 container objects (the number being defined by gc.get_threshold()) are
876 882 allocated even when marked not to be tracked by the collector. Tracking has
877 883 no effect on when GCs are triggered, only on what objects the GC looks
878 884 into. As a workaround, disable GC while building complex (huge)
879 885 containers.
880 886
881 887 This garbage collector issue has been fixed in Python 2.7.
882 888 """
883 889 if sys.version_info >= (2, 7):
884 890 return func
885 891 def wrapper(*args, **kwargs):
886 892 gcenabled = gc.isenabled()
887 893 gc.disable()
888 894 try:
889 895 return func(*args, **kwargs)
890 896 finally:
891 897 if gcenabled:
892 898 gc.enable()
893 899 return wrapper
894 900
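# nogc is used as a decorator: on Python < 2.7 it disables the garbage
# collector for the duration of the call (restoring the previous state
# afterwards), and on 2.7+ it returns the function unchanged. A minimal
# sketch with a hypothetical function:
#
#   >>> from mercurial import util
#   >>> @util.nogc
#   ... def buildindex(entries):
#   ...     return dict((e, len(e)) for e in entries)
#   >>> sorted(buildindex(['a', 'bb']).items())
#   [('a', 1), ('bb', 2)]
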
895 901 def pathto(root, n1, n2):
896 902 '''return the relative path from one place to another.
897 903 root should use os.sep to separate directories
898 904 n1 should use os.sep to separate directories
899 905 n2 should use "/" to separate directories
900 906 returns an os.sep-separated path.
901 907
902 908 If n1 is a relative path, it is assumed to be
903 909 relative to root.
904 910 n2 should always be relative to root.
905 911 '''
906 912 if not n1:
907 913 return localpath(n2)
908 914 if os.path.isabs(n1):
909 915 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
910 916 return os.path.join(root, localpath(n2))
911 917 n2 = '/'.join((pconvert(root), n2))
912 918 a, b = splitpath(n1), n2.split('/')
913 919 a.reverse()
914 920 b.reverse()
915 921 while a and b and a[-1] == b[-1]:
916 922 a.pop()
917 923 b.pop()
918 924 b.reverse()
919 925 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
920 926
921 927 def mainfrozen():
922 928 """return True if we are a frozen executable.
923 929
924 930 The code supports py2exe (most common, Windows only) and tools/freeze
925 931 (portable, not much used).
926 932 """
927 933 return (safehasattr(sys, "frozen") or # new py2exe
928 934 safehasattr(sys, "importers") or # old py2exe
929 935 imp.is_frozen(u"__main__")) # tools/freeze
930 936
931 937 # the location of data files matching the source code
932 938 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
933 939 # executable version (py2exe) doesn't support __file__
934 940 datapath = os.path.dirname(pycompat.sysexecutable)
935 941 else:
936 942 datapath = os.path.dirname(__file__)
937 943
938 944 if not isinstance(datapath, bytes):
939 945 datapath = pycompat.fsencode(datapath)
940 946
941 947 i18n.setdatapath(datapath)
942 948
943 949 _hgexecutable = None
944 950
945 951 def hgexecutable():
946 952 """return location of the 'hg' executable.
947 953
948 954 Defaults to $HG or 'hg' in the search path.
949 955 """
950 956 if _hgexecutable is None:
951 957 hg = encoding.environ.get('HG')
952 958 mainmod = sys.modules['__main__']
953 959 if hg:
954 960 _sethgexecutable(hg)
955 961 elif mainfrozen():
956 962 if getattr(sys, 'frozen', None) == 'macosx_app':
957 963 # Env variable set by py2app
958 964 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
959 965 else:
960 966 _sethgexecutable(pycompat.sysexecutable)
961 967 elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
962 968 _sethgexecutable(mainmod.__file__)
963 969 else:
964 970 exe = findexe('hg') or os.path.basename(sys.argv[0])
965 971 _sethgexecutable(exe)
966 972 return _hgexecutable
967 973
968 974 def _sethgexecutable(path):
969 975 """set location of the 'hg' executable"""
970 976 global _hgexecutable
971 977 _hgexecutable = path
972 978
973 979 def _isstdout(f):
974 980 fileno = getattr(f, 'fileno', None)
975 981 return fileno and fileno() == sys.__stdout__.fileno()
976 982
977 983 def shellenviron(environ=None):
978 984 """return environ with optional override, useful for shelling out"""
979 985 def py2shell(val):
980 986 'convert python object into string that is useful to shell'
981 987 if val is None or val is False:
982 988 return '0'
983 989 if val is True:
984 990 return '1'
985 991 return str(val)
986 992 env = dict(encoding.environ)
987 993 if environ:
988 994 env.update((k, py2shell(v)) for k, v in environ.iteritems())
989 995 env['HG'] = hgexecutable()
990 996 return env
991 997
992 998 def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
993 999 '''enhanced shell command execution.
994 1000 run with the environment possibly modified, possibly in a different dir.
995 1001
996 1002 if command fails and onerr is None, return status, else raise onerr
997 1003 object as exception.
998 1004
999 1005 if out is specified, it is assumed to be a file-like object that has a
1000 1006 write() method. stdout and stderr will be redirected to out.'''
1001 1007 try:
1002 1008 stdout.flush()
1003 1009 except Exception:
1004 1010 pass
1005 1011 origcmd = cmd
1006 1012 cmd = quotecommand(cmd)
1007 1013 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1008 1014 and sys.version_info[1] < 7):
1009 1015 # subprocess kludge to work around issues in half-baked Python
1010 1016 # ports, notably bichued/python:
1011 1017 if not cwd is None:
1012 1018 os.chdir(cwd)
1013 1019 rc = os.system(cmd)
1014 1020 else:
1015 1021 env = shellenviron(environ)
1016 1022 if out is None or _isstdout(out):
1017 1023 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1018 1024 env=env, cwd=cwd)
1019 1025 else:
1020 1026 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1021 1027 env=env, cwd=cwd, stdout=subprocess.PIPE,
1022 1028 stderr=subprocess.STDOUT)
1023 1029 for line in iter(proc.stdout.readline, ''):
1024 1030 out.write(line)
1025 1031 proc.wait()
1026 1032 rc = proc.returncode
1027 1033 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1028 1034 rc = 0
1029 1035 if rc and onerr:
1030 1036 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
1031 1037 explainexit(rc)[0])
1032 1038 if errprefix:
1033 1039 errmsg = '%s: %s' % (errprefix, errmsg)
1034 1040 raise onerr(errmsg)
1035 1041 return rc
1036 1042
1037 1043 def checksignature(func):
1038 1044 '''wrap a function with code to check for calling errors'''
1039 1045 def check(*args, **kwargs):
1040 1046 try:
1041 1047 return func(*args, **kwargs)
1042 1048 except TypeError:
1043 1049 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1044 1050 raise error.SignatureError
1045 1051 raise
1046 1052
1047 1053 return check
1048 1054
1049 1055 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1050 1056 '''copy a file, preserving mode and optionally other stat info like
1051 1057 atime/mtime
1052 1058
1053 1059 checkambig argument is used with filestat, and is useful only if
1054 1060 destination file is guarded by any lock (e.g. repo.lock or
1055 1061 repo.wlock).
1056 1062
1057 1063 copystat and checkambig should be exclusive.
1058 1064 '''
1059 1065 assert not (copystat and checkambig)
1060 1066 oldstat = None
1061 1067 if os.path.lexists(dest):
1062 1068 if checkambig:
1063 1069 oldstat = checkambig and filestat(dest)
1064 1070 unlink(dest)
1065 1071 # hardlinks are problematic on CIFS, quietly ignore this flag
1066 1072 # until we find a way to work around it cleanly (issue4546)
1067 1073 if False and hardlink:
1068 1074 try:
1069 1075 oslink(src, dest)
1070 1076 return
1071 1077 except (IOError, OSError):
1072 1078 pass # fall back to normal copy
1073 1079 if os.path.islink(src):
1074 1080 os.symlink(os.readlink(src), dest)
1075 1081 # copytime is ignored for symlinks, but in general copytime isn't needed
1076 1082 # for them anyway
1077 1083 else:
1078 1084 try:
1079 1085 shutil.copyfile(src, dest)
1080 1086 if copystat:
1081 1087 # copystat also copies mode
1082 1088 shutil.copystat(src, dest)
1083 1089 else:
1084 1090 shutil.copymode(src, dest)
1085 1091 if oldstat and oldstat.stat:
1086 1092 newstat = filestat(dest)
1087 1093 if newstat.isambig(oldstat):
1088 1094 # stat of copied file is ambiguous to original one
1089 1095 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1090 1096 os.utime(dest, (advanced, advanced))
1091 1097 except shutil.Error as inst:
1092 1098 raise Abort(str(inst))
1093 1099
1094 1100 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1095 1101 """Copy a directory tree using hardlinks if possible."""
1096 1102 num = 0
1097 1103
1098 1104 if hardlink is None:
1099 1105 hardlink = (os.stat(src).st_dev ==
1100 1106 os.stat(os.path.dirname(dst)).st_dev)
1101 1107 if hardlink:
1102 1108 topic = _('linking')
1103 1109 else:
1104 1110 topic = _('copying')
1105 1111
1106 1112 if os.path.isdir(src):
1107 1113 os.mkdir(dst)
1108 1114 for name, kind in osutil.listdir(src):
1109 1115 srcname = os.path.join(src, name)
1110 1116 dstname = os.path.join(dst, name)
1111 1117 def nprog(t, pos):
1112 1118 if pos is not None:
1113 1119 return progress(t, pos + num)
1114 1120 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1115 1121 num += n
1116 1122 else:
1117 1123 if hardlink:
1118 1124 try:
1119 1125 oslink(src, dst)
1120 1126 except (IOError, OSError):
1121 1127 hardlink = False
1122 1128 shutil.copy(src, dst)
1123 1129 else:
1124 1130 shutil.copy(src, dst)
1125 1131 num += 1
1126 1132 progress(topic, num)
1127 1133 progress(topic, None)
1128 1134
1129 1135 return hardlink, num
1130 1136
1131 1137 _winreservednames = '''con prn aux nul
1132 1138 com1 com2 com3 com4 com5 com6 com7 com8 com9
1133 1139 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1134 1140 _winreservedchars = ':*?"<>|'
1135 1141 def checkwinfilename(path):
1136 1142 r'''Check that the base-relative path is a valid filename on Windows.
1137 1143 Returns None if the path is ok, or a UI string describing the problem.
1138 1144
1139 1145 >>> checkwinfilename("just/a/normal/path")
1140 1146 >>> checkwinfilename("foo/bar/con.xml")
1141 1147 "filename contains 'con', which is reserved on Windows"
1142 1148 >>> checkwinfilename("foo/con.xml/bar")
1143 1149 "filename contains 'con', which is reserved on Windows"
1144 1150 >>> checkwinfilename("foo/bar/xml.con")
1145 1151 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1146 1152 "filename contains 'AUX', which is reserved on Windows"
1147 1153 >>> checkwinfilename("foo/bar/bla:.txt")
1148 1154 "filename contains ':', which is reserved on Windows"
1149 1155 >>> checkwinfilename("foo/bar/b\07la.txt")
1150 1156 "filename contains '\\x07', which is invalid on Windows"
1151 1157 >>> checkwinfilename("foo/bar/bla ")
1152 1158 "filename ends with ' ', which is not allowed on Windows"
1153 1159 >>> checkwinfilename("../bar")
1154 1160 >>> checkwinfilename("foo\\")
1155 1161 "filename ends with '\\', which is invalid on Windows"
1156 1162 >>> checkwinfilename("foo\\/bar")
1157 1163 "directory name ends with '\\', which is invalid on Windows"
1158 1164 '''
1159 1165 if path.endswith('\\'):
1160 1166 return _("filename ends with '\\', which is invalid on Windows")
1161 1167 if '\\/' in path:
1162 1168 return _("directory name ends with '\\', which is invalid on Windows")
1163 1169 for n in path.replace('\\', '/').split('/'):
1164 1170 if not n:
1165 1171 continue
1166 1172 for c in n:
1167 1173 if c in _winreservedchars:
1168 1174 return _("filename contains '%s', which is reserved "
1169 1175 "on Windows") % c
1170 1176 if ord(c) <= 31:
1171 1177 return _("filename contains %r, which is invalid "
1172 1178 "on Windows") % c
1173 1179 base = n.split('.')[0]
1174 1180 if base and base.lower() in _winreservednames:
1175 1181 return _("filename contains '%s', which is reserved "
1176 1182 "on Windows") % base
1177 1183 t = n[-1]
1178 1184 if t in '. ' and n not in '..':
1179 1185 return _("filename ends with '%s', which is not allowed "
1180 1186 "on Windows") % t
1181 1187
1182 1188 if pycompat.osname == 'nt':
1183 1189 checkosfilename = checkwinfilename
1184 1190 else:
1185 1191 checkosfilename = platform.checkosfilename
1186 1192
1187 1193 def makelock(info, pathname):
1188 1194 try:
1189 1195 return os.symlink(info, pathname)
1190 1196 except OSError as why:
1191 1197 if why.errno == errno.EEXIST:
1192 1198 raise
1193 1199 except AttributeError: # no symlink in os
1194 1200 pass
1195 1201
1196 1202 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1197 1203 os.write(ld, info)
1198 1204 os.close(ld)
1199 1205
1200 1206 def readlock(pathname):
1201 1207 try:
1202 1208 return os.readlink(pathname)
1203 1209 except OSError as why:
1204 1210 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1205 1211 raise
1206 1212 except AttributeError: # no symlink in os
1207 1213 pass
1208 1214 fp = posixfile(pathname)
1209 1215 r = fp.read()
1210 1216 fp.close()
1211 1217 return r
1212 1218
1213 1219 def fstat(fp):
1214 1220 '''stat file object that may not have fileno method.'''
1215 1221 try:
1216 1222 return os.fstat(fp.fileno())
1217 1223 except AttributeError:
1218 1224 return os.stat(fp.name)
1219 1225
1220 1226 # File system features
1221 1227
1222 1228 def fscasesensitive(path):
1223 1229 """
1224 1230 Return true if the given path is on a case-sensitive filesystem
1225 1231
1226 1232 Requires a path (like /foo/.hg) ending with a foldable final
1227 1233 directory component.
1228 1234 """
1229 1235 s1 = os.lstat(path)
1230 1236 d, b = os.path.split(path)
1231 1237 b2 = b.upper()
1232 1238 if b == b2:
1233 1239 b2 = b.lower()
1234 1240 if b == b2:
1235 1241 return True # no evidence against case sensitivity
1236 1242 p2 = os.path.join(d, b2)
1237 1243 try:
1238 1244 s2 = os.lstat(p2)
1239 1245 if s2 == s1:
1240 1246 return False
1241 1247 return True
1242 1248 except OSError:
1243 1249 return True
1244 1250
1245 1251 try:
1246 1252 import re2
1247 1253 _re2 = None
1248 1254 except ImportError:
1249 1255 _re2 = False
1250 1256
1251 1257 class _re(object):
1252 1258 def _checkre2(self):
1253 1259 global _re2
1254 1260 try:
1255 1261 # check if match works, see issue3964
1256 1262 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1257 1263 except ImportError:
1258 1264 _re2 = False
1259 1265
1260 1266 def compile(self, pat, flags=0):
1261 1267 '''Compile a regular expression, using re2 if possible
1262 1268
1263 1269 For best performance, use only re2-compatible regexp features. The
1264 1270 only flags from the re module that are re2-compatible are
1265 1271 IGNORECASE and MULTILINE.'''
1266 1272 if _re2 is None:
1267 1273 self._checkre2()
1268 1274 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1269 1275 if flags & remod.IGNORECASE:
1270 1276 pat = '(?i)' + pat
1271 1277 if flags & remod.MULTILINE:
1272 1278 pat = '(?m)' + pat
1273 1279 try:
1274 1280 return re2.compile(pat)
1275 1281 except re2.error:
1276 1282 pass
1277 1283 return remod.compile(pat, flags)
1278 1284
1279 1285 @propertycache
1280 1286 def escape(self):
1281 1287 '''Return the version of escape corresponding to self.compile.
1282 1288
1283 1289 This is imperfect because whether re2 or re is used for a particular
1284 1290 function depends on the flags, etc, but it's the best we can do.
1285 1291 '''
1286 1292 global _re2
1287 1293 if _re2 is None:
1288 1294 self._checkre2()
1289 1295 if _re2:
1290 1296 return re2.escape
1291 1297 else:
1292 1298 return remod.escape
1293 1299
1294 1300 re = _re()
1295 1301
1296 1302 _fspathcache = {}
1297 1303 def fspath(name, root):
1298 1304 '''Get name in the case stored in the filesystem
1299 1305
1300 1306 The name should be relative to root, and be normcase-ed for efficiency.
1301 1307
1302 1308 Note that this function is unnecessary, and should not be
1303 1309 called, for case-sensitive filesystems (simply because it's expensive).
1304 1310
1305 1311 The root should be normcase-ed, too.
1306 1312 '''
1307 1313 def _makefspathcacheentry(dir):
1308 1314 return dict((normcase(n), n) for n in os.listdir(dir))
1309 1315
1310 1316 seps = pycompat.ossep
1311 1317 if pycompat.osaltsep:
1312 1318 seps = seps + pycompat.osaltsep
1313 1319 # Protect backslashes. This gets silly very quickly.
1314 1320 seps.replace('\\','\\\\')
1315 1321 pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
1316 1322 dir = os.path.normpath(root)
1317 1323 result = []
1318 1324 for part, sep in pattern.findall(name):
1319 1325 if sep:
1320 1326 result.append(sep)
1321 1327 continue
1322 1328
1323 1329 if dir not in _fspathcache:
1324 1330 _fspathcache[dir] = _makefspathcacheentry(dir)
1325 1331 contents = _fspathcache[dir]
1326 1332
1327 1333 found = contents.get(part)
1328 1334 if not found:
1329 1335 # retry "once per directory" per "dirstate.walk" which
1330 1336 # may take place for each patch of "hg qpush", for example
1331 1337 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1332 1338 found = contents.get(part)
1333 1339
1334 1340 result.append(found or part)
1335 1341 dir = os.path.join(dir, part)
1336 1342
1337 1343 return ''.join(result)
1338 1344
1339 1345 def checknlink(testfile):
1340 1346 '''check whether hardlink count reporting works properly'''
1341 1347
1342 1348 # testfile may be open, so we need a separate file for checking to
1343 1349 # work around issue2543 (or testfile may get lost on Samba shares)
1344 1350 f1 = testfile + ".hgtmp1"
1345 1351 if os.path.lexists(f1):
1346 1352 return False
1347 1353 try:
1348 1354 posixfile(f1, 'w').close()
1349 1355 except IOError:
1350 1356 try:
1351 1357 os.unlink(f1)
1352 1358 except OSError:
1353 1359 pass
1354 1360 return False
1355 1361
1356 1362 f2 = testfile + ".hgtmp2"
1357 1363 fd = None
1358 1364 try:
1359 1365 oslink(f1, f2)
1360 1366 # nlinks() may behave differently for files on Windows shares if
1361 1367 # the file is open.
1362 1368 fd = posixfile(f2)
1363 1369 return nlinks(f2) > 1
1364 1370 except OSError:
1365 1371 return False
1366 1372 finally:
1367 1373 if fd is not None:
1368 1374 fd.close()
1369 1375 for f in (f1, f2):
1370 1376 try:
1371 1377 os.unlink(f)
1372 1378 except OSError:
1373 1379 pass
1374 1380
1375 1381 def endswithsep(path):
1376 1382 '''Check path ends with os.sep or os.altsep.'''
1377 1383 return (path.endswith(pycompat.ossep)
1378 1384 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1379 1385
1380 1386 def splitpath(path):
1381 1387 '''Split path by os.sep.
1382 1388 Note that this function does not use os.altsep because it is
1383 1389 intended as a simple alternative to "xxx.split(os.sep)".
1384 1390 It is recommended to use os.path.normpath() before using this
1385 1391 function if needed.'''
1386 1392 return path.split(pycompat.ossep)
1387 1393
1388 1394 def gui():
1389 1395 '''Are we running in a GUI?'''
1390 1396 if pycompat.sysplatform == 'darwin':
1391 1397 if 'SSH_CONNECTION' in encoding.environ:
1392 1398 # handle SSH access to a box where the user is logged in
1393 1399 return False
1394 1400 elif getattr(osutil, 'isgui', None):
1395 1401 # check if a CoreGraphics session is available
1396 1402 return osutil.isgui()
1397 1403 else:
1398 1404 # pure build; use a safe default
1399 1405 return True
1400 1406 else:
1401 1407 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1402 1408
1403 1409 def mktempcopy(name, emptyok=False, createmode=None):
1404 1410 """Create a temporary file with the same contents from name
1405 1411
1406 1412 The permission bits are copied from the original file.
1407 1413
1408 1414 If the temporary file is going to be truncated immediately, you
1409 1415 can use emptyok=True as an optimization.
1410 1416
1411 1417 Returns the name of the temporary file.
1412 1418 """
1413 1419 d, fn = os.path.split(name)
1414 1420 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1415 1421 os.close(fd)
1416 1422 # Temporary files are created with mode 0600, which is usually not
1417 1423 # what we want. If the original file already exists, just copy
1418 1424 # its mode. Otherwise, manually obey umask.
1419 1425 copymode(name, temp, createmode)
1420 1426 if emptyok:
1421 1427 return temp
1422 1428 try:
1423 1429 try:
1424 1430 ifp = posixfile(name, "rb")
1425 1431 except IOError as inst:
1426 1432 if inst.errno == errno.ENOENT:
1427 1433 return temp
1428 1434 if not getattr(inst, 'filename', None):
1429 1435 inst.filename = name
1430 1436 raise
1431 1437 ofp = posixfile(temp, "wb")
1432 1438 for chunk in filechunkiter(ifp):
1433 1439 ofp.write(chunk)
1434 1440 ifp.close()
1435 1441 ofp.close()
1436 1442 except: # re-raises
1437 1443 try: os.unlink(temp)
1438 1444 except OSError: pass
1439 1445 raise
1440 1446 return temp
1441 1447
1442 1448 class filestat(object):
1443 1449 """help to exactly detect change of a file
1444 1450
1445 1451 'stat' attribute is result of 'os.stat()' if specified 'path'
1446 1452 exists. Otherwise, it is None. This avoids a preparatory
1447 1453 'exists()' check by users of this class.
1448 1454 """
1449 1455 def __init__(self, path):
1450 1456 try:
1451 1457 self.stat = os.stat(path)
1452 1458 except OSError as err:
1453 1459 if err.errno != errno.ENOENT:
1454 1460 raise
1455 1461 self.stat = None
1456 1462
1457 1463 __hash__ = object.__hash__
1458 1464
1459 1465 def __eq__(self, old):
1460 1466 try:
1461 1467 # if ambiguity between stat of new and old file is
1462 1468 # avoided, comparison of size, ctime and mtime is enough
1463 1469 # to exactly detect change of a file regardless of platform
1464 1470 return (self.stat.st_size == old.stat.st_size and
1465 1471 self.stat.st_ctime == old.stat.st_ctime and
1466 1472 self.stat.st_mtime == old.stat.st_mtime)
1467 1473 except AttributeError:
1468 1474 return False
1469 1475
1470 1476 def isambig(self, old):
1471 1477 """Examine whether new (= self) stat is ambiguous against old one
1472 1478
1473 1479 "S[N]" below means stat of a file at N-th change:
1474 1480
1475 1481 - S[n-1].ctime < S[n].ctime: can detect change of a file
1476 1482 - S[n-1].ctime == S[n].ctime
1477 1483   - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1478 1484   - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1479 1485   - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1480 1486 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1481 1487
1482 1488 Case (*2) above means that a file was changed twice or more at
1483 1489 same time in sec (= S[n-1].ctime), and comparison of timestamp
1484 1490 is ambiguous.
1485 1491
1486 1492 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1487 1493 timestamp is ambiguous".
1488 1494
1489 1495 But advancing mtime only in case (*2) doesn't work as
1490 1496 expected, because naturally advanced S[n].mtime in case (*1)
1491 1497 might be equal to manually advanced S[n-1 or earlier].mtime.
1492 1498
1493 1499 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1494 1500 treated as ambiguous regardless of mtime, to avoid changes being
1495 1501 overlooked because of collisions between such mtimes.
1496 1502
1497 1503 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1498 1504 S[n].mtime", even if size of a file isn't changed.
1499 1505 """
1500 1506 try:
1501 1507 return (self.stat.st_ctime == old.stat.st_ctime)
1502 1508 except AttributeError:
1503 1509 return False
1504 1510
1505 1511 def avoidambig(self, path, old):
1506 1512 """Change file stat of specified path to avoid ambiguity
1507 1513
1508 1514 'old' should be previous filestat of 'path'.
1509 1515
1510 1516 Avoiding ambiguity is skipped if the process doesn't have
1511 1517 appropriate privileges for 'path'.
1512 1518 """
1513 1519 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1514 1520 try:
1515 1521 os.utime(path, (advanced, advanced))
1516 1522 except OSError as inst:
1517 1523 if inst.errno == errno.EPERM:
1518 1524 # utime() on the file created by another user causes EPERM,
1519 1525 # if a process doesn't have appropriate privileges
1520 1526 return
1521 1527 raise
1522 1528
1523 1529 def __ne__(self, other):
1524 1530 return not self == other
1525 1531
1526 1532 class atomictempfile(object):
1527 1533 '''writable file object that atomically updates a file
1528 1534
1529 1535 All writes will go to a temporary copy of the original file. Call
1530 1536 close() when you are done writing, and atomictempfile will rename
1531 1537 the temporary copy to the original name, making the changes
1532 1538 visible. If the object is destroyed without being closed, all your
1533 1539 writes are discarded.
1534 1540
1535 1541 checkambig argument of constructor is used with filestat, and is
1536 1542 useful only if target file is guarded by any lock (e.g. repo.lock
1537 1543 or repo.wlock).
1538 1544 '''
1539 1545 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1540 1546 self.__name = name # permanent name
1541 1547 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1542 1548 createmode=createmode)
1543 1549 self._fp = posixfile(self._tempname, mode)
1544 1550 self._checkambig = checkambig
1545 1551
1546 1552 # delegated methods
1547 1553 self.read = self._fp.read
1548 1554 self.write = self._fp.write
1549 1555 self.seek = self._fp.seek
1550 1556 self.tell = self._fp.tell
1551 1557 self.fileno = self._fp.fileno
1552 1558
1553 1559 def close(self):
1554 1560 if not self._fp.closed:
1555 1561 self._fp.close()
1556 1562 filename = localpath(self.__name)
1557 1563 oldstat = self._checkambig and filestat(filename)
1558 1564 if oldstat and oldstat.stat:
1559 1565 rename(self._tempname, filename)
1560 1566 newstat = filestat(filename)
1561 1567 if newstat.isambig(oldstat):
1562 1568 # stat of changed file is ambiguous to original one
1563 1569 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1564 1570 os.utime(filename, (advanced, advanced))
1565 1571 else:
1566 1572 rename(self._tempname, filename)
1567 1573
1568 1574 def discard(self):
1569 1575 if not self._fp.closed:
1570 1576 try:
1571 1577 os.unlink(self._tempname)
1572 1578 except OSError:
1573 1579 pass
1574 1580 self._fp.close()
1575 1581
1576 1582 def __del__(self):
1577 1583 if safehasattr(self, '_fp'): # constructor actually did something
1578 1584 self.discard()
1579 1585
1580 1586 def __enter__(self):
1581 1587 return self
1582 1588
1583 1589 def __exit__(self, exctype, excvalue, traceback):
1584 1590 if exctype is not None:
1585 1591 self.discard()
1586 1592 else:
1587 1593 self.close()
1588 1594
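# A minimal atomictempfile sketch (illustrative only; the file name is
# hypothetical and mercurial.util is assumed importable): all writes go to a
# temporary copy, and close() renames it over the target in one step, so a
# reader never observes a half-written file; raising inside the block
# discards the temporary copy instead.
#
#   >>> from mercurial import util
#   >>> with util.atomictempfile('some-file', 'wb') as fp:
#   ...     fp.write('all or nothing\n')
#   >>> util.readfile('some-file')
#   'all or nothing\n'
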
1589 1595 def makedirs(name, mode=None, notindexed=False):
1590 1596 """recursive directory creation with parent mode inheritance
1591 1597
1592 1598 Newly created directories are marked as "not to be indexed by
1593 1599 the content indexing service", if ``notindexed`` is specified
1594 1600 for "write" mode access.
1595 1601 """
1596 1602 try:
1597 1603 makedir(name, notindexed)
1598 1604 except OSError as err:
1599 1605 if err.errno == errno.EEXIST:
1600 1606 return
1601 1607 if err.errno != errno.ENOENT or not name:
1602 1608 raise
1603 1609 parent = os.path.dirname(os.path.abspath(name))
1604 1610 if parent == name:
1605 1611 raise
1606 1612 makedirs(parent, mode, notindexed)
1607 1613 try:
1608 1614 makedir(name, notindexed)
1609 1615 except OSError as err:
1610 1616 # Catch EEXIST to handle races
1611 1617 if err.errno == errno.EEXIST:
1612 1618 return
1613 1619 raise
1614 1620 if mode is not None:
1615 1621 os.chmod(name, mode)
1616 1622
1617 1623 def readfile(path):
1618 1624 with open(path, 'rb') as fp:
1619 1625 return fp.read()
1620 1626
1621 1627 def writefile(path, text):
1622 1628 with open(path, 'wb') as fp:
1623 1629 fp.write(text)
1624 1630
1625 1631 def appendfile(path, text):
1626 1632 with open(path, 'ab') as fp:
1627 1633 fp.write(text)
1628 1634
1629 1635 class chunkbuffer(object):
1630 1636 """Allow arbitrary sized chunks of data to be efficiently read from an
1631 1637 iterator over chunks of arbitrary size."""
1632 1638
1633 1639 def __init__(self, in_iter):
1634 1640 """in_iter is the iterator that's iterating over the input chunks.
1635 1641 targetsize is how big a buffer to try to maintain."""
1636 1642 def splitbig(chunks):
1637 1643 for chunk in chunks:
1638 1644 if len(chunk) > 2**20:
1639 1645 pos = 0
1640 1646 while pos < len(chunk):
1641 1647 end = pos + 2 ** 18
1642 1648 yield chunk[pos:end]
1643 1649 pos = end
1644 1650 else:
1645 1651 yield chunk
1646 1652 self.iter = splitbig(in_iter)
1647 1653 self._queue = collections.deque()
1648 1654 self._chunkoffset = 0
1649 1655
1650 1656 def read(self, l=None):
1651 1657 """Read L bytes of data from the iterator of chunks of data.
1652 1658 Returns less than L bytes if the iterator runs dry.
1653 1659
1654 1660 If the size parameter is omitted, read everything."""
1655 1661 if l is None:
1656 1662 return ''.join(self.iter)
1657 1663
1658 1664 left = l
1659 1665 buf = []
1660 1666 queue = self._queue
1661 1667 while left > 0:
1662 1668 # refill the queue
1663 1669 if not queue:
1664 1670 target = 2**18
1665 1671 for chunk in self.iter:
1666 1672 queue.append(chunk)
1667 1673 target -= len(chunk)
1668 1674 if target <= 0:
1669 1675 break
1670 1676 if not queue:
1671 1677 break
1672 1678
1673 1679 # The easy way to do this would be to queue.popleft(), modify the
1674 1680 # chunk (if necessary), then queue.appendleft(). However, for cases
1675 1681 # where we read partial chunk content, this incurs 2 dequeue
1676 1682 # mutations and creates a new str for the remaining chunk in the
1677 1683 # queue. Our code below avoids this overhead.
1678 1684
1679 1685 chunk = queue[0]
1680 1686 chunkl = len(chunk)
1681 1687 offset = self._chunkoffset
1682 1688
1683 1689 # Use full chunk.
1684 1690 if offset == 0 and left >= chunkl:
1685 1691 left -= chunkl
1686 1692 queue.popleft()
1687 1693 buf.append(chunk)
1688 1694 # self._chunkoffset remains at 0.
1689 1695 continue
1690 1696
1691 1697 chunkremaining = chunkl - offset
1692 1698
1693 1699 # Use all of unconsumed part of chunk.
1694 1700 if left >= chunkremaining:
1695 1701 left -= chunkremaining
1696 1702 queue.popleft()
1697 1703 # offset == 0 is enabled by block above, so this won't merely
1698 1704 # copy via ``chunk[0:]``.
1699 1705 buf.append(chunk[offset:])
1700 1706 self._chunkoffset = 0
1701 1707
1702 1708 # Partial chunk needed.
1703 1709 else:
1704 1710 buf.append(chunk[offset:offset + left])
1705 1711 self._chunkoffset += left
1706 1712 left -= chunkremaining
1707 1713
1708 1714 return ''.join(buf)
1709 1715
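# chunkbuffer turns an iterator of arbitrarily sized chunks into a file-like
# object supporting fixed-size read()s; internal buffering means chunk
# boundaries are invisible to the caller. A small sketch (illustrative only;
# assumes mercurial.util is importable):
#
#   >>> from mercurial import util
#   >>> buf = util.chunkbuffer(iter(['ab', 'cde', 'f']))
#   >>> buf.read(4)
#   'abcd'
#   >>> buf.read(2)
#   'ef'
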
1710 1716 def filechunkiter(f, size=131072, limit=None):
1711 1717 """Create a generator that produces the data in the file size
1712 1718 (default 131072) bytes at a time, up to optional limit (default is
1713 1719 to read all data). Chunks may be less than size bytes if the
1714 1720 chunk is the last chunk in the file, or the file is a socket or
1715 1721 some other type of file that sometimes reads less data than is
1716 1722 requested."""
1717 1723 assert size >= 0
1718 1724 assert limit is None or limit >= 0
1719 1725 while True:
1720 1726 if limit is None:
1721 1727 nbytes = size
1722 1728 else:
1723 1729 nbytes = min(limit, size)
1724 1730 s = nbytes and f.read(nbytes)
1725 1731 if not s:
1726 1732 break
1727 1733 if limit:
1728 1734 limit -= len(s)
1729 1735 yield s
1730 1736
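# filechunkiter is the converse helper: it reads a file-like object and yields
# its data in bounded chunks, optionally stopping after ``limit`` bytes. A
# sketch using an in-memory file (illustrative only):
#
#   >>> import io
#   >>> from mercurial import util
#   >>> list(util.filechunkiter(io.BytesIO(b'abcdefghij'), size=4))
#   ['abcd', 'efgh', 'ij']
#   >>> list(util.filechunkiter(io.BytesIO(b'abcdefghij'), size=4, limit=6))
#   ['abcd', 'ef']
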
1731 1737 def makedate(timestamp=None):
1732 1738 '''Return a unix timestamp (or the current time) as a (unixtime,
1733 1739 offset) tuple based off the local timezone.'''
1734 1740 if timestamp is None:
1735 1741 timestamp = time.time()
1736 1742 if timestamp < 0:
1737 1743 hint = _("check your clock")
1738 1744 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1739 1745 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1740 1746 datetime.datetime.fromtimestamp(timestamp))
1741 1747 tz = delta.days * 86400 + delta.seconds
1742 1748 return timestamp, tz
1743 1749
1744 1750 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1745 1751 """represent a (unixtime, offset) tuple as a localized time.
1746 1752 unixtime is seconds since the epoch, and offset is the time zone's
1747 1753 number of seconds away from UTC.
1748 1754
1749 1755 >>> datestr((0, 0))
1750 1756 'Thu Jan 01 00:00:00 1970 +0000'
1751 1757 >>> datestr((42, 0))
1752 1758 'Thu Jan 01 00:00:42 1970 +0000'
1753 1759 >>> datestr((-42, 0))
1754 1760 'Wed Dec 31 23:59:18 1969 +0000'
1755 1761 >>> datestr((0x7fffffff, 0))
1756 1762 'Tue Jan 19 03:14:07 2038 +0000'
1757 1763 >>> datestr((-0x80000000, 0))
1758 1764 'Fri Dec 13 20:45:52 1901 +0000'
1759 1765 """
1760 1766 t, tz = date or makedate()
1761 1767 if "%1" in format or "%2" in format or "%z" in format:
1762 1768 sign = (tz > 0) and "-" or "+"
1763 1769 minutes = abs(tz) // 60
1764 1770 q, r = divmod(minutes, 60)
1765 1771 format = format.replace("%z", "%1%2")
1766 1772 format = format.replace("%1", "%c%02d" % (sign, q))
1767 1773 format = format.replace("%2", "%02d" % r)
1768 1774 d = t - tz
1769 1775 if d > 0x7fffffff:
1770 1776 d = 0x7fffffff
1771 1777 elif d < -0x80000000:
1772 1778 d = -0x80000000
1773 1779 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1774 1780 # because they use the gmtime() system call which is buggy on Windows
1775 1781 # for negative values.
1776 1782 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1777 1783 s = t.strftime(format)
1778 1784 return s
1779 1785
1780 1786 def shortdate(date=None):
1781 1787 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1782 1788 return datestr(date, format='%Y-%m-%d')
1783 1789
1784 1790 def parsetimezone(s):
1785 1791 """find a trailing timezone, if any, in string, and return a
1786 1792 (offset, remainder) pair"""
1787 1793
1788 1794 if s.endswith("GMT") or s.endswith("UTC"):
1789 1795 return 0, s[:-3].rstrip()
1790 1796
1791 1797 # Unix-style timezones [+-]hhmm
1792 1798 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1793 1799 sign = (s[-5] == "+") and 1 or -1
1794 1800 hours = int(s[-4:-2])
1795 1801 minutes = int(s[-2:])
1796 1802 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1797 1803
1798 1804 # ISO8601 trailing Z
1799 1805 if s.endswith("Z") and s[-2:-1].isdigit():
1800 1806 return 0, s[:-1]
1801 1807
1802 1808 # ISO8601-style [+-]hh:mm
1803 1809 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1804 1810 s[-5:-3].isdigit() and s[-2:].isdigit()):
1805 1811 sign = (s[-6] == "+") and 1 or -1
1806 1812 hours = int(s[-5:-3])
1807 1813 minutes = int(s[-2:])
1808 1814 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1809 1815
1810 1816 return None, s
1811 1817
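# parsetimezone peels a trailing timezone off a date string and returns the
# offset in seconds (same sign convention as makedate: positive means west of
# UTC) together with the remaining text, or (None, s) when no timezone is
# recognized. A few illustrative values:
#
#   >>> from mercurial import util
#   >>> util.parsetimezone('2006-02-01 13:00:30 GMT')
#   (0, '2006-02-01 13:00:30')
#   >>> util.parsetimezone('2006-02-01 13:00:30 +0500')
#   (-18000, '2006-02-01 13:00:30')
#   >>> util.parsetimezone('2006-02-01 13:00:30')
#   (None, '2006-02-01 13:00:30')
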
1812 1818 def strdate(string, format, defaults=[]):
1813 1819 """parse a localized time string and return a (unixtime, offset) tuple.
1814 1820 if the string cannot be parsed, ValueError is raised."""
1815 1821 # NOTE: unixtime = localunixtime + offset
1816 1822 offset, date = parsetimezone(string)
1817 1823
1818 1824 # add missing elements from defaults
1819 1825 usenow = False # default to using biased defaults
1820 1826 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1821 1827 found = [True for p in part if ("%"+p) in format]
1822 1828 if not found:
1823 1829 date += "@" + defaults[part][usenow]
1824 1830 format += "@%" + part[0]
1825 1831 else:
1826 1832 # We've found a specific time element, less specific time
1827 1833 # elements are relative to today
1828 1834 usenow = True
1829 1835
1830 1836 timetuple = time.strptime(date, format)
1831 1837 localunixtime = int(calendar.timegm(timetuple))
1832 1838 if offset is None:
1833 1839 # local timezone
1834 1840 unixtime = int(time.mktime(timetuple))
1835 1841 offset = unixtime - localunixtime
1836 1842 else:
1837 1843 unixtime = localunixtime + offset
1838 1844 return unixtime, offset
1839 1845
1840 1846 def parsedate(date, formats=None, bias=None):
1841 1847 """parse a localized date/time and return a (unixtime, offset) tuple.
1842 1848
1843 1849 The date may be a "unixtime offset" string or in one of the specified
1844 1850 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1845 1851
1846 1852 >>> parsedate(' today ') == parsedate(\
1847 1853 datetime.date.today().strftime('%b %d'))
1848 1854 True
1849 1855 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1850 1856 datetime.timedelta(days=1)\
1851 1857 ).strftime('%b %d'))
1852 1858 True
1853 1859 >>> now, tz = makedate()
1854 1860 >>> strnow, strtz = parsedate('now')
1855 1861 >>> (strnow - now) < 1
1856 1862 True
1857 1863 >>> tz == strtz
1858 1864 True
1859 1865 """
1860 1866 if bias is None:
1861 1867 bias = {}
1862 1868 if not date:
1863 1869 return 0, 0
1864 1870 if isinstance(date, tuple) and len(date) == 2:
1865 1871 return date
1866 1872 if not formats:
1867 1873 formats = defaultdateformats
1868 1874 date = date.strip()
1869 1875
1870 1876 if date == 'now' or date == _('now'):
1871 1877 return makedate()
1872 1878 if date == 'today' or date == _('today'):
1873 1879 date = datetime.date.today().strftime('%b %d')
1874 1880 elif date == 'yesterday' or date == _('yesterday'):
1875 1881 date = (datetime.date.today() -
1876 1882 datetime.timedelta(days=1)).strftime('%b %d')
1877 1883
1878 1884 try:
1879 1885 when, offset = map(int, date.split(' '))
1880 1886 except ValueError:
1881 1887 # fill out defaults
1882 1888 now = makedate()
1883 1889 defaults = {}
1884 1890 for part in ("d", "mb", "yY", "HI", "M", "S"):
1885 1891 # this piece is for rounding the specific end of unknowns
1886 1892 b = bias.get(part)
1887 1893 if b is None:
1888 1894 if part[0] in "HMS":
1889 1895 b = "00"
1890 1896 else:
1891 1897 b = "0"
1892 1898
1893 1899 # this piece is for matching the generic end to today's date
1894 1900 n = datestr(now, "%" + part[0])
1895 1901
1896 1902 defaults[part] = (b, n)
1897 1903
1898 1904 for format in formats:
1899 1905 try:
1900 1906 when, offset = strdate(date, format, defaults)
1901 1907 except (ValueError, OverflowError):
1902 1908 pass
1903 1909 else:
1904 1910 break
1905 1911 else:
1906 1912 raise Abort(_('invalid date: %r') % date)
1907 1913 # validate explicit (probably user-specified) date and
1908 1914 # time zone offset. values must fit in signed 32 bits for
1909 1915 # current 32-bit linux runtimes. timezones go from UTC-12
1910 1916 # to UTC+14
1911 1917 if when < -0x80000000 or when > 0x7fffffff:
1912 1918 raise Abort(_('date exceeds 32 bits: %d') % when)
1913 1919 if offset < -50400 or offset > 43200:
1914 1920 raise Abort(_('impossible time zone offset: %d') % offset)
1915 1921 return when, offset
1916 1922
1917 1923 def matchdate(date):
1918 1924 """Return a function that matches a given date match specifier
1919 1925
1920 1926 Formats include:
1921 1927
1922 1928 '{date}' match a given date to the accuracy provided
1923 1929
1924 1930 '<{date}' on or before a given date
1925 1931
1926 1932 '>{date}' on or after a given date
1927 1933
1928 1934 >>> p1 = parsedate("10:29:59")
1929 1935 >>> p2 = parsedate("10:30:00")
1930 1936 >>> p3 = parsedate("10:30:59")
1931 1937 >>> p4 = parsedate("10:31:00")
1932 1938 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1933 1939 >>> f = matchdate("10:30")
1934 1940 >>> f(p1[0])
1935 1941 False
1936 1942 >>> f(p2[0])
1937 1943 True
1938 1944 >>> f(p3[0])
1939 1945 True
1940 1946 >>> f(p4[0])
1941 1947 False
1942 1948 >>> f(p5[0])
1943 1949 False
1944 1950 """
1945 1951
1946 1952 def lower(date):
1947 1953 d = {'mb': "1", 'd': "1"}
1948 1954 return parsedate(date, extendeddateformats, d)[0]
1949 1955
1950 1956 def upper(date):
1951 1957 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1952 1958 for days in ("31", "30", "29"):
1953 1959 try:
1954 1960 d["d"] = days
1955 1961 return parsedate(date, extendeddateformats, d)[0]
1956 1962 except Abort:
1957 1963 pass
1958 1964 d["d"] = "28"
1959 1965 return parsedate(date, extendeddateformats, d)[0]
1960 1966
1961 1967 date = date.strip()
1962 1968
1963 1969 if not date:
1964 1970 raise Abort(_("dates cannot consist entirely of whitespace"))
1965 1971 elif date[0] == "<":
1966 1972 if not date[1:]:
1967 1973 raise Abort(_("invalid day spec, use '<DATE'"))
1968 1974 when = upper(date[1:])
1969 1975 return lambda x: x <= when
1970 1976 elif date[0] == ">":
1971 1977 if not date[1:]:
1972 1978 raise Abort(_("invalid day spec, use '>DATE'"))
1973 1979 when = lower(date[1:])
1974 1980 return lambda x: x >= when
1975 1981 elif date[0] == "-":
1976 1982 try:
1977 1983 days = int(date[1:])
1978 1984 except ValueError:
1979 1985 raise Abort(_("invalid day spec: %s") % date[1:])
1980 1986 if days < 0:
1981 1987 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
1982 1988 % date[1:])
1983 1989 when = makedate()[0] - days * 3600 * 24
1984 1990 return lambda x: x >= when
1985 1991 elif " to " in date:
1986 1992 a, b = date.split(" to ")
1987 1993 start, stop = lower(a), upper(b)
1988 1994 return lambda x: x >= start and x <= stop
1989 1995 else:
1990 1996 start, stop = lower(date), upper(date)
1991 1997 return lambda x: x >= start and x <= stop
1992 1998
1993 1999 def stringmatcher(pattern):
1994 2000 """
1995 2001 accepts a string, possibly starting with 're:' or 'literal:' prefix.
1996 2002 returns the matcher name, pattern, and matcher function.
1997 2003 missing or unknown prefixes are treated as literal matches.
1998 2004
1999 2005 helper for tests:
2000 2006 >>> def test(pattern, *tests):
2001 2007 ... kind, pattern, matcher = stringmatcher(pattern)
2002 2008 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2003 2009
2004 2010 exact matching (no prefix):
2005 2011 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2006 2012 ('literal', 'abcdefg', [False, False, True])
2007 2013
2008 2014 regex matching ('re:' prefix)
2009 2015 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2010 2016 ('re', 'a.+b', [False, False, True])
2011 2017
2012 2018 force exact matches ('literal:' prefix)
2013 2019 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2014 2020 ('literal', 're:foobar', [False, True])
2015 2021
2016 2022 unknown prefixes are ignored and treated as literals
2017 2023 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2018 2024 ('literal', 'foo:bar', [False, False, True])
2019 2025 """
2020 2026 if pattern.startswith('re:'):
2021 2027 pattern = pattern[3:]
2022 2028 try:
2023 2029 regex = remod.compile(pattern)
2024 2030 except remod.error as e:
2025 2031 raise error.ParseError(_('invalid regular expression: %s')
2026 2032 % e)
2027 2033 return 're', pattern, regex.search
2028 2034 elif pattern.startswith('literal:'):
2029 2035 pattern = pattern[8:]
2030 2036 return 'literal', pattern, pattern.__eq__
2031 2037
2032 2038 def shortuser(user):
2033 2039 """Return a short representation of a user name or email address."""
2034 2040 f = user.find('@')
2035 2041 if f >= 0:
2036 2042 user = user[:f]
2037 2043 f = user.find('<')
2038 2044 if f >= 0:
2039 2045 user = user[f + 1:]
2040 2046 f = user.find(' ')
2041 2047 if f >= 0:
2042 2048 user = user[:f]
2043 2049 f = user.find('.')
2044 2050 if f >= 0:
2045 2051 user = user[:f]
2046 2052 return user
2047 2053
2048 2054 def emailuser(user):
2049 2055 """Return the user portion of an email address."""
2050 2056 f = user.find('@')
2051 2057 if f >= 0:
2052 2058 user = user[:f]
2053 2059 f = user.find('<')
2054 2060 if f >= 0:
2055 2061 user = user[f + 1:]
2056 2062 return user
2057 2063
2058 2064 def email(author):
2059 2065 '''get email of author.'''
2060 2066 r = author.find('>')
2061 2067 if r == -1:
2062 2068 r = None
2063 2069 return author[author.find('<') + 1:r]
2064 2070
2065 2071 def ellipsis(text, maxlength=400):
2066 2072 """Trim string to at most maxlength (default: 400) columns in display."""
2067 2073 return encoding.trim(text, maxlength, ellipsis='...')
2068 2074
2069 2075 def unitcountfn(*unittable):
2070 2076 '''return a function that renders a readable count of some quantity'''
2071 2077
2072 2078 def go(count):
2073 2079 for multiplier, divisor, format in unittable:
2074 2080 if count >= divisor * multiplier:
2075 2081 return format % (count / float(divisor))
2076 2082 return unittable[-1][2] % count
2077 2083
2078 2084 return go
2079 2085
2080 2086 bytecount = unitcountfn(
2081 2087 (100, 1 << 30, _('%.0f GB')),
2082 2088 (10, 1 << 30, _('%.1f GB')),
2083 2089 (1, 1 << 30, _('%.2f GB')),
2084 2090 (100, 1 << 20, _('%.0f MB')),
2085 2091 (10, 1 << 20, _('%.1f MB')),
2086 2092 (1, 1 << 20, _('%.2f MB')),
2087 2093 (100, 1 << 10, _('%.0f KB')),
2088 2094 (10, 1 << 10, _('%.1f KB')),
2089 2095 (1, 1 << 10, _('%.2f KB')),
2090 2096 (1, 1, _('%.0f bytes')),
2091 2097 )
2092 2098
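# bytecount picks the first row of the unit table whose threshold the value
# meets, so the displayed precision shrinks as the quantity grows. A few
# illustrative values (assuming the default, untranslated unit strings):
#
#   >>> from mercurial import util
#   >>> util.bytecount(500)
#   '500 bytes'
#   >>> util.bytecount(10 * 1024)
#   '10.0 KB'
#   >>> util.bytecount(int(2.5 * 1024 * 1024))
#   '2.50 MB'
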
2093 2099 def uirepr(s):
2094 2100 # Avoid double backslash in Windows path repr()
2095 2101 return repr(s).replace('\\\\', '\\')
2096 2102
2097 2103 # delay import of textwrap
2098 2104 def MBTextWrapper(**kwargs):
2099 2105 class tw(textwrap.TextWrapper):
2100 2106 """
2101 2107 Extend TextWrapper for width-awareness.
2102 2108
2103 2109 Neither the number of 'bytes' in any encoding nor the number of
2104 2110 'characters' is appropriate for calculating terminal columns for a given string.
2105 2111 
2106 2112 The original TextWrapper implementation uses the built-in 'len()' directly,
2107 2113 so it must be overridden to use the width information of each character.
2108 2114 
2109 2115 In addition, characters classified as 'ambiguous' width are
2110 2116 treated as wide in East Asian locales, but as narrow elsewhere.
2111 2117 
2112 2118 This requires a usage decision to determine the width of such characters.
2113 2119 """
2114 2120 def _cutdown(self, ucstr, space_left):
2115 2121 l = 0
2116 2122 colwidth = encoding.ucolwidth
2117 2123 for i in xrange(len(ucstr)):
2118 2124 l += colwidth(ucstr[i])
2119 2125 if space_left < l:
2120 2126 return (ucstr[:i], ucstr[i:])
2121 2127 return ucstr, ''
2122 2128
2123 2129 # overriding of base class
2124 2130 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2125 2131 space_left = max(width - cur_len, 1)
2126 2132
2127 2133 if self.break_long_words:
2128 2134 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2129 2135 cur_line.append(cut)
2130 2136 reversed_chunks[-1] = res
2131 2137 elif not cur_line:
2132 2138 cur_line.append(reversed_chunks.pop())
2133 2139
2134 2140 # this overriding code is imported from TextWrapper of Python 2.6
2135 2141 # to calculate columns of string by 'encoding.ucolwidth()'
2136 2142 def _wrap_chunks(self, chunks):
2137 2143 colwidth = encoding.ucolwidth
2138 2144
2139 2145 lines = []
2140 2146 if self.width <= 0:
2141 2147 raise ValueError("invalid width %r (must be > 0)" % self.width)
2142 2148
2143 2149 # Arrange in reverse order so items can be efficiently popped
2144 2150 # from a stack of chunks.
2145 2151 chunks.reverse()
2146 2152
2147 2153 while chunks:
2148 2154
2149 2155 # Start the list of chunks that will make up the current line.
2150 2156 # cur_len is just the length of all the chunks in cur_line.
2151 2157 cur_line = []
2152 2158 cur_len = 0
2153 2159
2154 2160 # Figure out which static string will prefix this line.
2155 2161 if lines:
2156 2162 indent = self.subsequent_indent
2157 2163 else:
2158 2164 indent = self.initial_indent
2159 2165
2160 2166 # Maximum width for this line.
2161 2167 width = self.width - len(indent)
2162 2168
2163 2169 # First chunk on line is whitespace -- drop it, unless this
2164 2170 # is the very beginning of the text (i.e. no lines started yet).
2165 2171 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2166 2172 del chunks[-1]
2167 2173
2168 2174 while chunks:
2169 2175 l = colwidth(chunks[-1])
2170 2176
2171 2177 # Can at least squeeze this chunk onto the current line.
2172 2178 if cur_len + l <= width:
2173 2179 cur_line.append(chunks.pop())
2174 2180 cur_len += l
2175 2181
2176 2182 # Nope, this line is full.
2177 2183 else:
2178 2184 break
2179 2185
2180 2186 # The current line is full, and the next chunk is too big to
2181 2187 # fit on *any* line (not just this one).
2182 2188 if chunks and colwidth(chunks[-1]) > width:
2183 2189 self._handle_long_word(chunks, cur_line, cur_len, width)
2184 2190
2185 2191 # If the last chunk on this line is all whitespace, drop it.
2186 2192 if (self.drop_whitespace and
2187 2193 cur_line and cur_line[-1].strip() == ''):
2188 2194 del cur_line[-1]
2189 2195
2190 2196 # Convert current line back to a string and store it in list
2191 2197 # of all lines (return value).
2192 2198 if cur_line:
2193 2199 lines.append(indent + ''.join(cur_line))
2194 2200
2195 2201 return lines
2196 2202
2197 2203 global MBTextWrapper
2198 2204 MBTextWrapper = tw
2199 2205 return tw(**kwargs)
2200 2206
2201 2207 def wrap(line, width, initindent='', hangindent=''):
2202 2208 maxindent = max(len(hangindent), len(initindent))
2203 2209 if width <= maxindent:
2204 2210 # adjust for weird terminal size
2205 2211 width = max(78, maxindent + 1)
2206 2212 line = line.decode(encoding.encoding, encoding.encodingmode)
2207 2213 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
2208 2214 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
2209 2215 wrapper = MBTextWrapper(width=width,
2210 2216 initial_indent=initindent,
2211 2217 subsequent_indent=hangindent)
2212 2218 return wrapper.fill(line).encode(encoding.encoding)
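
# Example (illustrative sketch, not part of the original file): wrap() expects
# byte strings in the local encoding and wraps on display columns rather than
# byte length:
#
#   >>> wrap('aaa bbb ccc', 7)
#   'aaa bbb\nccc'
#   >>> wrap('aaa bbb ccc', 7, hangindent='  ')
#   'aaa bbb\n  ccc'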
2213 2219
2214 2220 if (pyplatform.python_implementation() == 'CPython' and
2215 2221 sys.version_info < (3, 0)):
2216 2222 # There is an issue in CPython that some IO methods do not handle EINTR
2217 2223 # correctly. The following table shows what CPython version (and functions)
2218 2224 # are affected (buggy: has the EINTR bug, okay: otherwise):
2219 2225 #
2220 2226 #             | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2221 2227 # --------------------------------------------------
2222 2228 # fp.__iter__ | buggy   | buggy           | okay
2223 2229 # fp.read*    | buggy   | okay [1]        | okay
2224 2230 #
2225 2231 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2226 2232 #
2227 2233 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2228 2234 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2229 2235 #
2230 2236 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2231 2237 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2232 2238 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2233 2239 # fp.__iter__ but not other fp.read* methods.
2234 2240 #
2235 2241 # On modern systems like Linux, the "read" syscall cannot be interrupted
2236 2242 # when reading "fast" files like on-disk files. So the EINTR issue only
2237 2243 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2238 2244 # files approximately as "fast" files and use the fast (unsafe) code path,
2239 2245 # to minimize the performance impact.
2240 2246 if sys.version_info >= (2, 7, 4):
2241 2247 # fp.readline deals with EINTR correctly, use it as a workaround.
2242 2248 def _safeiterfile(fp):
2243 2249 return iter(fp.readline, '')
2244 2250 else:
2245 2251 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2246 2252 # note: this may block longer than necessary because of bufsize.
2247 2253 def _safeiterfile(fp, bufsize=4096):
2248 2254 fd = fp.fileno()
2249 2255 line = ''
2250 2256 while True:
2251 2257 try:
2252 2258 buf = os.read(fd, bufsize)
2253 2259 except OSError as ex:
2254 2260 # os.read only raises EINTR before any data is read
2255 2261 if ex.errno == errno.EINTR:
2256 2262 continue
2257 2263 else:
2258 2264 raise
2259 2265 line += buf
2260 2266 if '\n' in buf:
2261 2267 splitted = line.splitlines(True)
2262 2268 line = ''
2263 2269 for l in splitted:
2264 2270 if l[-1] == '\n':
2265 2271 yield l
2266 2272 else:
2267 2273 line = l
2268 2274 if not buf:
2269 2275 break
2270 2276 if line:
2271 2277 yield line
2272 2278
2273 2279 def iterfile(fp):
2274 2280 fastpath = True
2275 2281 if type(fp) is file:
2276 2282 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2277 2283 if fastpath:
2278 2284 return fp
2279 2285 else:
2280 2286 return _safeiterfile(fp)
2281 2287 else:
2282 2288 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2283 2289 def iterfile(fp):
2284 2290 return fp
2285 2291
2286 2292 def iterlines(iterator):
2287 2293 for chunk in iterator:
2288 2294 for line in chunk.splitlines():
2289 2295 yield line
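
# Example (illustrative sketch, not part of the original file): iterfile()
# returns the file object unchanged on the fast path (regular on-disk files),
# while iterlines() flattens an iterator of chunks into individual lines:
#
#   >>> list(iterlines(['a\nb\n', 'c']))
#   ['a', 'b', 'c']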
2290 2296
2291 2297 def expandpath(path):
2292 2298 return os.path.expanduser(os.path.expandvars(path))
2293 2299
2294 2300 def hgcmd():
2295 2301 """Return the command used to execute current hg
2296 2302
2297 2303 This is different from hgexecutable() because on Windows we want
2298 2304 to avoid things opening new shell windows like batch files, so we
2299 2305 get either the python call or current executable.
2300 2306 """
2301 2307 if mainfrozen():
2302 2308 if getattr(sys, 'frozen', None) == 'macosx_app':
2303 2309 # Env variable set by py2app
2304 2310 return [encoding.environ['EXECUTABLEPATH']]
2305 2311 else:
2306 2312 return [pycompat.sysexecutable]
2307 2313 return gethgcmd()
2308 2314
2309 2315 def rundetached(args, condfn):
2310 2316 """Execute the argument list in a detached process.
2311 2317
2312 2318 condfn is a callable which is called repeatedly and should return
2313 2319 True once the child process is known to have started successfully.
2314 2320 At this point, the child process PID is returned. If the child
2315 2321 process fails to start or finishes before condfn() evaluates to
2316 2322 True, return -1.
2317 2323 """
2318 2324 # Windows case is easier because the child process is either
2319 2325 # successfully starting and validating the condition or exiting
2320 2326 # on failure. We just poll on its PID. On Unix, if the child
2321 2327 # process fails to start, it will be left in a zombie state until
2322 2328 # the parent waits on it, which we cannot do since we expect a long
2323 2329 # running process on success. Instead we listen for SIGCHLD telling
2324 2330 # us our child process terminated.
2325 2331 terminated = set()
2326 2332 def handler(signum, frame):
2327 2333 terminated.add(os.wait())
2328 2334 prevhandler = None
2329 2335 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2330 2336 if SIGCHLD is not None:
2331 2337 prevhandler = signal.signal(SIGCHLD, handler)
2332 2338 try:
2333 2339 pid = spawndetached(args)
2334 2340 while not condfn():
2335 2341 if ((pid in terminated or not testpid(pid))
2336 2342 and not condfn()):
2337 2343 return -1
2338 2344 time.sleep(0.1)
2339 2345 return pid
2340 2346 finally:
2341 2347 if prevhandler is not None:
2342 2348 signal.signal(signal.SIGCHLD, prevhandler)
2343 2349
2344 2350 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2345 2351 """Return the result of interpolating items in the mapping into string s.
2346 2352
2347 2353 prefix is a single character string, or a two character string with
2348 2354 a backslash as the first character if the prefix needs to be escaped in
2349 2355 a regular expression.
2350 2356
2351 2357 fn is an optional function that will be applied to the replacement text
2352 2358 just before replacement.
2353 2359
2354 2360 escape_prefix is an optional flag that allows using doubled prefix for
2355 2361 its escaping.
2356 2362 """
2357 2363 fn = fn or (lambda s: s)
2358 2364 patterns = '|'.join(mapping.keys())
2359 2365 if escape_prefix:
2360 2366 patterns += '|' + prefix
2361 2367 if len(prefix) > 1:
2362 2368 prefix_char = prefix[1:]
2363 2369 else:
2364 2370 prefix_char = prefix
2365 2371 mapping[prefix_char] = prefix_char
2366 2372 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2367 2373 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
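
# Example (illustrative sketch, not part of the original file): with a '%'
# prefix, every '%key' occurrence is replaced by the mapped value:
#
#   >>> interpolate('%', {'a': 'apple', 'b': 'banana'}, 'I like %a and %b')
#   'I like apple and banana'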
2368 2374
2369 2375 def getport(port):
2370 2376 """Return the port for a given network service.
2371 2377
2372 2378 If port is an integer, it's returned as is. If it's a string, it's
2373 2379 looked up using socket.getservbyname(). If there's no matching
2374 2380 service, error.Abort is raised.
2375 2381 """
2376 2382 try:
2377 2383 return int(port)
2378 2384 except ValueError:
2379 2385 pass
2380 2386
2381 2387 try:
2382 2388 return socket.getservbyname(port)
2383 2389 except socket.error:
2384 2390 raise Abort(_("no port number associated with service '%s'") % port)
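
# Example (illustrative sketch, not part of the original file; the service
# lookup assumes a typical /etc/services):
#
#   >>> getport(8080)
#   8080
#   >>> getport('8080')
#   8080
#   >>> getport('http')
#   80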
2385 2391
2386 2392 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2387 2393 '0': False, 'no': False, 'false': False, 'off': False,
2388 2394 'never': False}
2389 2395
2390 2396 def parsebool(s):
2391 2397 """Parse s into a boolean.
2392 2398
2393 2399 If s is not a valid boolean, returns None.
2394 2400 """
2395 2401 return _booleans.get(s.lower(), None)
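
# Example (illustrative sketch, not part of the original file):
#
#   >>> parsebool('yes'), parsebool('off'), parsebool('maybe')
#   (True, False, None)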
2396 2402
2397 2403 _hextochr = dict((a + b, chr(int(a + b, 16)))
2398 2404 for a in string.hexdigits for b in string.hexdigits)
2399 2405
2400 2406 class url(object):
2401 2407 r"""Reliable URL parser.
2402 2408
2403 2409 This parses URLs and provides attributes for the following
2404 2410 components:
2405 2411
2406 2412 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2407 2413
2408 2414 Missing components are set to None. The only exception is
2409 2415 fragment, which is set to '' if present but empty.
2410 2416
2411 2417 If parsefragment is False, fragment is included in query. If
2412 2418 parsequery is False, query is included in path. If both are
2413 2419 False, both fragment and query are included in path.
2414 2420
2415 2421 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2416 2422
2417 2423 Note that for backward compatibility reasons, bundle URLs do not
2418 2424 take host names. That means 'bundle://../' has a path of '../'.
2419 2425
2420 2426 Examples:
2421 2427
2422 2428 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2423 2429 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2424 2430 >>> url('ssh://[::1]:2200//home/joe/repo')
2425 2431 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2426 2432 >>> url('file:///home/joe/repo')
2427 2433 <url scheme: 'file', path: '/home/joe/repo'>
2428 2434 >>> url('file:///c:/temp/foo/')
2429 2435 <url scheme: 'file', path: 'c:/temp/foo/'>
2430 2436 >>> url('bundle:foo')
2431 2437 <url scheme: 'bundle', path: 'foo'>
2432 2438 >>> url('bundle://../foo')
2433 2439 <url scheme: 'bundle', path: '../foo'>
2434 2440 >>> url(r'c:\foo\bar')
2435 2441 <url path: 'c:\\foo\\bar'>
2436 2442 >>> url(r'\\blah\blah\blah')
2437 2443 <url path: '\\\\blah\\blah\\blah'>
2438 2444 >>> url(r'\\blah\blah\blah#baz')
2439 2445 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2440 2446 >>> url(r'file:///C:\users\me')
2441 2447 <url scheme: 'file', path: 'C:\\users\\me'>
2442 2448
2443 2449 Authentication credentials:
2444 2450
2445 2451 >>> url('ssh://joe:xyz@x/repo')
2446 2452 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2447 2453 >>> url('ssh://joe@x/repo')
2448 2454 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2449 2455
2450 2456 Query strings and fragments:
2451 2457
2452 2458 >>> url('http://host/a?b#c')
2453 2459 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2454 2460 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2455 2461 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2456 2462
2457 2463 Empty path:
2458 2464
2459 2465 >>> url('')
2460 2466 <url path: ''>
2461 2467 >>> url('#a')
2462 2468 <url path: '', fragment: 'a'>
2463 2469 >>> url('http://host/')
2464 2470 <url scheme: 'http', host: 'host', path: ''>
2465 2471 >>> url('http://host/#a')
2466 2472 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2467 2473
2468 2474 Only scheme:
2469 2475
2470 2476 >>> url('http:')
2471 2477 <url scheme: 'http'>
2472 2478 """
2473 2479
2474 2480 _safechars = "!~*'()+"
2475 2481 _safepchars = "/!~*'()+:\\"
2476 2482 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2477 2483
2478 2484 def __init__(self, path, parsequery=True, parsefragment=True):
2479 2485 # We slowly chomp away at path until we have only the path left
2480 2486 self.scheme = self.user = self.passwd = self.host = None
2481 2487 self.port = self.path = self.query = self.fragment = None
2482 2488 self._localpath = True
2483 2489 self._hostport = ''
2484 2490 self._origpath = path
2485 2491
2486 2492 if parsefragment and '#' in path:
2487 2493 path, self.fragment = path.split('#', 1)
2488 2494
2489 2495 # special case for Windows drive letters and UNC paths
2490 2496 if hasdriveletter(path) or path.startswith('\\\\'):
2491 2497 self.path = path
2492 2498 return
2493 2499
2494 2500 # For compatibility reasons, we can't handle bundle paths as
2495 2501 # normal URLs
2496 2502 if path.startswith('bundle:'):
2497 2503 self.scheme = 'bundle'
2498 2504 path = path[7:]
2499 2505 if path.startswith('//'):
2500 2506 path = path[2:]
2501 2507 self.path = path
2502 2508 return
2503 2509
2504 2510 if self._matchscheme(path):
2505 2511 parts = path.split(':', 1)
2506 2512 if parts[0]:
2507 2513 self.scheme, path = parts
2508 2514 self._localpath = False
2509 2515
2510 2516 if not path:
2511 2517 path = None
2512 2518 if self._localpath:
2513 2519 self.path = ''
2514 2520 return
2515 2521 else:
2516 2522 if self._localpath:
2517 2523 self.path = path
2518 2524 return
2519 2525
2520 2526 if parsequery and '?' in path:
2521 2527 path, self.query = path.split('?', 1)
2522 2528 if not path:
2523 2529 path = None
2524 2530 if not self.query:
2525 2531 self.query = None
2526 2532
2527 2533 # // is required to specify a host/authority
2528 2534 if path and path.startswith('//'):
2529 2535 parts = path[2:].split('/', 1)
2530 2536 if len(parts) > 1:
2531 2537 self.host, path = parts
2532 2538 else:
2533 2539 self.host = parts[0]
2534 2540 path = None
2535 2541 if not self.host:
2536 2542 self.host = None
2537 2543 # path of file:///d is /d
2538 2544 # path of file:///d:/ is d:/, not /d:/
2539 2545 if path and not hasdriveletter(path):
2540 2546 path = '/' + path
2541 2547
2542 2548 if self.host and '@' in self.host:
2543 2549 self.user, self.host = self.host.rsplit('@', 1)
2544 2550 if ':' in self.user:
2545 2551 self.user, self.passwd = self.user.split(':', 1)
2546 2552 if not self.host:
2547 2553 self.host = None
2548 2554
2549 2555 # Don't split on colons in IPv6 addresses without ports
2550 2556 if (self.host and ':' in self.host and
2551 2557 not (self.host.startswith('[') and self.host.endswith(']'))):
2552 2558 self._hostport = self.host
2553 2559 self.host, self.port = self.host.rsplit(':', 1)
2554 2560 if not self.host:
2555 2561 self.host = None
2556 2562
2557 2563 if (self.host and self.scheme == 'file' and
2558 2564 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2559 2565 raise Abort(_('file:// URLs can only refer to localhost'))
2560 2566
2561 2567 self.path = path
2562 2568
2563 2569 # leave the query string escaped
2564 2570 for a in ('user', 'passwd', 'host', 'port',
2565 2571 'path', 'fragment'):
2566 2572 v = getattr(self, a)
2567 2573 if v is not None:
2568 2574 setattr(self, a, pycompat.urlunquote(v))
2569 2575
2570 2576 def __repr__(self):
2571 2577 attrs = []
2572 2578 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2573 2579 'query', 'fragment'):
2574 2580 v = getattr(self, a)
2575 2581 if v is not None:
2576 2582 attrs.append('%s: %r' % (a, v))
2577 2583 return '<url %s>' % ', '.join(attrs)
2578 2584
2579 2585 def __str__(self):
2580 2586 r"""Join the URL's components back into a URL string.
2581 2587
2582 2588 Examples:
2583 2589
2584 2590 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2585 2591 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2586 2592 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2587 2593 'http://user:pw@host:80/?foo=bar&baz=42'
2588 2594 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2589 2595 'http://user:pw@host:80/?foo=bar%3dbaz'
2590 2596 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2591 2597 'ssh://user:pw@[::1]:2200//home/joe#'
2592 2598 >>> str(url('http://localhost:80//'))
2593 2599 'http://localhost:80//'
2594 2600 >>> str(url('http://localhost:80/'))
2595 2601 'http://localhost:80/'
2596 2602 >>> str(url('http://localhost:80'))
2597 2603 'http://localhost:80/'
2598 2604 >>> str(url('bundle:foo'))
2599 2605 'bundle:foo'
2600 2606 >>> str(url('bundle://../foo'))
2601 2607 'bundle:../foo'
2602 2608 >>> str(url('path'))
2603 2609 'path'
2604 2610 >>> str(url('file:///tmp/foo/bar'))
2605 2611 'file:///tmp/foo/bar'
2606 2612 >>> str(url('file:///c:/tmp/foo/bar'))
2607 2613 'file:///c:/tmp/foo/bar'
2608 2614 >>> print url(r'bundle:foo\bar')
2609 2615 bundle:foo\bar
2610 2616 >>> print url(r'file:///D:\data\hg')
2611 2617 file:///D:\data\hg
2612 2618 """
2613 2619 if self._localpath:
2614 2620 s = self.path
2615 2621 if self.scheme == 'bundle':
2616 2622 s = 'bundle:' + s
2617 2623 if self.fragment:
2618 2624 s += '#' + self.fragment
2619 2625 return s
2620 2626
2621 2627 s = self.scheme + ':'
2622 2628 if self.user or self.passwd or self.host:
2623 2629 s += '//'
2624 2630 elif self.scheme and (not self.path or self.path.startswith('/')
2625 2631 or hasdriveletter(self.path)):
2626 2632 s += '//'
2627 2633 if hasdriveletter(self.path):
2628 2634 s += '/'
2629 2635 if self.user:
2630 2636 s += urlreq.quote(self.user, safe=self._safechars)
2631 2637 if self.passwd:
2632 2638 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2633 2639 if self.user or self.passwd:
2634 2640 s += '@'
2635 2641 if self.host:
2636 2642 if not (self.host.startswith('[') and self.host.endswith(']')):
2637 2643 s += urlreq.quote(self.host)
2638 2644 else:
2639 2645 s += self.host
2640 2646 if self.port:
2641 2647 s += ':' + urlreq.quote(self.port)
2642 2648 if self.host:
2643 2649 s += '/'
2644 2650 if self.path:
2645 2651 # TODO: similar to the query string, we should not unescape the
2646 2652 # path when we store it, the path might contain '%2f' = '/',
2647 2653 # which we should *not* escape.
2648 2654 s += urlreq.quote(self.path, safe=self._safepchars)
2649 2655 if self.query:
2650 2656 # we store the query in escaped form.
2651 2657 s += '?' + self.query
2652 2658 if self.fragment is not None:
2653 2659 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2654 2660 return s
2655 2661
2656 2662 def authinfo(self):
2657 2663 user, passwd = self.user, self.passwd
2658 2664 try:
2659 2665 self.user, self.passwd = None, None
2660 2666 s = str(self)
2661 2667 finally:
2662 2668 self.user, self.passwd = user, passwd
2663 2669 if not self.user:
2664 2670 return (s, None)
2665 2671 # authinfo[1] is passed to urllib2 password manager, and its
2666 2672 # URIs must not contain credentials. The host is passed in the
2667 2673 # URIs list because Python < 2.4.3 uses only that to search for
2668 2674 # a password.
2669 2675 return (s, (None, (s, self.host),
2670 2676 self.user, self.passwd or ''))
2671 2677
2672 2678 def isabs(self):
2673 2679 if self.scheme and self.scheme != 'file':
2674 2680 return True # remote URL
2675 2681 if hasdriveletter(self.path):
2676 2682 return True # absolute for our purposes - can't be joined()
2677 2683 if self.path.startswith(r'\\'):
2678 2684 return True # Windows UNC path
2679 2685 if self.path.startswith('/'):
2680 2686 return True # POSIX-style
2681 2687 return False
2682 2688
2683 2689 def localpath(self):
2684 2690 if self.scheme == 'file' or self.scheme == 'bundle':
2685 2691 path = self.path or '/'
2686 2692 # For Windows, we need to promote hosts containing drive
2687 2693 # letters to paths with drive letters.
2688 2694 if hasdriveletter(self._hostport):
2689 2695 path = self._hostport + '/' + self.path
2690 2696 elif (self.host is not None and self.path
2691 2697 and not hasdriveletter(path)):
2692 2698 path = '/' + path
2693 2699 return path
2694 2700 return self._origpath
2695 2701
2696 2702 def islocal(self):
2697 2703 '''whether localpath will return something that posixfile can open'''
2698 2704 return (not self.scheme or self.scheme == 'file'
2699 2705 or self.scheme == 'bundle')
2700 2706
2701 2707 def hasscheme(path):
2702 2708 return bool(url(path).scheme)
2703 2709
2704 2710 def hasdriveletter(path):
2705 2711 return path and path[1:2] == ':' and path[0:1].isalpha()
2706 2712
2707 2713 def urllocalpath(path):
2708 2714 return url(path, parsequery=False, parsefragment=False).localpath()
2709 2715
2710 2716 def hidepassword(u):
2711 2717 '''hide user credential in a url string'''
2712 2718 u = url(u)
2713 2719 if u.passwd:
2714 2720 u.passwd = '***'
2715 2721 return str(u)
2716 2722
2717 2723 def removeauth(u):
2718 2724 '''remove all authentication information from a url string'''
2719 2725 u = url(u)
2720 2726 u.user = u.passwd = None
2721 2727 return str(u)
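
# Example (illustrative sketch, not part of the original file): both helpers
# round-trip the URL through the parser above, so only the credential parts
# change:
#
#   >>> hidepassword('http://alice:secret@example.com/repo')
#   'http://alice:***@example.com/repo'
#   >>> removeauth('http://alice:secret@example.com/repo')
#   'http://example.com/repo'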
2722 2728
2723 2729 def isatty(fp):
2724 2730 try:
2725 2731 return fp.isatty()
2726 2732 except AttributeError:
2727 2733 return False
2728 2734
2729 2735 timecount = unitcountfn(
2730 2736 (1, 1e3, _('%.0f s')),
2731 2737 (100, 1, _('%.1f s')),
2732 2738 (10, 1, _('%.2f s')),
2733 2739 (1, 1, _('%.3f s')),
2734 2740 (100, 0.001, _('%.1f ms')),
2735 2741 (10, 0.001, _('%.2f ms')),
2736 2742 (1, 0.001, _('%.3f ms')),
2737 2743 (100, 0.000001, _('%.1f us')),
2738 2744 (10, 0.000001, _('%.2f us')),
2739 2745 (1, 0.000001, _('%.3f us')),
2740 2746 (100, 0.000000001, _('%.1f ns')),
2741 2747 (10, 0.000000001, _('%.2f ns')),
2742 2748 (1, 0.000000001, _('%.3f ns')),
2743 2749 )
2744 2750
2745 2751 _timenesting = [0]
2746 2752
2747 2753 def timed(func):
2748 2754 '''Report the execution time of a function call to stderr.
2749 2755
2750 2756 During development, use as a decorator when you need to measure
2751 2757 the cost of a function, e.g. as follows:
2752 2758
2753 2759 @util.timed
2754 2760 def foo(a, b, c):
2755 2761 pass
2756 2762 '''
2757 2763
2758 2764 def wrapper(*args, **kwargs):
2759 2765 start = time.time()
2760 2766 indent = 2
2761 2767 _timenesting[0] += indent
2762 2768 try:
2763 2769 return func(*args, **kwargs)
2764 2770 finally:
2765 2771 elapsed = time.time() - start
2766 2772 _timenesting[0] -= indent
2767 2773 stderr.write('%s%s: %s\n' %
2768 2774 (' ' * _timenesting[0], func.__name__,
2769 2775 timecount(elapsed)))
2770 2776 return wrapper
2771 2777
2772 2778 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2773 2779 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2774 2780
2775 2781 def sizetoint(s):
2776 2782 '''Convert a space specifier to a byte count.
2777 2783
2778 2784 >>> sizetoint('30')
2779 2785 30
2780 2786 >>> sizetoint('2.2kb')
2781 2787 2252
2782 2788 >>> sizetoint('6M')
2783 2789 6291456
2784 2790 '''
2785 2791 t = s.strip().lower()
2786 2792 try:
2787 2793 for k, u in _sizeunits:
2788 2794 if t.endswith(k):
2789 2795 return int(float(t[:-len(k)]) * u)
2790 2796 return int(t)
2791 2797 except ValueError:
2792 2798 raise error.ParseError(_("couldn't parse size: %s") % s)
2793 2799
2794 2800 class hooks(object):
2795 2801 '''A collection of hook functions that can be used to extend a
2796 2802 function's behavior. Hooks are called in lexicographic order,
2797 2803 based on the names of their sources.'''
2798 2804
2799 2805 def __init__(self):
2800 2806 self._hooks = []
2801 2807
2802 2808 def add(self, source, hook):
2803 2809 self._hooks.append((source, hook))
2804 2810
2805 2811 def __call__(self, *args):
2806 2812 self._hooks.sort(key=lambda x: x[0])
2807 2813 results = []
2808 2814 for source, hook in self._hooks:
2809 2815 results.append(hook(*args))
2810 2816 return results
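
# Example (illustrative sketch, not part of the original file): hooks are
# sorted by source name on every call, so registration order does not matter:
#
#   >>> h = hooks()
#   >>> h.add('zzz', lambda x: x * 2)
#   >>> h.add('aaa', lambda x: x + 1)
#   >>> h(3)
#   [4, 6]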
2811 2817
2812 2818 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
2813 2819 '''Yields lines for a nicely formatted stacktrace.
2814 2820 Skips the last 'skip' entries.
2815 2821 Each file+linenumber is formatted according to fileline.
2816 2822 Each line is formatted according to line.
2817 2823 If line is None, it yields:
2818 2824 length of longest filepath+line number,
2819 2825 filepath+linenumber,
2820 2826 function
2821 2827
2822 2828 Not to be used in production code, but very convenient while developing.
2823 2829 '''
2824 2830 entries = [(fileline % (fn, ln), func)
2825 2831 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
2826 2832 if entries:
2827 2833 fnmax = max(len(entry[0]) for entry in entries)
2828 2834 for fnln, func in entries:
2829 2835 if line is None:
2830 2836 yield (fnmax, fnln, func)
2831 2837 else:
2832 2838 yield line % (fnmax, fnln, func)
2833 2839
2834 2840 def debugstacktrace(msg='stacktrace', skip=0, f=stderr, otherf=stdout):
2835 2841 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2836 2842 Skips the last 'skip' entries. By default it will flush stdout first.
2837 2843 It can be used everywhere and intentionally does not require an ui object.
2838 2844 Not to be used in production code, but very convenient while developing.
2839 2845 '''
2840 2846 if otherf:
2841 2847 otherf.flush()
2842 2848 f.write('%s at:\n' % msg)
2843 2849 for line in getstackframes(skip + 1):
2844 2850 f.write(line)
2845 2851 f.flush()
2846 2852
2847 2853 class dirs(object):
2848 2854 '''a multiset of directory names from a dirstate or manifest'''
2849 2855
2850 2856 def __init__(self, map, skip=None):
2851 2857 self._dirs = {}
2852 2858 addpath = self.addpath
2853 2859 if safehasattr(map, 'iteritems') and skip is not None:
2854 2860 for f, s in map.iteritems():
2855 2861 if s[0] != skip:
2856 2862 addpath(f)
2857 2863 else:
2858 2864 for f in map:
2859 2865 addpath(f)
2860 2866
2861 2867 def addpath(self, path):
2862 2868 dirs = self._dirs
2863 2869 for base in finddirs(path):
2864 2870 if base in dirs:
2865 2871 dirs[base] += 1
2866 2872 return
2867 2873 dirs[base] = 1
2868 2874
2869 2875 def delpath(self, path):
2870 2876 dirs = self._dirs
2871 2877 for base in finddirs(path):
2872 2878 if dirs[base] > 1:
2873 2879 dirs[base] -= 1
2874 2880 return
2875 2881 del dirs[base]
2876 2882
2877 2883 def __iter__(self):
2878 2884 return self._dirs.iterkeys()
2879 2885
2880 2886 def __contains__(self, d):
2881 2887 return d in self._dirs
2882 2888
2883 2889 if safehasattr(parsers, 'dirs'):
2884 2890 dirs = parsers.dirs
2885 2891
2886 2892 def finddirs(path):
2887 2893 pos = path.rfind('/')
2888 2894 while pos != -1:
2889 2895 yield path[:pos]
2890 2896 pos = path.rfind('/', 0, pos)
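
# Example (illustrative sketch, not part of the original file, assuming the
# pure-Python 'dirs' class above rather than the C implementation):
#
#   >>> list(finddirs('a/b/c.txt'))
#   ['a/b', 'a']
#   >>> d = dirs(['a/b/c.txt', 'a/d.txt'])
#   >>> sorted(d)
#   ['a', 'a/b']
#   >>> 'a/b' in d
#   True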
2891 2897
2892 2898 class ctxmanager(object):
2893 2899 '''A context manager for use in 'with' blocks to allow multiple
2894 2900 contexts to be entered at once. This is both safer and more
2895 2901 flexible than contextlib.nested.
2896 2902
2897 2903 Once Mercurial supports Python 2.7+, this will become mostly
2898 2904 unnecessary.
2899 2905 '''
2900 2906
2901 2907 def __init__(self, *args):
2902 2908 '''Accepts a list of no-argument functions that return context
2903 2909 managers. These will be invoked at __call__ time.'''
2904 2910 self._pending = args
2905 2911 self._atexit = []
2906 2912
2907 2913 def __enter__(self):
2908 2914 return self
2909 2915
2910 2916 def enter(self):
2911 2917 '''Create and enter context managers in the order in which they were
2912 2918 passed to the constructor.'''
2913 2919 values = []
2914 2920 for func in self._pending:
2915 2921 obj = func()
2916 2922 values.append(obj.__enter__())
2917 2923 self._atexit.append(obj.__exit__)
2918 2924 del self._pending
2919 2925 return values
2920 2926
2921 2927 def atexit(self, func, *args, **kwargs):
2922 2928 '''Add a function to call when this context manager exits. The
2923 2929 ordering of multiple atexit calls is unspecified, save that
2924 2930 they will happen before any __exit__ functions.'''
2925 2931 def wrapper(exc_type, exc_val, exc_tb):
2926 2932 func(*args, **kwargs)
2927 2933 self._atexit.append(wrapper)
2928 2934 return func
2929 2935
2930 2936 def __exit__(self, exc_type, exc_val, exc_tb):
2931 2937 '''Context managers are exited in the reverse order from which
2932 2938 they were created.'''
2933 2939 received = exc_type is not None
2934 2940 suppressed = False
2935 2941 pending = None
2936 2942 self._atexit.reverse()
2937 2943 for exitfunc in self._atexit:
2938 2944 try:
2939 2945 if exitfunc(exc_type, exc_val, exc_tb):
2940 2946 suppressed = True
2941 2947 exc_type = None
2942 2948 exc_val = None
2943 2949 exc_tb = None
2944 2950 except BaseException:
2946 2952 exc_type, exc_val, exc_tb = pending = sys.exc_info()
2947 2953 del self._atexit
2948 2954 if pending:
2949 2955 raise exc_val
2950 2956 return received and suppressed
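
# Example (illustrative sketch, not part of the original file): callers pass
# zero-argument callables and unpack the entered values from enter():
#
#   with ctxmanager(lambda: open('a.txt'), lambda: open('b.txt')) as c:
#       fa, fb = c.enter()
#       # ... use both files; they are closed when the 'with' block exits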
2951 2957
2952 2958 # compression code
2953 2959
2954 2960 class compressormanager(object):
2955 2961 """Holds registrations of various compression engines.
2956 2962
2957 2963 This class essentially abstracts the differences between compression
2958 2964 engines to allow new compression formats to be added easily, possibly from
2959 2965 extensions.
2960 2966
2961 2967 Compressors are registered against the global instance by calling its
2962 2968 ``register()`` method.
2963 2969 """
2964 2970 def __init__(self):
2965 2971 self._engines = {}
2966 2972 # Bundle spec human name to engine name.
2967 2973 self._bundlenames = {}
2968 2974 # Internal bundle identifier to engine name.
2969 2975 self._bundletypes = {}
2970 2976
2971 2977 def __getitem__(self, key):
2972 2978 return self._engines[key]
2973 2979
2974 2980 def __contains__(self, key):
2975 2981 return key in self._engines
2976 2982
2977 2983 def __iter__(self):
2978 2984 return iter(self._engines.keys())
2979 2985
2980 2986 def register(self, engine):
2981 2987 """Register a compression engine with the manager.
2982 2988
2983 2989 The argument must be a ``compressionengine`` instance.
2984 2990 """
2985 2991 if not isinstance(engine, compressionengine):
2986 2992 raise ValueError(_('argument must be a compressionengine'))
2987 2993
2988 2994 name = engine.name()
2989 2995
2990 2996 if name in self._engines:
2991 2997 raise error.Abort(_('compression engine %s already registered') %
2992 2998 name)
2993 2999
2994 3000 bundleinfo = engine.bundletype()
2995 3001 if bundleinfo:
2996 3002 bundlename, bundletype = bundleinfo
2997 3003
2998 3004 if bundlename in self._bundlenames:
2999 3005 raise error.Abort(_('bundle name %s already registered') %
3000 3006 bundlename)
3001 3007 if bundletype in self._bundletypes:
3002 3008 raise error.Abort(_('bundle type %s already registered by %s') %
3003 3009 (bundletype, self._bundletypes[bundletype]))
3004 3010
3005 3011 # No external facing name declared.
3006 3012 if bundlename:
3007 3013 self._bundlenames[bundlename] = name
3008 3014
3009 3015 self._bundletypes[bundletype] = name
3010 3016
3011 3017 self._engines[name] = engine
3012 3018
3013 3019 @property
3014 3020 def supportedbundlenames(self):
3015 3021 return set(self._bundlenames.keys())
3016 3022
3017 3023 @property
3018 3024 def supportedbundletypes(self):
3019 3025 return set(self._bundletypes.keys())
3020 3026
3021 3027 def forbundlename(self, bundlename):
3022 3028 """Obtain a compression engine registered to a bundle name.
3023 3029
3024 3030 Will raise KeyError if the bundle type isn't registered.
3025 3031
3026 3032 Will abort if the engine is known but not available.
3027 3033 """
3028 3034 engine = self._engines[self._bundlenames[bundlename]]
3029 3035 if not engine.available():
3030 3036 raise error.Abort(_('compression engine %s could not be loaded') %
3031 3037 engine.name())
3032 3038 return engine
3033 3039
3034 3040 def forbundletype(self, bundletype):
3035 3041 """Obtain a compression engine registered to a bundle type.
3036 3042
3037 3043 Will raise KeyError if the bundle type isn't registered.
3038 3044
3039 3045 Will abort if the engine is known but not available.
3040 3046 """
3041 3047 engine = self._engines[self._bundletypes[bundletype]]
3042 3048 if not engine.available():
3043 3049 raise error.Abort(_('compression engine %s could not be loaded') %
3044 3050 engine.name())
3045 3051 return engine
3046 3052
3047 3053 compengines = compressormanager()
3048 3054
3049 3055 class compressionengine(object):
3050 3056 """Base class for compression engines.
3051 3057
3052 3058 Compression engines must implement the interface defined by this class.
3053 3059 """
3054 3060 def name(self):
3055 3061 """Returns the name of the compression engine.
3056 3062
3057 3063 This is the key the engine is registered under.
3058 3064
3059 3065 This method must be implemented.
3060 3066 """
3061 3067 raise NotImplementedError()
3062 3068
3063 3069 def available(self):
3064 3070 """Whether the compression engine is available.
3065 3071
3066 3072 The intent of this method is to allow optional compression engines
3067 3073 that may not be available in all installations (such as engines relying
3068 3074 on C extensions that may not be present).
3069 3075 """
3070 3076 return True
3071 3077
3072 3078 def bundletype(self):
3073 3079 """Describes bundle identifiers for this engine.
3074 3080
3075 3081 If this compression engine isn't supported for bundles, returns None.
3076 3082
3077 3083 If this engine can be used for bundles, returns a 2-tuple of strings of
3078 3084 the user-facing "bundle spec" compression name and an internal
3079 3085 identifier used to denote the compression format within bundles. To
3080 3086 exclude the name from external usage, set the first element to ``None``.
3081 3087
3082 3088 If bundle compression is supported, the class must also implement
3083 3089 ``compressstream`` and ``decompressorreader``.
3084 3090 """
3085 3091 return None
3086 3092
3087 3093 def compressstream(self, it, opts=None):
3088 3094 """Compress an iterator of chunks.
3089 3095
3090 3096 The method receives an iterator (ideally a generator) of chunks of
3091 3097 bytes to be compressed. It returns an iterator (ideally a generator)
3092 3098 of chunks of bytes representing the compressed output.
3093 3099
3094 3100 Optionally accepts an argument defining how to perform compression.
3095 3101 Each engine treats this argument differently.
3096 3102 """
3097 3103 raise NotImplementedError()
3098 3104
3099 3105 def decompressorreader(self, fh):
3100 3106 """Perform decompression on a file object.
3101 3107
3102 3108 Argument is an object with a ``read(size)`` method that returns
3103 3109 compressed data. The return value is an object with a ``read(size)``
3104 3110 method that returns uncompressed data.
3105 3111 """
3106 3112 raise NotImplementedError()
3107 3113
3108 3114 class _zlibengine(compressionengine):
3109 3115 def name(self):
3110 3116 return 'zlib'
3111 3117
3112 3118 def bundletype(self):
3113 3119 return 'gzip', 'GZ'
3114 3120
3115 3121 def compressstream(self, it, opts=None):
3116 3122 opts = opts or {}
3117 3123
3118 3124 z = zlib.compressobj(opts.get('level', -1))
3119 3125 for chunk in it:
3120 3126 data = z.compress(chunk)
3121 3127 # Not all calls to compress emit data. It is cheaper to inspect
3122 3128 # here than to feed empty chunks through the generator.
3123 3129 if data:
3124 3130 yield data
3125 3131
3126 3132 yield z.flush()
3127 3133
3128 3134 def decompressorreader(self, fh):
3129 3135 def gen():
3130 3136 d = zlib.decompressobj()
3131 3137 for chunk in filechunkiter(fh):
3132 3138 while chunk:
3133 3139 # Limit output size to limit memory.
3134 3140 yield d.decompress(chunk, 2 ** 18)
3135 3141 chunk = d.unconsumed_tail
3136 3142
3137 3143 return chunkbuffer(gen())
3138 3144
3139 3145 compengines.register(_zlibengine())
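
# Example (illustrative sketch, not part of the original file): a round trip
# through the zlib engine, assuming io.BytesIO as the reader's file object and
# that chunkbuffer.read() with no argument drains the whole stream:
#
#   >>> import io
#   >>> eng = compengines['zlib']
#   >>> data = ''.join(eng.compressstream(['hello ', 'world']))
#   >>> eng.decompressorreader(io.BytesIO(data)).read()
#   'hello world'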
3140 3146
3141 3147 class _bz2engine(compressionengine):
3142 3148 def name(self):
3143 3149 return 'bz2'
3144 3150
3145 3151 def bundletype(self):
3146 3152 return 'bzip2', 'BZ'
3147 3153
3148 3154 def compressstream(self, it, opts=None):
3149 3155 opts = opts or {}
3150 3156 z = bz2.BZ2Compressor(opts.get('level', 9))
3151 3157 for chunk in it:
3152 3158 data = z.compress(chunk)
3153 3159 if data:
3154 3160 yield data
3155 3161
3156 3162 yield z.flush()
3157 3163
3158 3164 def decompressorreader(self, fh):
3159 3165 def gen():
3160 3166 d = bz2.BZ2Decompressor()
3161 3167 for chunk in filechunkiter(fh):
3162 3168 yield d.decompress(chunk)
3163 3169
3164 3170 return chunkbuffer(gen())
3165 3171
3166 3172 compengines.register(_bz2engine())
3167 3173
3168 3174 class _truncatedbz2engine(compressionengine):
3169 3175 def name(self):
3170 3176 return 'bz2truncated'
3171 3177
3172 3178 def bundletype(self):
3173 3179 return None, '_truncatedBZ'
3174 3180
3175 3181 # We don't implement compressstream because it is hackily handled elsewhere.
3176 3182
3177 3183 def decompressorreader(self, fh):
3178 3184 def gen():
3179 3185 # The input stream doesn't have the 'BZ' header. So add it back.
3180 3186 d = bz2.BZ2Decompressor()
3181 3187 d.decompress('BZ')
3182 3188 for chunk in filechunkiter(fh):
3183 3189 yield d.decompress(chunk)
3184 3190
3185 3191 return chunkbuffer(gen())
3186 3192
3187 3193 compengines.register(_truncatedbz2engine())
3188 3194
3189 3195 class _noopengine(compressionengine):
3190 3196 def name(self):
3191 3197 return 'none'
3192 3198
3193 3199 def bundletype(self):
3194 3200 return 'none', 'UN'
3195 3201
3196 3202 def compressstream(self, it, opts=None):
3197 3203 return it
3198 3204
3199 3205 def decompressorreader(self, fh):
3200 3206 return fh
3201 3207
3202 3208 compengines.register(_noopengine())
3203 3209
3204 3210 class _zstdengine(compressionengine):
3205 3211 def name(self):
3206 3212 return 'zstd'
3207 3213
3208 3214 @propertycache
3209 3215 def _module(self):
3210 3216 # Not all installs have the zstd module available. So defer importing
3211 3217 # until first access.
3212 3218 try:
3213 3219 from . import zstd
3214 3220 # Force delayed import.
3215 3221 zstd.__version__
3216 3222 return zstd
3217 3223 except ImportError:
3218 3224 return None
3219 3225
3220 3226 def available(self):
3221 3227 return bool(self._module)
3222 3228
3223 3229 def bundletype(self):
3224 3230 return 'zstd', 'ZS'
3225 3231
3226 3232 def compressstream(self, it, opts=None):
3227 3233 opts = opts or {}
3228 3234 # zstd level 3 is almost always significantly faster than zlib
3229 3235 # while providing no worse compression. It strikes a good balance
3230 3236 # between speed and compression.
3231 3237 level = opts.get('level', 3)
3232 3238
3233 3239 zstd = self._module
3234 3240 z = zstd.ZstdCompressor(level=level).compressobj()
3235 3241 for chunk in it:
3236 3242 data = z.compress(chunk)
3237 3243 if data:
3238 3244 yield data
3239 3245
3240 3246 yield z.flush()
3241 3247
3242 3248 def decompressorreader(self, fh):
3243 3249 zstd = self._module
3244 3250 dctx = zstd.ZstdDecompressor()
3245 3251 return chunkbuffer(dctx.read_from(fh))
3246 3252
3247 3253 compengines.register(_zstdengine())
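
# Example (illustrative sketch, not part of the original file): with the
# engines registered above, bundle lookups resolve roughly as follows ('zstd'
# is listed even when its module is unavailable, since registration does not
# check availability):
#
#   >>> compengines.forbundletype('GZ').name()
#   'zlib'
#   >>> compengines.forbundlename('bzip2').name()
#   'bz2'
#   >>> sorted(compengines.supportedbundlenames)
#   ['bzip2', 'gzip', 'none', 'zstd']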
3248 3254
3249 3255 # convenient shortcut
3250 3256 dst = debugstacktrace