revlog: fix caching of buffer objects
Matt Mackall
r5450:c728424d default
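This changeset normalizes values to str before caching them: appender.write()
now stores str(s), and revlog.revision() returns str(self._cache[2]) (and uses
str() when reusing the cached text). The cause is further down in addgroup(),
which slices incoming chunks zero-copy with buffer() objects that can end up in
the write buffer and in the revision cache. A minimal Python 2 sketch of the
pitfall, with hypothetical data:

    b = buffer("....chunk payload", 4)   # zero-copy view into a larger string
    str(b)                               # 'chunk payload' -- a real copy
    len(b)                               # 13 -- so the problem hides until...
    "".join(["", b])                     # TypeError: expected string, buffer found

appender.end() and appender.read() join self.data, so a buffer stored there
blows up later; a buffer kept in _cache also pins its whole underlying chunk
in memory. Converting to str fixes both.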
@@ -1,198 +1,198 @@ changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from revlog import *
9 9 from i18n import _
10 10 import os, time, util
11 11
12 12 def _string_escape(text):
13 13 """
14 14 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
15 15 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
16 16 >>> s
17 17 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
18 18 >>> res = _string_escape(s)
19 19 >>> s == _string_unescape(res)
20 20 True
21 21 """
22 22 # subset of the string_escape codec
23 23 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
24 24 return text.replace('\0', '\\0')
25 25
26 26 def _string_unescape(text):
27 27 return text.decode('string_escape')
28 28
29 29 class appender:
30 30 '''the changelog index must be updated last on disk, so we use this class
31 31 to delay writes to it'''
32 32 def __init__(self, fp, buf):
33 33 self.data = buf
34 34 self.fp = fp
35 35 self.offset = fp.tell()
36 36 self.size = util.fstat(fp).st_size
37 37
38 38 def end(self):
39 39 return self.size + len("".join(self.data))
40 40 def tell(self):
41 41 return self.offset
42 42 def flush(self):
43 43 pass
44 44 def close(self):
45 45 self.fp.close()
46 46
47 47 def seek(self, offset, whence=0):
48 48 '''virtual file offset spans real file and data'''
49 49 if whence == 0:
50 50 self.offset = offset
51 51 elif whence == 1:
52 52 self.offset += offset
53 53 elif whence == 2:
54 54 self.offset = self.end() + offset
55 55 if self.offset < self.size:
56 56 self.fp.seek(self.offset)
57 57
58 58 def read(self, count=-1):
59 59 '''only trick here is reads that span real file and data'''
60 60 ret = ""
61 61 if self.offset < self.size:
62 62 s = self.fp.read(count)
63 63 ret = s
64 64 self.offset += len(s)
65 65 if count > 0:
66 66 count -= len(s)
67 67 if count != 0:
68 68 doff = self.offset - self.size
69 69 self.data.insert(0, "".join(self.data))
70 70 del self.data[1:]
71 71 s = self.data[0][doff:doff+count]
72 72 self.offset += len(s)
73 73 ret += s
74 74 return ret
75 75
76 76 def write(self, s):
77 self.data.append(s)
77 self.data.append(str(s))
78 78 self.offset += len(s)
79 79
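A minimal sketch (Python 2, hypothetical data, run in this module's context so
util is available) of the virtual file an appender presents:

    import tempfile
    fp = tempfile.NamedTemporaryFile()
    fp.write("ONDISK!")              # 7 bytes really on disk
    fp.flush()
    fp.seek(0, 2)                    # appender records tell() and size here
    a = appender(fp, [])
    a.write("INMEMORY!")             # 9 bytes buffered, not written to disk
    a.seek(0)
    assert a.read(16) == "ONDISK!INMEMORY!"   # a read spanning file and buffer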
80 80 class changelog(revlog):
81 81 def __init__(self, opener):
82 82 revlog.__init__(self, opener, "00changelog.i")
83 83
84 84 def delayupdate(self):
85 85 "delay visibility of index updates to other readers"
86 86 self._realopener = self.opener
87 87 self.opener = self._delayopener
88 88 self._delaycount = self.count()
89 89 self._delaybuf = []
90 90 self._delayname = None
91 91
92 92 def finalize(self, tr):
93 93 "finalize index updates"
94 94 self.opener = self._realopener
95 95 # move redirected index data back into place
96 96 if self._delayname:
97 97 util.rename(self._delayname + ".a", self._delayname)
98 98 elif self._delaybuf:
99 99 fp = self.opener(self.indexfile, 'a')
100 100 fp.write("".join(self._delaybuf))
101 101 fp.close()
102 102 del self._delaybuf
103 103 # split when we're done
104 104 self.checkinlinesize(tr)
105 105
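Sketched with a hypothetical driver, the intended calling sequence is:

    cl.delayupdate()    # divert index writes (to 00changelog.i.a or to memory)
    cl.addgroup(...)    # new revisions arrive; the index stays invisible
    cl.finalize(tr)     # rename the .a file back, or append the buffered data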
106 106 def _delayopener(self, name, mode='r'):
107 107 fp = self._realopener(name, mode)
108 108 # only divert the index
109 109 if not name == self.indexfile:
110 110 return fp
111 111 # if we're doing an initial clone, divert to another file
112 112 if self._delaycount == 0:
113 113 self._delayname = fp.name
114 114 return self._realopener(name + ".a", mode)
115 115 # otherwise, divert to memory
116 116 return appender(fp, self._delaybuf)
117 117
118 118 def checkinlinesize(self, tr, fp=None):
119 119 if self.opener == self._delayopener:
120 120 return
121 121 return revlog.checkinlinesize(self, tr, fp)
122 122
123 123 def decode_extra(self, text):
124 124 extra = {}
125 125 for l in text.split('\0'):
126 126 if not l:
127 127 continue
128 128 k, v = _string_unescape(l).split(':', 1)
129 129 extra[k] = v
130 130 return extra
131 131
132 132 def encode_extra(self, d):
133 133 # keys must be sorted to produce a deterministic changelog entry
134 134 keys = d.keys()
135 135 keys.sort()
136 136 items = [_string_escape('%s:%s' % (k, d[k])) for k in keys]
137 137 return "\0".join(items)
138 138
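A round-trip sketch (Python 2, hypothetical values) using the module's
_string_escape/_string_unescape: sorted, escaped key:value pairs joined with
NUL bytes, so the encoded extra is deterministic and newline-free:

    d = {'branch': 'stable', 'note': 'two\nlines'}
    enc = '\x00'.join([_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)])
    # enc == 'branch:stable\x00note:two\\nlines'
    dec = dict([_string_unescape(l).split(':', 1) for l in enc.split('\x00')])
    assert dec == d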
139 139 def extract(self, text):
140 140 """
141 141 format used:
142 142 nodeid\n : manifest node in ascii
143 143 user\n : user, no \n or \r allowed
144 144 time tz extra\n : date (time is int or float, timezone is int)
145 145 : extra is metadata, encoded and separated by '\0'
146 146 : older versions ignore it
147 147 files\n\n : files modified by the cset, no \n or \r allowed
148 148 (.*) : comment (free text, ideally utf-8)
149 149
150 150 changelog v0 doesn't use extra
151 151 """
152 152 if not text:
153 153 return (nullid, "", (0, 0), [], "", {'branch': 'default'})
154 154 last = text.index("\n\n")
155 155 desc = util.tolocal(text[last + 2:])
156 156 l = text[:last].split('\n')
157 157 manifest = bin(l[0])
158 158 user = util.tolocal(l[1])
159 159
160 160 extra_data = l[2].split(' ', 2)
161 161 if len(extra_data) != 3:
162 162 time = float(extra_data.pop(0))
163 163 try:
164 164 # various tools did silly things with the time zone field.
165 165 timezone = int(extra_data[0])
166 166 except:
167 167 timezone = 0
168 168 extra = {}
169 169 else:
170 170 time, timezone, extra = extra_data
171 171 time, timezone = float(time), int(timezone)
172 172 extra = self.decode_extra(extra)
173 173 if not extra.get('branch'):
174 174 extra['branch'] = 'default'
175 175 files = l[3:]
176 176 return (manifest, user, (time, timezone), files, desc, extra)
177 177
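A worked sketch of the format extract() parses (all values hypothetical):

    text = ('0123456789abcdef0123456789abcdef01234567\n'   # manifest node hex
            'alice\n'                                       # user
            '1190000000 -7200 branch:stable\n'              # time tz extra
            'a/file.txt\n'                                  # files
            '\n'
            'commit message')                               # description
    # extract(text) -> (bin(...), 'alice', (1190000000.0, -7200),
    #                   ['a/file.txt'], 'commit message', {'branch': 'stable'})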
178 178 def read(self, node):
179 179 return self.extract(self.revision(node))
180 180
181 181 def add(self, manifest, list, desc, transaction, p1=None, p2=None,
182 182 user=None, date=None, extra={}):
183 183
184 184 user, desc = util.fromlocal(user), util.fromlocal(desc)
185 185
186 186 if date:
187 187 parseddate = "%d %d" % util.parsedate(date)
188 188 else:
189 189 parseddate = "%d %d" % util.makedate()
190 190 if extra and extra.get("branch") in ("default", ""):
191 191 del extra["branch"]
192 192 if extra:
193 193 extra = self.encode_extra(extra)
194 194 parseddate = "%s %s" % (parseddate, extra)
195 195 list.sort()
196 196 l = [hex(manifest), user, parseddate] + list + ["", desc]
197 197 text = "\n".join(l)
198 198 return self.addrevision(text, transaction, self.count(), p1, p2)
@@ -1,1291 +1,1291 @@ revlog.py
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22 _sha = sha.new
23 23
24 24 # revlog flags
25 25 REVLOGV0 = 0
26 26 REVLOGNG = 1
27 27 REVLOGNGINLINEDATA = (1 << 16)
28 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 31
32 32 class RevlogError(Exception):
33 33 pass
34 34 class LookupError(RevlogError):
35 35 pass
36 36
37 37 def getoffset(q):
38 38 return int(q >> 16)
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43 def offset_type(offset, type):
44 44 return long(long(offset) << 16 | type)
45 45
46 46 def hash(text, p1, p2):
47 47 """generate a hash from the given text and its parent hashes
48 48
49 49 This hash combines both the current file contents and its history
50 50 in a manner that makes it easy to distinguish nodes with the same
51 51 content in the revision graph.
52 52 """
53 53 l = [p1, p2]
54 54 l.sort()
55 55 s = _sha(l[0])
56 56 s.update(l[1])
57 57 s.update(text)
58 58 return s.digest()
59 59
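A sketch of the computation (Python 2, hypothetical parents): sha1 over the
two parent nodeids in sorted order, then over the text:

    import sha
    p1, p2 = '\x11' * 20, '\x22' * 20
    s = sha.new(min(p1, p2))
    s.update(max(p1, p2))
    s.update('file text')
    node = s.digest()        # the 20-byte binary nodeid hash() returns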
60 60 def compress(text):
61 61 """ generate a possibly-compressed representation of text """
62 62 if not text:
63 63 return ("", text)
64 64 if len(text) < 44:
65 65 if text[0] == '\0':
66 66 return ("", text)
67 67 return ('u', text)
68 68 bin = _compress(text)
69 69 if len(bin) > len(text):
70 70 if text[0] == '\0':
71 71 return ("", text)
72 72 return ('u', text)
73 73 return ("", bin)
74 74
75 75 def decompress(bin):
76 76 """ decompress the given input """
77 77 if not bin:
78 78 return bin
79 79 t = bin[0]
80 80 if t == '\0':
81 81 return bin
82 82 if t == 'x':
83 83 return _decompress(bin)
84 84 if t == 'u':
85 85 return bin[1:]
86 86 raise RevlogError(_("unknown compression type %r") % t)
87 87
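A round-trip sketch using the two functions above: a stored chunk is the
concatenation of the returned pair, and decompress() dispatches on its first
byte ('\0' literal, 'x' zlib stream, 'u' stored uncompressed):

    text = 'some revision text ' * 10
    h, d = compress(text)    # ('', <zlib data>) here, since this compresses well
    assert decompress(h + d) == text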
88 88 class lazyparser(object):
89 89 """
90 90 this class avoids the need to parse the entirety of large indices
91 91 """
92 92
93 93 # lazyparser is not safe to use on windows if win32 extensions not
94 94 # available. it keeps a file handle open, which makes it not possible
95 95 # to break hardlinks on local cloned repos.
96 96 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
97 97 hasattr(util, 'win32api'))
98 98
99 99 def __init__(self, dataf, size):
100 100 self.dataf = dataf
101 101 self.s = struct.calcsize(indexformatng)
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: nullrev}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109
110 110 def loadmap(self):
111 111 """
112 112 during a commit, we need to make sure the rev being added is
113 113 not a duplicate. This requires loading the entire index,
114 114 which is fairly slow. loadmap can load up just the node map,
115 115 which takes much less time.
116 116 """
117 117 if self.allmap:
118 118 return
119 119 end = self.datasize
120 120 self.allmap = 1
121 121 cur = 0
122 122 count = 0
123 123 blocksize = self.s * 256
124 124 self.dataf.seek(0)
125 125 while cur < end:
126 126 data = self.dataf.read(blocksize)
127 127 off = 0
128 128 for x in xrange(256):
129 129 n = data[off + ngshaoffset:off + ngshaoffset + 20]
130 130 self.map[n] = count
131 131 count += 1
132 132 if count >= self.l:
133 133 break
134 134 off += self.s
135 135 cur += blocksize
136 136
137 137 def loadblock(self, blockstart, blocksize, data=None):
138 138 if self.all:
139 139 return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 if blockstart + blocksize > self.datasize:
143 143 # the revlog may have grown since we've started running,
144 144 # but we don't have space in self.index for more entries.
145 145 # limit blocksize so that we don't get too much data.
146 146 blocksize = max(self.datasize - blockstart, 0)
147 147 data = self.dataf.read(blocksize)
148 148 lend = len(data) / self.s
149 149 i = blockstart / self.s
150 150 off = 0
151 151 # lazyindex supports __delitem__
152 152 if lend > len(self.index) - i:
153 153 lend = len(self.index) - i
154 154 for x in xrange(lend):
155 155 if self.index[i + x] == None:
156 156 b = data[off : off + self.s]
157 157 self.index[i + x] = b
158 158 n = b[ngshaoffset:ngshaoffset + 20]
159 159 self.map[n] = i + x
160 160 off += self.s
161 161
162 162 def findnode(self, node):
163 163 """search backwards through the index file for a specific node"""
164 164 if self.allmap:
165 165 return None
166 166
167 167 # hg log will cause many many searches for the manifest
168 168 # nodes. After we get called a few times, just load the whole
169 169 # thing.
170 170 if self.mapfind_count > 8:
171 171 self.loadmap()
172 172 if node in self.map:
173 173 return node
174 174 return None
175 175 self.mapfind_count += 1
176 176 last = self.l - 1
177 177 while self.index[last] != None:
178 178 if last == 0:
179 179 self.all = 1
180 180 self.allmap = 1
181 181 return None
182 182 last -= 1
183 183 end = (last + 1) * self.s
184 184 blocksize = self.s * 256
185 185 while end >= 0:
186 186 start = max(end - blocksize, 0)
187 187 self.dataf.seek(start)
188 188 data = self.dataf.read(end - start)
189 189 findend = end - start
190 190 while True:
191 191 # we're searching backwards, so we have to make sure
192 192 # we don't find a changeset where this node is a parent
193 193 off = data.find(node, 0, findend)
194 194 findend = off
195 195 if off >= 0:
196 196 i = off / self.s
197 197 off = i * self.s
198 198 n = data[off + ngshaoffset:off + ngshaoffset + 20]
199 199 if n == node:
200 200 self.map[n] = i + start / self.s
201 201 return node
202 202 else:
203 203 break
204 204 end -= blocksize
205 205 return None
206 206
207 207 def loadindex(self, i=None, end=None):
208 208 if self.all:
209 209 return
210 210 all = False
211 211 if i == None:
212 212 blockstart = 0
213 213 blocksize = (65536 / self.s) * self.s
214 214 end = self.datasize
215 215 all = True
216 216 else:
217 217 if end:
218 218 blockstart = i * self.s
219 219 end = end * self.s
220 220 blocksize = end - blockstart
221 221 else:
222 222 blockstart = (i & ~1023) * self.s
223 223 blocksize = self.s * 1024
224 224 end = blockstart + blocksize
225 225 while blockstart < end:
226 226 self.loadblock(blockstart, blocksize)
227 227 blockstart += blocksize
228 228 if all:
229 229 self.all = True
230 230
231 231 class lazyindex(object):
232 232 """a lazy version of the index array"""
233 233 def __init__(self, parser):
234 234 self.p = parser
235 235 def __len__(self):
236 236 return len(self.p.index)
237 237 def load(self, pos):
238 238 if pos < 0:
239 239 pos += len(self.p.index)
240 240 self.p.loadindex(pos)
241 241 return self.p.index[pos]
242 242 def __getitem__(self, pos):
243 243 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
244 244 def __setitem__(self, pos, item):
245 245 self.p.index[pos] = _pack(indexformatng, *item)
246 246 def __delitem__(self, pos):
247 247 del self.p.index[pos]
248 248 def insert(self, pos, e):
249 249 self.p.index.insert(pos, _pack(indexformatng, *e))
250 250 def append(self, e):
251 251 self.p.index.append(_pack(indexformatng, *e))
252 252
253 253 class lazymap(object):
254 254 """a lazy version of the node map"""
255 255 def __init__(self, parser):
256 256 self.p = parser
257 257 def load(self, key):
258 258 n = self.p.findnode(key)
259 259 if n == None:
260 260 raise KeyError(key)
261 261 def __contains__(self, key):
262 262 if key in self.p.map:
263 263 return True
264 264 self.p.loadmap()
265 265 return key in self.p.map
266 266 def __iter__(self):
267 267 yield nullid
268 268 for i in xrange(self.p.l):
269 269 ret = self.p.index[i]
270 270 if not ret:
271 271 self.p.loadindex(i)
272 272 ret = self.p.index[i]
273 273 if isinstance(ret, str):
274 274 ret = _unpack(indexformatng, ret)
275 275 yield ret[7]
276 276 def __getitem__(self, key):
277 277 try:
278 278 return self.p.map[key]
279 279 except KeyError:
280 280 try:
281 281 self.load(key)
282 282 return self.p.map[key]
283 283 except KeyError:
284 284 raise KeyError("node " + hex(key))
285 285 def __setitem__(self, key, val):
286 286 self.p.map[key] = val
287 287 def __delitem__(self, key):
288 288 del self.p.map[key]
289 289
290 290 indexformatv0 = ">4l20s20s20s"
291 291 v0shaoffset = 56
292 292
293 293 class revlogoldio(object):
294 294 def __init__(self):
295 295 self.size = struct.calcsize(indexformatv0)
296 296
297 297 def parseindex(self, fp, inline):
298 298 s = self.size
299 299 index = []
300 300 nodemap = {nullid: nullrev}
301 301 n = off = 0
302 302 data = fp.read()
303 303 l = len(data)
304 304 while off + s <= l:
305 305 cur = data[off:off + s]
306 306 off += s
307 307 e = _unpack(indexformatv0, cur)
308 308 # transform to revlogv1 format
309 309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 310 nodemap[e[4]], nodemap[e[5]], e[6])
311 311 index.append(e2)
312 312 nodemap[e[6]] = n
313 313 n += 1
314 314
315 315 return index, nodemap, None
316 316
317 317 def packentry(self, entry, node, version, rev):
318 318 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
319 319 node(entry[5]), node(entry[6]), entry[7])
320 320 return _pack(indexformatv0, *e2)
321 321
322 322 # index ng:
323 323 # 6 bytes offset
324 324 # 2 bytes flags
325 325 # 4 bytes compressed length
326 326 # 4 bytes uncompressed length
327 327 # 4 bytes: base rev
328 328 # 4 bytes link rev
329 329 # 4 bytes parent 1 rev
330 330 # 4 bytes parent 2 rev
331 331 # 32 bytes: nodeid
332 332 indexformatng = ">Qiiiiii20s12x"
333 333 ngshaoffset = 32
334 334 versionformat = ">I"
335 335
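A quick check of the layout above (Python 2): each packed entry is 64 bytes,
and for rev 0 packentry() below overlays the version word on the first four
bytes of the offset field:

    import struct
    assert struct.calcsize(">Qiiiiii20s12x") == 64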
336 336 class revlogio(object):
337 337 def __init__(self):
338 338 self.size = struct.calcsize(indexformatng)
339 339
340 340 def parseindex(self, fp, inline):
341 341 try:
342 342 size = util.fstat(fp).st_size
343 343 except AttributeError:
344 344 size = 0
345 345
346 346 if lazyparser.safe_to_use and not inline and size > 1000000:
347 347 # big index, let's parse it on demand
348 348 parser = lazyparser(fp, size)
349 349 index = lazyindex(parser)
350 350 nodemap = lazymap(parser)
351 351 e = list(index[0])
352 352 type = gettype(e[0])
353 353 e[0] = offset_type(0, type)
354 354 index[0] = e
355 355 return index, nodemap, None
356 356
357 357 s = self.size
358 358 cache = None
359 359 index = []
360 360 nodemap = {nullid: nullrev}
361 361 n = off = 0
362 362 # if we're not using lazymap, always read the whole index
363 363 data = fp.read()
364 364 l = len(data) - s
365 365 append = index.append
366 366 if inline:
367 367 cache = (0, data)
368 368 while off <= l:
369 369 e = _unpack(indexformatng, data[off:off + s])
370 370 nodemap[e[7]] = n
371 371 append(e)
372 372 n += 1
373 373 if e[1] < 0:
374 374 break
375 375 off += e[1] + s
376 376 else:
377 377 while off <= l:
378 378 e = _unpack(indexformatng, data[off:off + s])
379 379 nodemap[e[7]] = n
380 380 append(e)
381 381 n += 1
382 382 off += s
383 383
384 384 e = list(index[0])
385 385 type = gettype(e[0])
386 386 e[0] = offset_type(0, type)
387 387 index[0] = e
388 388
389 389 return index, nodemap, cache
390 390
391 391 def packentry(self, entry, node, version, rev):
392 392 p = _pack(indexformatng, *entry)
393 393 if rev == 0:
394 394 p = _pack(versionformat, version) + p[4:]
395 395 return p
396 396
397 397 class revlog(object):
398 398 """
399 399 the underlying revision storage object
400 400
401 401 A revlog consists of two parts, an index and the revision data.
402 402
403 403 The index is a file with a fixed record size containing
404 404 information on each revision, including its nodeid (hash), the
405 405 nodeids of its parents, the position and offset of its data within
406 406 the data file, and the revision it's based on. Finally, each entry
407 407 contains a linkrev entry that can serve as a pointer to external
408 408 data.
409 409
410 410 The revision data itself is a linear collection of data chunks.
411 411 Each chunk represents a revision and is usually represented as a
412 412 delta against the previous chunk. To bound lookup time, runs of
413 413 deltas are limited to about 2 times the length of the original
414 414 version data. This makes retrieval of a version proportional to
415 415 its size, or O(1) relative to the number of revisions.
416 416
417 417 Both pieces of the revlog are written to in an append-only
418 418 fashion, which means we never need to rewrite a file to insert or
419 419 remove data, and can use some simple techniques to avoid the need
420 420 for locking while reading.
421 421 """
422 422 def __init__(self, opener, indexfile):
423 423 """
424 424 create a revlog object
425 425
426 426 opener is a function that abstracts the file opening operation
427 427 and can be used to implement COW semantics or the like.
428 428 """
429 429 self.indexfile = indexfile
430 430 self.datafile = indexfile[:-2] + ".d"
431 431 self.opener = opener
432 432 self._cache = None
433 433 self._chunkcache = None
434 434 self.nodemap = {nullid: nullrev}
435 435 self.index = []
436 436
437 437 v = REVLOG_DEFAULT_VERSION
438 438 if hasattr(opener, "defversion"):
439 439 v = opener.defversion
440 440 if v & REVLOGNG:
441 441 v |= REVLOGNGINLINEDATA
442 442
443 443 i = ""
444 444 try:
445 445 f = self.opener(self.indexfile)
446 446 i = f.read(4)
447 447 f.seek(0)
448 448 if len(i) > 0:
449 449 v = struct.unpack(versionformat, i)[0]
450 450 except IOError, inst:
451 451 if inst.errno != errno.ENOENT:
452 452 raise
453 453
454 454 self.version = v
455 455 self._inline = v & REVLOGNGINLINEDATA
456 456 flags = v & ~0xFFFF
457 457 fmt = v & 0xFFFF
458 458 if fmt == REVLOGV0 and flags:
459 459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 460 % (self.indexfile, flags >> 16))
461 461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 463 % (self.indexfile, flags >> 16))
464 464 elif fmt > REVLOGNG:
465 465 raise RevlogError(_("index %s unknown format %d")
466 466 % (self.indexfile, fmt))
467 467
468 468 self._io = revlogio()
469 469 if self.version == REVLOGV0:
470 470 self._io = revlogoldio()
471 471 if i:
472 472 d = self._io.parseindex(f, self._inline)
473 473 self.index, self.nodemap, self._chunkcache = d
474 474
475 475 # add the magic null revision at -1
476 476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477 477
478 478 def _loadindex(self, start, end):
479 479 """load a block of indexes all at once from the lazy parser"""
480 480 if isinstance(self.index, lazyindex):
481 481 self.index.p.loadindex(start, end)
482 482
483 483 def _loadindexmap(self):
484 484 """loads both the map and the index from the lazy parser"""
485 485 if isinstance(self.index, lazyindex):
486 486 p = self.index.p
487 487 p.loadindex()
488 488 self.nodemap = p.map
489 489
490 490 def _loadmap(self):
491 491 """loads the map from the lazy parser"""
492 492 if isinstance(self.nodemap, lazymap):
493 493 self.nodemap.p.loadmap()
494 494 self.nodemap = self.nodemap.p.map
495 495
496 496 def tip(self):
497 497 return self.node(len(self.index) - 2)
498 498 def count(self):
499 499 return len(self.index) - 1
500 500
501 501 def rev(self, node):
502 502 try:
503 503 return self.nodemap[node]
504 504 except KeyError:
505 505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 506 def node(self, rev):
507 507 return self.index[rev][7]
508 508 def linkrev(self, node):
509 509 return self.index[self.rev(node)][4]
510 510 def parents(self, node):
511 511 d = self.index[self.rev(node)][5:7]
512 512 return (self.node(d[0]), self.node(d[1]))
513 513 def parentrevs(self, rev):
514 514 return self.index[rev][5:7]
515 515 def start(self, rev):
516 516 return int(self.index[rev][0] >> 16)
517 517 def end(self, rev):
518 518 return self.start(rev) + self.length(rev)
519 519 def length(self, rev):
520 520 return self.index[rev][1]
521 521 def base(self, rev):
522 522 return self.index[rev][3]
523 523
524 524 def size(self, rev):
525 525 """return the length of the uncompressed text for a given revision"""
526 526 l = self.index[rev][2]
527 527 if l >= 0:
528 528 return l
529 529
530 530 t = self.revision(self.node(rev))
531 531 return len(t)
532 532
533 533 # alternate implementation, The advantage to this code is it
534 534 # will be faster for a single revision. But, the results are not
535 535 # cached, so finding the size of every revision will be slower.
536 536 """
537 537 if self.cache and self.cache[1] == rev:
538 538 return len(self.cache[2])
539 539
540 540 base = self.base(rev)
541 541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 542 base = self.cache[1]
543 543 text = self.cache[2]
544 544 else:
545 545 text = self.revision(self.node(base))
546 546
547 547 l = len(text)
548 548 for x in xrange(base + 1, rev + 1):
549 549 l = mdiff.patchedsize(l, self.chunk(x))
550 550 return l
551 551 """
552 552
553 553 def reachable(self, node, stop=None):
554 554 """return a hash of all nodes ancestral to a given node, including
555 555 the node itself, stopping when stop is matched"""
556 556 reachable = {}
557 557 visit = [node]
558 558 reachable[node] = 1
559 559 if stop:
560 560 stopn = self.rev(stop)
561 561 else:
562 562 stopn = 0
563 563 while visit:
564 564 n = visit.pop(0)
565 565 if n == stop:
566 566 continue
567 567 if n == nullid:
568 568 continue
569 569 for p in self.parents(n):
570 570 if self.rev(p) < stopn:
571 571 continue
572 572 if p not in reachable:
573 573 reachable[p] = 1
574 574 visit.append(p)
575 575 return reachable
576 576
577 577 def nodesbetween(self, roots=None, heads=None):
578 578 """Return a tuple containing three elements. Elements 1 and 2 contain
579 579 a final list of bases and heads after all the unreachable ones have been
580 580 pruned. Element 0 contains a topologically sorted list of all
581 581
582 582 nodes that satisfy these constraints:
583 583 1. All nodes must be descended from a node in roots (the nodes on
584 584 roots are considered descended from themselves).
585 585 2. All nodes must also be ancestors of a node in heads (the nodes in
586 586 heads are considered to be their own ancestors).
587 587
588 588 If roots is unspecified, nullid is assumed as the only root.
589 589 If heads is unspecified, it is taken to be the output of the
590 590 heads method (i.e. a list of all nodes in the repository that
591 591 have no children)."""
592 592 nonodes = ([], [], [])
593 593 if roots is not None:
594 594 roots = list(roots)
595 595 if not roots:
596 596 return nonodes
597 597 lowestrev = min([self.rev(n) for n in roots])
598 598 else:
599 599 roots = [nullid] # Everybody's a descendent of nullid
600 600 lowestrev = nullrev
601 601 if (lowestrev == nullrev) and (heads is None):
602 602 # We want _all_ the nodes!
603 603 return ([self.node(r) for r in xrange(0, self.count())],
604 604 [nullid], list(self.heads()))
605 605 if heads is None:
606 606 # All nodes are ancestors, so the latest ancestor is the last
607 607 # node.
608 608 highestrev = self.count() - 1
609 609 # Set ancestors to None to signal that every node is an ancestor.
610 610 ancestors = None
611 611 # Set heads to an empty dictionary for later discovery of heads
612 612 heads = {}
613 613 else:
614 614 heads = list(heads)
615 615 if not heads:
616 616 return nonodes
617 617 ancestors = {}
618 618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 619 # Also, later we will be using it to filter out the heads we can't
620 620 # find from roots.
621 621 heads = dict.fromkeys(heads, 0)
622 622 # Start at the top and keep marking parents until we're done.
623 623 nodestotag = heads.keys()
624 624 # Remember where the top was so we can use it as a limit later.
625 625 highestrev = max([self.rev(n) for n in nodestotag])
626 626 while nodestotag:
627 627 # grab a node to tag
628 628 n = nodestotag.pop()
629 629 # Never tag nullid
630 630 if n == nullid:
631 631 continue
632 632 # A node's revision number represents its place in a
633 633 # topologically sorted list of nodes.
634 634 r = self.rev(n)
635 635 if r >= lowestrev:
636 636 if n not in ancestors:
637 637 # If we are possibly a descendent of one of the roots
638 638 # and we haven't already been marked as an ancestor
639 639 ancestors[n] = 1 # Mark as ancestor
640 640 # Add non-nullid parents to list of nodes to tag.
641 641 nodestotag.extend([p for p in self.parents(n) if
642 642 p != nullid])
643 643 elif n in heads: # We've seen it before, is it a fake head?
644 644 # So it is, real heads should not be the ancestors of
645 645 # any other heads.
646 646 heads.pop(n)
647 647 if not ancestors:
648 648 return nonodes
649 649 # Now that we have our set of ancestors, we want to remove any
650 650 # roots that are not ancestors.
651 651
652 652 # If one of the roots was nullid, everything is included anyway.
653 653 if lowestrev > nullrev:
654 654 # But, since we weren't, let's recompute the lowest rev to not
655 655 # include roots that aren't ancestors.
656 656
657 657 # Filter out roots that aren't ancestors of heads
658 658 roots = [n for n in roots if n in ancestors]
659 659 # Recompute the lowest revision
660 660 if roots:
661 661 lowestrev = min([self.rev(n) for n in roots])
662 662 else:
663 663 # No more roots? Return empty list
664 664 return nonodes
665 665 else:
666 666 # We are descending from nullid, and don't need to care about
667 667 # any other roots.
668 668 lowestrev = nullrev
669 669 roots = [nullid]
670 670 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 671 # values don't matter).
672 672 descendents = dict.fromkeys(roots, 1)
673 673 # Also, keep the original roots so we can filter out roots that aren't
674 674 # 'real' roots (i.e. are descended from other roots).
675 675 roots = descendents.copy()
676 676 # Our topologically sorted list of output nodes.
677 677 orderedout = []
678 678 # Don't start at nullid since we don't want nullid in our output list,
679 679 # and if nullid shows up in descendents, empty parents will look like
680 680 # they're descendents.
681 681 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 682 n = self.node(r)
683 683 isdescendent = False
684 684 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 685 isdescendent = True
686 686 elif n in descendents:
687 687 # n is already a descendent
688 688 isdescendent = True
689 689 # This check only needs to be done here because all the roots
690 690 # will start being marked as descendents before the loop.
691 691 if n in roots:
692 692 # If n was a root, check if it's a 'real' root.
693 693 p = tuple(self.parents(n))
694 694 # If any of its parents are descendents, it's not a root.
695 695 if (p[0] in descendents) or (p[1] in descendents):
696 696 roots.pop(n)
697 697 else:
698 698 p = tuple(self.parents(n))
699 699 # A node is a descendent if either of its parents are
700 700 # descendents. (We seeded the descendents list with the roots
701 701 # up there, remember?)
702 702 if (p[0] in descendents) or (p[1] in descendents):
703 703 descendents[n] = 1
704 704 isdescendent = True
705 705 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 706 # Only include nodes that are both descendents and ancestors.
707 707 orderedout.append(n)
708 708 if (ancestors is not None) and (n in heads):
709 709 # We're trying to figure out which heads are reachable
710 710 # from roots.
711 711 # Mark this head as having been reached
712 712 heads[n] = 1
713 713 elif ancestors is None:
714 714 # Otherwise, we're trying to discover the heads.
715 715 # Assume this is a head because if it isn't, the next step
716 716 # will eventually remove it.
717 717 heads[n] = 1
718 718 # But, obviously its parents aren't.
719 719 for p in self.parents(n):
720 720 heads.pop(p, None)
721 721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 722 roots = roots.keys()
723 723 assert orderedout
724 724 assert roots
725 725 assert heads
726 726 return (orderedout, roots, heads)
727 727
728 728 def heads(self, start=None, stop=None):
729 729 """return the list of all nodes that have no children
730 730
731 731 if start is specified, only heads that are descendants of
732 732 start will be returned
733 733 if stop is specified, it will consider all the revs from stop
734 734 as if they had no children
735 735 """
736 736 if start is None and stop is None:
737 737 count = self.count()
738 738 if not count:
739 739 return [nullid]
740 740 ishead = [1] * (count + 1)
741 741 index = self.index
742 742 for r in xrange(count):
743 743 e = index[r]
744 744 ishead[e[5]] = ishead[e[6]] = 0
745 745 return [self.node(r) for r in xrange(count) if ishead[r]]
746 746
747 747 if start is None:
748 748 start = nullid
749 749 if stop is None:
750 750 stop = []
751 751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
752 752 startrev = self.rev(start)
753 753 reachable = {startrev: 1}
754 754 heads = {startrev: 1}
755 755
756 756 parentrevs = self.parentrevs
757 757 for r in xrange(startrev + 1, self.count()):
758 758 for p in parentrevs(r):
759 759 if p in reachable:
760 760 if r not in stoprevs:
761 761 reachable[r] = 1
762 762 heads[r] = 1
763 763 if p in heads and p not in stoprevs:
764 764 del heads[p]
765 765
766 766 return [self.node(r) for r in heads]
767 767
768 768 def children(self, node):
769 769 """find the children of a given node"""
770 770 c = []
771 771 p = self.rev(node)
772 772 for r in range(p + 1, self.count()):
773 773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
774 774 if prevs:
775 775 for pr in prevs:
776 776 if pr == p:
777 777 c.append(self.node(r))
778 778 elif p == nullrev:
779 779 c.append(self.node(r))
780 780 return c
781 781
782 782 def _match(self, id):
783 783 if isinstance(id, (long, int)):
784 784 # rev
785 785 return self.node(id)
786 786 if len(id) == 20:
787 787 # possibly a binary node
788 788 # odds of a binary node being all hex in ASCII are 1 in 10**25
789 789 try:
790 790 node = id
791 791 r = self.rev(node) # quick search the index
792 792 return node
793 793 except LookupError:
794 794 pass # may be partial hex id
795 795 try:
796 796 # str(rev)
797 797 rev = int(id)
798 798 if str(rev) != id:
799 799 raise ValueError
800 800 if rev < 0:
801 801 rev = self.count() + rev
802 802 if rev < 0 or rev >= self.count():
803 803 raise ValueError
804 804 return self.node(rev)
805 805 except (ValueError, OverflowError):
806 806 pass
807 807 if len(id) == 40:
808 808 try:
809 809 # a full hex nodeid?
810 810 node = bin(id)
811 811 r = self.rev(node)
812 812 return node
813 813 except TypeError:
814 814 pass
815 815
816 816 def _partialmatch(self, id):
817 817 if len(id) < 40:
818 818 try:
819 819 # hex(node)[:...]
820 820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
821 821 node = None
822 822 for n in self.nodemap:
823 823 if n.startswith(bin_id) and hex(n).startswith(id):
824 824 if node is not None:
825 825 raise LookupError(_("Ambiguous identifier"))
826 826 node = n
827 827 if node is not None:
828 828 return node
829 829 except TypeError:
830 830 pass
831 831
832 832 def lookup(self, id):
833 833 """locate a node based on:
834 834 - revision number or str(revision number)
835 835 - nodeid or subset of hex nodeid
836 836 """
837 837 n = self._match(id)
838 838 if n is not None:
839 839 return n
840 840 n = self._partialmatch(id)
841 841 if n:
842 842 return n
843 843
844 844 raise LookupError(_("No match found"))
845 845
846 846 def cmp(self, node, text):
847 847 """compare text with a given file revision"""
848 848 p1, p2 = self.parents(node)
849 849 return hash(text, p1, p2) != node
850 850
851 851 def chunk(self, rev, df=None):
852 852 def loadcache(df):
853 853 if not df:
854 854 if self._inline:
855 855 df = self.opener(self.indexfile)
856 856 else:
857 857 df = self.opener(self.datafile)
858 858 df.seek(start)
859 859 self._chunkcache = (start, df.read(cache_length))
860 860
861 861 start, length = self.start(rev), self.length(rev)
862 862 if self._inline:
863 863 start += (rev + 1) * self._io.size
864 864 end = start + length
865 865
866 866 offset = 0
867 867 if not self._chunkcache:
868 868 cache_length = max(65536, length)
869 869 loadcache(df)
870 870 else:
871 871 cache_start = self._chunkcache[0]
872 872 cache_length = len(self._chunkcache[1])
873 873 cache_end = cache_start + cache_length
874 874 if start >= cache_start and end <= cache_end:
875 875 # it is cached
876 876 offset = start - cache_start
877 877 else:
878 878 cache_length = max(65536, length)
879 879 loadcache(df)
880 880
881 881 # avoid copying large chunks
882 882 c = self._chunkcache[1]
883 883 if cache_length != length:
884 884 c = c[offset:offset + length]
885 885
886 886 return decompress(c)
887 887
888 888 def delta(self, node):
889 889 """return or calculate a delta between a node and its predecessor"""
890 890 r = self.rev(node)
891 891 return self.revdiff(r - 1, r)
892 892
893 893 def revdiff(self, rev1, rev2):
894 894 """return or calculate a delta between two revisions"""
895 895 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
896 896 return self.chunk(rev2)
897 897
898 898 return mdiff.textdiff(self.revision(self.node(rev1)),
899 899 self.revision(self.node(rev2)))
900 900
901 901 def revision(self, node):
902 902 """return an uncompressed revision of a given node"""
903 903 if node == nullid:
904 904 return ""
905 905 if self._cache and self._cache[0] == node:
906 return self._cache[2]
906 return str(self._cache[2])
907 907
908 908 # look up what we need to read
909 909 text = None
910 910 rev = self.rev(node)
911 911 base = self.base(rev)
912 912
913 913 # check rev flags
914 914 if self.index[rev][0] & 0xFFFF:
915 915 raise RevlogError(_('incompatible revision flag %x') %
916 916 (self.index[rev][0] & 0xFFFF))
917 917
918 918 if self._inline:
919 919 # we probably have the whole chunk cached
920 920 df = None
921 921 else:
922 922 df = self.opener(self.datafile)
923 923
924 924 # do we have useful data cached?
925 925 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
926 926 base = self._cache[1]
927 text = self._cache[2]
927 text = str(self._cache[2])
928 928 self._loadindex(base, rev + 1)
929 929 else:
930 930 self._loadindex(base, rev + 1)
931 931 text = self.chunk(base, df=df)
932 932
933 933 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
934 934 text = mdiff.patches(text, bins)
935 935 p1, p2 = self.parents(node)
936 936 if node != hash(text, p1, p2):
937 937 raise RevlogError(_("integrity check failed on %s:%d")
938 938 % (self.datafile, rev))
939 939
940 940 self._cache = (node, rev, text)
941 941 return text
942 942
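The two str() calls above are the point of this changeset. Quoting the lines
further down where a Python 2 buffer can enter the cache:

    delta = buffer(chunk, 80)           # addgroup(): zero-copy slice of a chunk
    text = buffer(delta, 12)            # addgroup(): empty-parent fast path
    self._cache = (node, curr, text)    # _addrevision(): cached verbatim

Without the str() conversion, that buffer would escape to callers of
revision(), which expect a real string.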
943 943 def checkinlinesize(self, tr, fp=None):
944 944 if not self._inline:
945 945 return
946 946 if not fp:
947 947 fp = self.opener(self.indexfile, 'r')
948 948 fp.seek(0, 2)
949 949 size = fp.tell()
950 950 if size < 131072:
951 951 return
952 952 trinfo = tr.find(self.indexfile)
953 953 if trinfo == None:
954 954 raise RevlogError(_("%s not found in the transaction")
955 955 % self.indexfile)
956 956
957 957 trindex = trinfo[2]
958 958 dataoff = self.start(trindex)
959 959
960 960 tr.add(self.datafile, dataoff)
961 961 df = self.opener(self.datafile, 'w')
962 962 calc = self._io.size
963 963 for r in xrange(self.count()):
964 964 start = self.start(r) + (r + 1) * calc
965 965 length = self.length(r)
966 966 fp.seek(start)
967 967 d = fp.read(length)
968 968 df.write(d)
969 969 fp.close()
970 970 df.close()
971 971 fp = self.opener(self.indexfile, 'w', atomictemp=True)
972 972 self.version &= ~(REVLOGNGINLINEDATA)
973 973 self._inline = False
974 974 for i in xrange(self.count()):
975 975 e = self._io.packentry(self.index[i], self.node, self.version, i)
976 976 fp.write(e)
977 977
978 978 # if we don't call rename, the temp file will never replace the
979 979 # real index
980 980 fp.rename()
981 981
982 982 tr.replace(self.indexfile, trindex * calc)
983 983 self._chunkcache = None
984 984
985 985 def addrevision(self, text, transaction, link, p1, p2, d=None):
986 986 """add a revision to the log
987 987
988 988 text - the revision data to add
989 989 transaction - the transaction object used for rollback
990 990 link - the linkrev data to add
991 991 p1, p2 - the parent nodeids of the revision
992 992 d - an optional precomputed delta
993 993 """
994 994 dfh = None
995 995 if not self._inline:
996 996 dfh = self.opener(self.datafile, "a")
997 997 ifh = self.opener(self.indexfile, "a+")
998 998 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
999 999
1000 1000 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1001 1001 node = hash(text, p1, p2)
1002 1002 if node in self.nodemap:
1003 1003 return node
1004 1004
1005 1005 curr = self.count()
1006 1006 prev = curr - 1
1007 1007 base = self.base(prev)
1008 1008 offset = self.end(prev)
1009 1009
1010 1010 if curr:
1011 1011 if not d:
1012 1012 ptext = self.revision(self.node(prev))
1013 1013 d = mdiff.textdiff(ptext, text)
1014 1014 data = compress(d)
1015 1015 l = len(data[1]) + len(data[0])
1016 1016 dist = l + offset - self.start(base)
1017 1017
1018 1018 # full versions are inserted when the needed deltas
1019 1019 # become comparable to the uncompressed text
1020 1020 if not curr or dist > len(text) * 2:
1021 1021 data = compress(text)
1022 1022 l = len(data[1]) + len(data[0])
1023 1023 base = curr
1024 1024
1025 1025 e = (offset_type(offset, 0), l, len(text),
1026 1026 base, link, self.rev(p1), self.rev(p2), node)
1027 1027 self.index.insert(-1, e)
1028 1028 self.nodemap[node] = curr
1029 1029
1030 1030 entry = self._io.packentry(e, self.node, self.version, curr)
1031 1031 if not self._inline:
1032 1032 transaction.add(self.datafile, offset)
1033 1033 transaction.add(self.indexfile, curr * len(entry))
1034 1034 if data[0]:
1035 1035 dfh.write(data[0])
1036 1036 dfh.write(data[1])
1037 1037 dfh.flush()
1038 1038 ifh.write(entry)
1039 1039 else:
1040 1040 offset += curr * self._io.size
1041 1041 transaction.add(self.indexfile, offset, curr)
1042 1042 ifh.write(entry)
1043 1043 ifh.write(data[0])
1044 1044 ifh.write(data[1])
1045 1045 self.checkinlinesize(transaction, ifh)
1046 1046
1047 1047 self._cache = (node, curr, text)
1048 1048 return node
1049 1049
1050 1050 def ancestor(self, a, b):
1051 1051 """calculate the least common ancestor of nodes a and b"""
1052 1052
1053 1053 def parents(rev):
1054 1054 return [p for p in self.parentrevs(rev) if p != nullrev]
1055 1055
1056 1056 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1057 1057 if c is None:
1058 1058 return nullid
1059 1059
1060 1060 return self.node(c)
1061 1061
1062 1062 def group(self, nodelist, lookup, infocollect=None):
1063 1063 """calculate a delta group
1064 1064
1065 1065 Given a list of changeset revs, return a set of deltas and
1066 1066 metadata corresponding to nodes. the first delta is
1067 1067 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1068 1068 have this parent as it has all history before these
1069 1069 changesets. parent is parent[0]
1070 1070 """
1071 1071 revs = [self.rev(n) for n in nodelist]
1072 1072
1073 1073 # if we don't have any revisions touched by these changesets, bail
1074 1074 if not revs:
1075 1075 yield changegroup.closechunk()
1076 1076 return
1077 1077
1078 1078 # add the parent of the first rev
1079 1079 p = self.parents(self.node(revs[0]))[0]
1080 1080 revs.insert(0, self.rev(p))
1081 1081
1082 1082 # build deltas
1083 1083 for d in xrange(0, len(revs) - 1):
1084 1084 a, b = revs[d], revs[d + 1]
1085 1085 nb = self.node(b)
1086 1086
1087 1087 if infocollect is not None:
1088 1088 infocollect(nb)
1089 1089
1090 1090 p = self.parents(nb)
1091 1091 meta = nb + p[0] + p[1] + lookup(nb)
1092 1092 if a == -1:
1093 1093 d = self.revision(nb)
1094 1094 meta += mdiff.trivialdiffheader(len(d))
1095 1095 else:
1096 1096 d = self.revdiff(a, b)
1097 1097 yield changegroup.chunkheader(len(meta) + len(d))
1098 1098 yield meta
1099 1099 if len(d) > 2**20:
1100 1100 pos = 0
1101 1101 while pos < len(d):
1102 1102 pos2 = pos + 2 ** 18
1103 1103 yield d[pos:pos2]
1104 1104 pos = pos2
1105 1105 else:
1106 1106 yield d
1107 1107
1108 1108 yield changegroup.closechunk()
1109 1109
1110 1110 def addgroup(self, revs, linkmapper, transaction, unique=0):
1111 1111 """
1112 1112 add a delta group
1113 1113
1114 1114 given a set of deltas, add them to the revision log. the
1115 1115 first delta is against its parent, which should be in our
1116 1116 log, the rest are against the previous delta.
1117 1117 """
1118 1118
1119 1119 #track the base of the current delta log
1120 1120 r = self.count()
1121 1121 t = r - 1
1122 1122 node = None
1123 1123
1124 1124 base = prev = nullrev
1125 1125 start = end = textlen = 0
1126 1126 if r:
1127 1127 end = self.end(t)
1128 1128
1129 1129 ifh = self.opener(self.indexfile, "a+")
1130 1130 isize = r * self._io.size
1131 1131 if self._inline:
1132 1132 transaction.add(self.indexfile, end + isize, r)
1133 1133 dfh = None
1134 1134 else:
1135 1135 transaction.add(self.indexfile, isize, r)
1136 1136 transaction.add(self.datafile, end)
1137 1137 dfh = self.opener(self.datafile, "a")
1138 1138
1139 1139 # loop through our set of deltas
1140 1140 chain = None
1141 1141 for chunk in revs:
1142 1142 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1143 1143 link = linkmapper(cs)
1144 1144 if node in self.nodemap:
1145 1145 # this can happen if two branches make the same change
1146 1146 # if unique:
1147 1147 # raise RevlogError(_("already have %s") % hex(node[:4]))
1148 1148 chain = node
1149 1149 continue
1150 1150 delta = buffer(chunk, 80)
1151 1151 del chunk
1152 1152
1153 1153 for p in (p1, p2):
1154 1154 if not p in self.nodemap:
1155 1155 raise LookupError(_("unknown parent %s") % short(p))
1156 1156
1157 1157 if not chain:
1158 1158 # retrieve the parent revision of the delta chain
1159 1159 chain = p1
1160 1160 if not chain in self.nodemap:
1161 1161 raise LookupError(_("unknown base %s") % short(chain[:4]))
1162 1162
1163 1163 # full versions are inserted when the needed deltas become
1164 1164 # comparable to the uncompressed text or when the previous
1165 1165 # version is not the one we have a delta against. We use
1166 1166 # the size of the previous full rev as a proxy for the
1167 1167 # current size.
1168 1168
1169 1169 if chain == prev:
1170 1170 cdelta = compress(delta)
1171 1171 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1172 1172 textlen = mdiff.patchedsize(textlen, delta)
1173 1173
1174 1174 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1175 1175 # flush our writes here so we can read it in revision
1176 1176 if dfh:
1177 1177 dfh.flush()
1178 1178 ifh.flush()
1179 1179 text = self.revision(chain)
1180 1180 if len(text) == 0:
1181 1181 # skip over trivial delta header
1182 1182 text = buffer(delta, 12)
1183 1183 else:
1184 1184 text = mdiff.patches(text, [delta])
1185 1185 del delta
1186 1186 chk = self._addrevision(text, transaction, link, p1, p2, None,
1187 1187 ifh, dfh)
1188 1188 if not dfh and not self._inline:
1189 1189 # addrevision switched from inline to conventional
1190 1190 # reopen the index
1191 1191 dfh = self.opener(self.datafile, "a")
1192 1192 ifh = self.opener(self.indexfile, "a")
1193 1193 if chk != node:
1194 1194 raise RevlogError(_("consistency error adding group"))
1195 1195 textlen = len(text)
1196 1196 else:
1197 1197 e = (offset_type(end, 0), cdeltalen, textlen, base,
1198 1198 link, self.rev(p1), self.rev(p2), node)
1199 1199 self.index.insert(-1, e)
1200 1200 self.nodemap[node] = r
1201 1201 entry = self._io.packentry(e, self.node, self.version, r)
1202 1202 if self._inline:
1203 1203 ifh.write(entry)
1204 1204 ifh.write(cdelta[0])
1205 1205 ifh.write(cdelta[1])
1206 1206 self.checkinlinesize(transaction, ifh)
1207 1207 if not self._inline:
1208 1208 dfh = self.opener(self.datafile, "a")
1209 1209 ifh = self.opener(self.indexfile, "a")
1210 1210 else:
1211 1211 dfh.write(cdelta[0])
1212 1212 dfh.write(cdelta[1])
1213 1213 ifh.write(entry)
1214 1214
1215 1215 t, r, chain, prev = r, r + 1, node, node
1216 1216 base = self.base(t)
1217 1217 start = self.start(base)
1218 1218 end = self.end(t)
1219 1219
1220 1220 return node
1221 1221
1222 1222 def strip(self, rev, minlink):
1223 1223 if self.count() == 0 or rev >= self.count():
1224 1224 return
1225 1225
1226 1226 if isinstance(self.index, lazyindex):
1227 1227 self._loadindexmap()
1228 1228
1229 1229 # When stripping away a revision, we need to make sure it
1230 1230 # does not actually belong to an older changeset.
1231 1231 # The minlink parameter defines the oldest revision
1232 1232 # we're allowed to strip away.
1233 1233 while minlink > self.index[rev][4]:
1234 1234 rev += 1
1235 1235 if rev >= self.count():
1236 1236 return
1237 1237
1238 1238 # first truncate the files on disk
1239 1239 end = self.start(rev)
1240 1240 if not self._inline:
1241 1241 df = self.opener(self.datafile, "a")
1242 1242 df.truncate(end)
1243 1243 end = rev * self._io.size
1244 1244 else:
1245 1245 end += rev * self._io.size
1246 1246
1247 1247 indexf = self.opener(self.indexfile, "a")
1248 1248 indexf.truncate(end)
1249 1249
1250 1250 # then reset internal state in memory to forget those revisions
1251 1251 self._cache = None
1252 1252 self._chunkcache = None
1253 1253 for x in xrange(rev, self.count()):
1254 1254 del self.nodemap[self.node(x)]
1255 1255
1256 1256 del self.index[rev:-1]
1257 1257
1258 1258 def checksize(self):
1259 1259 expected = 0
1260 1260 if self.count():
1261 1261 expected = max(0, self.end(self.count() - 1))
1262 1262
1263 1263 try:
1264 1264 f = self.opener(self.datafile)
1265 1265 f.seek(0, 2)
1266 1266 actual = f.tell()
1267 1267 dd = actual - expected
1268 1268 except IOError, inst:
1269 1269 if inst.errno != errno.ENOENT:
1270 1270 raise
1271 1271 dd = 0
1272 1272
1273 1273 try:
1274 1274 f = self.opener(self.indexfile)
1275 1275 f.seek(0, 2)
1276 1276 actual = f.tell()
1277 1277 s = self._io.size
1278 1278 i = max(0, actual / s)
1279 1279 di = actual - (i * s)
1280 1280 if self._inline:
1281 1281 databytes = 0
1282 1282 for r in xrange(self.count()):
1283 1283 databytes += max(0, self.length(r))
1284 1284 dd = 0
1285 1285 di = actual - self.count() * s - databytes
1286 1286 except IOError, inst:
1287 1287 if inst.errno != errno.ENOENT:
1288 1288 raise
1289 1289 di = 0
1290 1290
1291 1291 return (dd, di)
@@ -1,1690 +1,1690 @@ util.py
1 1 """
2 2 util.py - Mercurial utility functions and platform specific implementations
3 3
4 4 Copyright 2005 K. Thananchayan <thananck@yahoo.com>
5 5 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
6 6 Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
7 7
8 8 This software may be used and distributed according to the terms
9 9 of the GNU General Public License, incorporated herein by reference.
10 10
11 11 This contains helper routines that are independent of the SCM core and hide
12 12 platform-specific details from the core.
13 13 """
14 14
15 15 from i18n import _
16 16 import cStringIO, errno, getpass, popen2, re, shutil, sys, tempfile, strutil
17 17 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
18 18
19 19 try:
20 20 set = set
21 21 frozenset = frozenset
22 22 except NameError:
23 23 from sets import Set as set, ImmutableSet as frozenset
24 24
25 25 try:
26 26 _encoding = os.environ.get("HGENCODING")
27 27 if sys.platform == 'darwin' and not _encoding:
28 28 # On darwin, getpreferredencoding ignores the locale environment and
29 29 # always returns mac-roman. We override this if the environment is
30 30 # not C (has been customized by the user).
31 31 locale.setlocale(locale.LC_CTYPE, '')
32 32 _encoding = locale.getlocale()[1]
33 33 if not _encoding:
34 34 _encoding = locale.getpreferredencoding() or 'ascii'
35 35 except locale.Error:
36 36 _encoding = 'ascii'
37 37 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
38 38 _fallbackencoding = 'ISO-8859-1'
39 39
40 40 def tolocal(s):
41 41 """
42 42 Convert a string from internal UTF-8 to local encoding
43 43
44 44 All internal strings should be UTF-8 but some repos before the
45 45 implementation of locale support may contain latin1 or possibly
46 46 other character sets. We attempt to decode everything strictly
47 47 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
48 48 replace unknown characters.
49 49 """
50 50 for e in ('UTF-8', _fallbackencoding):
51 51 try:
52 52 u = s.decode(e) # attempt strict decoding
53 53 return u.encode(_encoding, "replace")
54 54 except LookupError, k:
55 55 raise Abort(_("%s, please check your locale settings") % k)
56 56 except UnicodeDecodeError:
57 57 pass
58 58 u = s.decode("utf-8", "replace") # last ditch
59 59 return u.encode(_encoding, "replace")
60 60
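A small sketch (Python 2; the result depends on the detected _encoding):

    tolocal('caf\xc3\xa9')   # valid UTF-8: decoded strictly, re-encoded locally
    tolocal('caf\xe9')       # Latin-1 bytes: UTF-8 fails, the fallback kicks in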
61 61 def fromlocal(s):
62 62 """
63 63 Convert a string from the local character encoding to UTF-8
64 64
65 65 We attempt to decode strings using the encoding mode set by
66 66 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
67 67 characters will cause an error message. Other modes include
68 68 'replace', which replaces unknown characters with a special
69 69 Unicode character, and 'ignore', which drops the character.
70 70 """
71 71 try:
72 72 return s.decode(_encoding, _encodingmode).encode("utf-8")
73 73 except UnicodeDecodeError, inst:
74 74 sub = s[max(0, inst.start-10):inst.start+10]
75 75 raise Abort("decoding near '%s': %s!" % (sub, inst))
76 76 except LookupError, k:
77 77 raise Abort(_("%s, please check your locale settings") % k)
78 78
79 79 def locallen(s):
80 80 """Find the length in characters of a local string"""
81 81 return len(s.decode(_encoding, "replace"))
82 82
83 83 def localsub(s, a, b=None):
84 84 try:
85 85 u = s.decode(_encoding, _encodingmode)
86 86 if b is not None:
87 87 u = u[a:b]
88 88 else:
89 89 u = u[:a]
90 90 return u.encode(_encoding, _encodingmode)
91 91 except UnicodeDecodeError, inst:
92 92 sub = s[max(0, inst.start-10):inst.start+10]
93 93 raise Abort(_("decoding near '%s': %s!") % (sub, inst))
94 94
95 95 # used by parsedate
96 96 defaultdateformats = (
97 97 '%Y-%m-%d %H:%M:%S',
98 98 '%Y-%m-%d %I:%M:%S%p',
99 99 '%Y-%m-%d %H:%M',
100 100 '%Y-%m-%d %I:%M%p',
101 101 '%Y-%m-%d',
102 102 '%m-%d',
103 103 '%m/%d',
104 104 '%m/%d/%y',
105 105 '%m/%d/%Y',
106 106 '%a %b %d %H:%M:%S %Y',
107 107 '%a %b %d %I:%M:%S%p %Y',
108 108 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
109 109 '%b %d %H:%M:%S %Y',
110 110 '%b %d %I:%M:%S%p %Y',
111 111 '%b %d %H:%M:%S',
112 112 '%b %d %I:%M:%S%p',
113 113 '%b %d %H:%M',
114 114 '%b %d %I:%M%p',
115 115 '%b %d %Y',
116 116 '%b %d',
117 117 '%H:%M:%S',
118 118 '%I:%M:%S%p',
119 119 '%H:%M',
120 120 '%I:%M%p',
121 121 )
122 122
123 123 extendeddateformats = defaultdateformats + (
124 124 "%Y",
125 125 "%Y-%m",
126 126 "%b",
127 127 "%b %Y",
128 128 )
129 129
130 130 class SignalInterrupt(Exception):
131 131 """Exception raised on SIGTERM and SIGHUP."""
132 132
133 133 # differences from SafeConfigParser:
134 134 # - case-sensitive keys
135 135 # - allows values that are not strings (this means that you may not
136 136 # be able to save the configuration to a file)
137 137 class configparser(ConfigParser.SafeConfigParser):
138 138 def optionxform(self, optionstr):
139 139 return optionstr
140 140
141 141 def set(self, section, option, value):
142 142 return ConfigParser.ConfigParser.set(self, section, option, value)
143 143
144 144 def _interpolate(self, section, option, rawval, vars):
145 145 if not isinstance(rawval, basestring):
146 146 return rawval
147 147 return ConfigParser.SafeConfigParser._interpolate(self, section,
148 148 option, rawval, vars)
149 149
150 150 def cachefunc(func):
151 151 '''cache the result of function calls'''
152 152 # XXX doesn't handle keywords args
153 153 cache = {}
154 154 if func.func_code.co_argcount == 1:
155 155 # we gain a small amount of time because
156 156 # we don't need to pack/unpack the list
157 157 def f(arg):
158 158 if arg not in cache:
159 159 cache[arg] = func(arg)
160 160 return cache[arg]
161 161 else:
162 162 def f(*args):
163 163 if args not in cache:
164 164 cache[args] = func(*args)
165 165 return cache[args]
166 166
167 167 return f
168 168
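A usage sketch (hypothetical function): rebinding the name to the wrapper
makes the recursive calls hit the cache too:

    def fib(n):
        if n < 2:
            return n
        return fib(n - 1) + fib(n - 2)
    fib = cachefunc(fib)
    fib(30)    # 832040; each n is computed once, then served from the cache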
169 169 def pipefilter(s, cmd):
170 170 '''filter string S through command CMD, returning its output'''
171 171 (pin, pout) = os.popen2(cmd, 'b')
172 172 def writer():
173 173 try:
174 174 pin.write(s)
175 175 pin.close()
176 176 except IOError, inst:
177 177 if inst.errno != errno.EPIPE:
178 178 raise
179 179
180 180 # we should use select instead on UNIX, but this will work on most
181 181 # systems, including Windows
182 182 w = threading.Thread(target=writer)
183 183 w.start()
184 184 f = pout.read()
185 185 pout.close()
186 186 w.join()
187 187 return f
188 188
189 189 def tempfilter(s, cmd):
190 190 '''filter string S through a pair of temporary files with CMD.
191 191 CMD is used as a template to create the real command to be run,
192 192 with the strings INFILE and OUTFILE replaced by the real names of
193 193 the temporary files generated.'''
194 194 inname, outname = None, None
195 195 try:
196 196 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
197 197 fp = os.fdopen(infd, 'wb')
198 198 fp.write(s)
199 199 fp.close()
200 200 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
201 201 os.close(outfd)
202 202 cmd = cmd.replace('INFILE', inname)
203 203 cmd = cmd.replace('OUTFILE', outname)
204 204 code = os.system(cmd)
205 205 if sys.platform == 'OpenVMS' and code & 1:
206 206 code = 0
207 207 if code: raise Abort(_("command '%s' failed: %s") %
208 208 (cmd, explain_exit(code)))
209 209 return open(outname, 'rb').read()
210 210 finally:
211 211 try:
212 212 if inname: os.unlink(inname)
213 213 except: pass
214 214 try:
215 215 if outname: os.unlink(outname)
216 216 except: pass
217 217
218 218 filtertable = {
219 219 'tempfile:': tempfilter,
220 220 'pipe:': pipefilter,
221 221 }
222 222
223 223 def filter(s, cmd):
224 224 "filter a string through a command that transforms its input to its output"
225 225 for name, fn in filtertable.iteritems():
226 226 if cmd.startswith(name):
227 227 return fn(s, cmd[len(name):].lstrip())
228 228 return pipefilter(s, cmd)
229 229
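# Hypothetical uses of the dispatch above (not part of the original
# source): an explicit prefix picks the mechanism, otherwise a pipe is
# assumed.
#
#     filter(s, 'tempfile: sort INFILE > OUTFILE')   # tempfilter
#     filter(s, 'pipe: wc -l')                       # pipefilter
#     filter(s, 'wc -l')                             # no prefix: pipefilter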
230 230 def binary(s):
231 231 """return true if a string is binary data using diff's heuristic"""
232 232 if s and '\0' in s[:4096]:
233 233 return True
234 234 return False
235 235
236 236 def unique(g):
237 237 """return the unique elements of iterable g"""
238 238 seen = {}
239 239 l = []
240 240 for f in g:
241 241 if f not in seen:
242 242 seen[f] = 1
243 243 l.append(f)
244 244 return l
245 245
246 246 class Abort(Exception):
247 247 """Raised if a command needs to print an error and exit."""
248 248
249 249 class UnexpectedOutput(Abort):
250 250 """Raised to print an error with part of output and exit."""
251 251
252 252 def always(fn): return True
253 253 def never(fn): return False
254 254
255 255 def expand_glob(pats):
256 256 '''On Windows, expand the implicit globs in a list of patterns'''
257 257 if os.name != 'nt':
258 258 return list(pats)
259 259 ret = []
260 260 for p in pats:
261 261 kind, name = patkind(p, None)
262 262 if kind is None:
263 263 globbed = glob.glob(name)
264 264 if globbed:
265 265 ret.extend(globbed)
266 266 continue
267 267 # if we couldn't expand the glob, just keep it around
268 268 ret.append(p)
269 269 return ret
270 270
271 271 def patkind(name, dflt_pat='glob'):
272 272 """Split a string into an optional pattern kind prefix and the
273 273 actual pattern."""
274 274 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
275 275 if name.startswith(prefix + ':'): return name.split(':', 1)
276 276 return dflt_pat, name
277 277
278 278 def globre(pat, head='^', tail='$'):
279 279 "convert a glob pattern into a regexp"
280 280 i, n = 0, len(pat)
281 281 res = ''
282 282 group = False
283 283 def peek(): return i < n and pat[i]
284 284 while i < n:
285 285 c = pat[i]
286 286 i = i+1
287 287 if c == '*':
288 288 if peek() == '*':
289 289 i += 1
290 290 res += '.*'
291 291 else:
292 292 res += '[^/]*'
293 293 elif c == '?':
294 294 res += '.'
295 295 elif c == '[':
296 296 j = i
297 297 if j < n and pat[j] in '!]':
298 298 j += 1
299 299 while j < n and pat[j] != ']':
300 300 j += 1
301 301 if j >= n:
302 302 res += '\\['
303 303 else:
304 304 stuff = pat[i:j].replace('\\','\\\\')
305 305 i = j + 1
306 306 if stuff[0] == '!':
307 307 stuff = '^' + stuff[1:]
308 308 elif stuff[0] == '^':
309 309 stuff = '\\' + stuff
310 310 res = '%s[%s]' % (res, stuff)
311 311 elif c == '{':
312 312 group = True
313 313 res += '(?:'
314 314 elif c == '}' and group:
315 315 res += ')'
316 316 group = False
317 317 elif c == ',' and group:
318 318 res += '|'
319 319 elif c == '\\':
320 320 p = peek()
321 321 if p:
322 322 i += 1
323 323 res += re.escape(p)
324 324 else:
325 325 res += re.escape(c)
326 326 else:
327 327 res += re.escape(c)
328 328 return head + res + tail
329 329
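# Worked examples of the glob -> regexp translation above (illustrative,
# not part of the original module):
#
#     globre('*.py')     # -> r'^[^/]*\.py$'   '*' does not cross '/'
#     globre('**')       # -> r'^.*$'          '**' matches across '/'
#     globre('{a,b}c')   # -> r'^(?:a|b)c$'    braces become alternation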
330 330 _globchars = {'[': 1, '{': 1, '*': 1, '?': 1}
331 331
332 332 def pathto(root, n1, n2):
333 333 '''return the relative path from one place to another.
334 334 root should use os.sep to separate directories
335 335 n1 should use os.sep to separate directories
336 336 n2 should use "/" to separate directories
337 337 returns an os.sep-separated path.
338 338
339 339 If n1 is a relative path, it is assumed to be
340 340 relative to root.
341 341 n2 should always be relative to root.
342 342 '''
343 343 if not n1: return localpath(n2)
344 344 if os.path.isabs(n1):
345 345 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
346 346 return os.path.join(root, localpath(n2))
347 347 n2 = '/'.join((pconvert(root), n2))
348 348 a, b = n1.split(os.sep), n2.split('/')
349 349 a.reverse()
350 350 b.reverse()
351 351 while a and b and a[-1] == b[-1]:
352 352 a.pop()
353 353 b.pop()
354 354 b.reverse()
355 355 return os.sep.join((['..'] * len(a)) + b)
356 356
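# A small worked example for pathto (illustrative, assuming a POSIX
# os.sep of '/'):
#
#     pathto('/repo', 'a/b', 'a/c')   # -> '../c'
#
# The shared leading components are popped off both paths; each leftover
# component of n1 then contributes one '..'.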
357 357 def canonpath(root, cwd, myname):
358 358 """return the canonical path of myname, given cwd and root"""
359 359 if root == os.sep:
360 360 rootsep = os.sep
361 361 elif root.endswith(os.sep):
362 362 rootsep = root
363 363 else:
364 364 rootsep = root + os.sep
365 365 name = myname
366 366 if not os.path.isabs(name):
367 367 name = os.path.join(root, cwd, name)
368 368 name = os.path.normpath(name)
369 369 audit_path = path_auditor(root)
370 370 if name != rootsep and name.startswith(rootsep):
371 371 name = name[len(rootsep):]
372 372 audit_path(name)
373 373 return pconvert(name)
374 374 elif name == root:
375 375 return ''
376 376 else:
377 377 # Determine whether `name' is in the hierarchy at or beneath `root',
378 378 # by iterating name=dirname(name) until that causes no change (can't
379 379 # check name == '/', because that doesn't work on windows). For each
380 380 # `name', compare dev/inode numbers. If they match, the list `rel'
381 381 # holds the reversed list of components making up the relative file
382 382 # name we want.
383 383 root_st = os.stat(root)
384 384 rel = []
385 385 while True:
386 386 try:
387 387 name_st = os.stat(name)
388 388 except OSError:
389 389 break
390 390 if samestat(name_st, root_st):
391 391 if not rel:
392 392 # name was actually the same as root (maybe a symlink)
393 393 return ''
394 394 rel.reverse()
395 395 name = os.path.join(*rel)
396 396 audit_path(name)
397 397 return pconvert(name)
398 398 dirname, basename = os.path.split(name)
399 399 rel.append(basename)
400 400 if dirname == name:
401 401 break
402 402 name = dirname
403 403
404 404 raise Abort('%s not under root' % myname)
405 405
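# Illustrative calls (hypothetical paths, assuming a POSIX system where
# /repo exists):
#
#     canonpath('/repo', '', '/repo/a/b')    # -> 'a/b'
#     canonpath('/repo', 'a', 'b')           # -> 'a/b' (cwd-relative name)
#     canonpath('/repo', '', '/elsewhere')   # raises Abort: not under root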
406 406 def matcher(canonroot, cwd='', names=[], inc=[], exc=[], src=None):
407 407 return _matcher(canonroot, cwd, names, inc, exc, 'glob', src)
408 408
409 409 def cmdmatcher(canonroot, cwd='', names=[], inc=[], exc=[], src=None,
410 410 globbed=False, default=None):
411 411 default = default or 'relpath'
412 412 if default == 'relpath' and not globbed:
413 413 names = expand_glob(names)
414 414 return _matcher(canonroot, cwd, names, inc, exc, default, src)
415 415
416 416 def _matcher(canonroot, cwd, names, inc, exc, dflt_pat, src):
417 417 """build a function to match a set of file patterns
418 418
419 419 arguments:
420 420 canonroot - the canonical root of the tree you're matching against
421 421 cwd - the current working directory, if relevant
422 422 names - patterns to find
423 423 inc - patterns to include
424 424 exc - patterns to exclude
425 425 dflt_pat - if a pattern in names has no explicit type, assume this one
426 426 src - where these patterns came from (e.g. .hgignore)
427 427
428 428 a pattern is one of:
429 429 'glob:<glob>' - a glob relative to cwd
430 430 're:<regexp>' - a regular expression
431 431 'path:<path>' - a path relative to canonroot
432 432 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
433 433 'relpath:<path>' - a path relative to cwd
434 434 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
435 435 '<something>' - one of the cases above, selected by the dflt_pat argument
436 436
437 437 returns:
438 438 a 3-tuple containing
439 439 - list of roots (places where one should start a recursive walk of the fs);
440 440 this often matches the explicit non-pattern names passed in, but also
441 441 includes the initial part of glob: patterns that has no glob characters
442 442 - a bool match(filename) function
443 443 - a bool indicating if any patterns were passed in
444 444 """
445 445
446 446 # a common case: no patterns at all
447 447 if not names and not inc and not exc:
448 448 return [], always, False
449 449
450 450 def contains_glob(name):
451 451 for c in name:
452 452 if c in _globchars: return True
453 453 return False
454 454
455 455 def regex(kind, name, tail):
456 456 '''convert a pattern into a regular expression'''
457 457 if not name:
458 458 return ''
459 459 if kind == 're':
460 460 return name
461 461 elif kind == 'path':
462 462 return '^' + re.escape(name) + '(?:/|$)'
463 463 elif kind == 'relglob':
464 464 return globre(name, '(?:|.*/)', tail)
465 465 elif kind == 'relpath':
466 466 return re.escape(name) + '(?:/|$)'
467 467 elif kind == 'relre':
468 468 if name.startswith('^'):
469 469 return name
470 470 return '.*' + name
471 471 return globre(name, '', tail)
472 472
473 473 def matchfn(pats, tail):
474 474 """build a matching function from a set of patterns"""
475 475 if not pats:
476 476 return
477 477 try:
478 478 pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
479 479 return re.compile(pat).match
480 480 except OverflowError:
481 481 # We're using a Python with a tiny regex engine and we
482 482 # made it explode, so we'll divide the pattern list in two
483 483 # until it works
484 484 l = len(pats)
485 485 if l < 2:
486 486 raise
487 487 a, b = matchfn(pats[:l/2], tail), matchfn(pats[l/2:], tail)
488 488 return lambda s: a(s) or b(s)
489 489 except re.error:
490 490 for k, p in pats:
491 491 try:
492 492 re.compile('(?:%s)' % regex(k, p, tail))
493 493 except re.error:
494 494 if src:
495 495 raise Abort("%s: invalid pattern (%s): %s" %
496 496 (src, k, p))
497 497 else:
498 498 raise Abort("invalid pattern (%s): %s" % (k, p))
499 499 raise Abort("invalid pattern")
500 500
501 501 def globprefix(pat):
502 502 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
503 503 root = []
504 504 for p in pat.split('/'):
505 505 if contains_glob(p): break
506 506 root.append(p)
507 507 return '/'.join(root) or '.'
508 508
509 509 def normalizepats(names, default):
510 510 pats = []
511 511 roots = []
512 512 anypats = False
513 513 for kind, name in [patkind(p, default) for p in names]:
514 514 if kind in ('glob', 'relpath'):
515 515 name = canonpath(canonroot, cwd, name)
516 516 elif kind in ('relglob', 'path'):
517 517 name = normpath(name)
518 518
519 519 pats.append((kind, name))
520 520
521 521 if kind in ('glob', 're', 'relglob', 'relre'):
522 522 anypats = True
523 523
524 524 if kind == 'glob':
525 525 root = globprefix(name)
526 526 roots.append(root)
527 527 elif kind in ('relpath', 'path'):
528 528 roots.append(name or '.')
529 529 elif kind == 'relglob':
530 530 roots.append('.')
531 531 return roots, pats, anypats
532 532
533 533 roots, pats, anypats = normalizepats(names, dflt_pat)
534 534
535 535 patmatch = matchfn(pats, '$') or always
536 536 incmatch = always
537 537 if inc:
538 538 dummy, inckinds, dummy = normalizepats(inc, 'glob')
539 539 incmatch = matchfn(inckinds, '(?:/|$)')
540 540 excmatch = lambda fn: False
541 541 if exc:
542 542 dummy, exckinds, dummy = normalizepats(exc, 'glob')
543 543 excmatch = matchfn(exckinds, '(?:/|$)')
544 544
545 545 if not names and inc and not exc:
546 546 # common case: hgignore patterns
547 547 match = incmatch
548 548 else:
549 549 match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
550 550
551 551 return (roots, match, (inc or exc or anypats) and True)
552 552
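# A hypothetical use of the matcher machinery above (sketch, not part of
# the original module):
#
#     roots, match, anypats = matcher('/repo', names=['glob:src/*.py'])
#     # roots   -> ['src']   non-glob prefix, seeds the filesystem walk
#     # match('src/a.py')    -> truthy; match('doc/a.py') -> falsy
#     # anypats -> True      a glob pattern was supplied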
553 553 _hgexecutable = None
554 554
555 555 def hgexecutable():
556 556 """return location of the 'hg' executable.
557 557
558 558 Defaults to $HG or 'hg' in the search path.
559 559 """
560 560 if _hgexecutable is None:
561 561 set_hgexecutable(os.environ.get('HG') or find_exe('hg', 'hg'))
562 562 return _hgexecutable
563 563
564 564 def set_hgexecutable(path):
565 565 """set location of the 'hg' executable"""
566 566 global _hgexecutable
567 567 _hgexecutable = path
568 568
569 569 def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None):
570 570 '''enhanced shell command execution.
571 571 run with environment maybe modified, maybe in different dir.
572 572
573 573 if command fails and onerr is None, return status. if onerr is a
574 574 ui object, print error message and return status; else raise onerr
575 575 as an exception.'''
576 576 def py2shell(val):
577 577 'convert python object into string that is useful to shell'
578 578 if val in (None, False):
579 579 return '0'
580 580 if val == True:
581 581 return '1'
582 582 return str(val)
583 583 oldenv = {}
584 584 for k in environ:
585 585 oldenv[k] = os.environ.get(k)
586 586 if cwd is not None:
587 587 oldcwd = os.getcwd()
588 588 origcmd = cmd
589 589 if os.name == 'nt':
590 590 cmd = '"%s"' % cmd
591 591 try:
592 592 for k, v in environ.iteritems():
593 593 os.environ[k] = py2shell(v)
594 594 os.environ['HG'] = hgexecutable()
595 595 if cwd is not None and oldcwd != cwd:
596 596 os.chdir(cwd)
597 597 rc = os.system(cmd)
598 598 if sys.platform == 'OpenVMS' and rc & 1:
599 599 rc = 0
600 600 if rc and onerr:
601 601 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
602 602 explain_exit(rc)[0])
603 603 if errprefix:
604 604 errmsg = '%s: %s' % (errprefix, errmsg)
605 605 try:
606 606 onerr.warn(errmsg + '\n')
607 607 except AttributeError:
608 608 raise onerr(errmsg)
609 609 return rc
610 610 finally:
611 611 for k, v in oldenv.iteritems():
612 612 if v is None:
613 613 del os.environ[k]
614 614 else:
615 615 os.environ[k] = v
616 616 if cwd is not None and oldcwd != cwd:
617 617 os.chdir(oldcwd)
618 618
619 619 # os.path.lexists is not available on python2.3
620 620 def lexists(filename):
621 621 "test whether a file with this name exists. does not follow symlinks"
622 622 try:
623 623 os.lstat(filename)
624 624 except:
625 625 return False
626 626 return True
627 627
628 628 def rename(src, dst):
629 629 """forcibly rename a file"""
630 630 try:
631 631 os.rename(src, dst)
632 632 except OSError, err: # FIXME: check err (EEXIST ?)
633 633 # on windows, rename to existing file is not allowed, so we
634 634 # must delete destination first. but if file is open, unlink
635 635 # schedules it for delete but does not delete it. rename
636 636 # happens immediately even for open files, so we create
637 637 # temporary file, delete it, rename destination to that name,
638 638 # then delete that. then rename is safe to do.
639 639 fd, temp = tempfile.mkstemp(dir=os.path.dirname(dst) or '.')
640 640 os.close(fd)
641 641 os.unlink(temp)
642 642 os.rename(dst, temp)
643 643 os.unlink(temp)
644 644 os.rename(src, dst)
645 645
646 646 def unlink(f):
647 647 """unlink and remove the directory if it is empty"""
648 648 os.unlink(f)
649 649 # try removing directories that might now be empty
650 650 try:
651 651 os.removedirs(os.path.dirname(f))
652 652 except OSError:
653 653 pass
654 654
655 655 def copyfile(src, dest):
656 656 "copy a file, preserving mode"
657 657 if os.path.islink(src):
658 658 try:
659 659 os.unlink(dest)
660 660 except:
661 661 pass
662 662 os.symlink(os.readlink(src), dest)
663 663 else:
664 664 try:
665 665 shutil.copyfile(src, dest)
666 666 shutil.copymode(src, dest)
667 667 except shutil.Error, inst:
668 668 raise Abort(str(inst))
669 669
670 670 def copyfiles(src, dst, hardlink=None):
671 671 """Copy a directory tree using hardlinks if possible"""
672 672
673 673 if hardlink is None:
674 674 hardlink = (os.stat(src).st_dev ==
675 675 os.stat(os.path.dirname(dst)).st_dev)
676 676
677 677 if os.path.isdir(src):
678 678 os.mkdir(dst)
679 679 for name, kind in osutil.listdir(src):
680 680 srcname = os.path.join(src, name)
681 681 dstname = os.path.join(dst, name)
682 682 copyfiles(srcname, dstname, hardlink)
683 683 else:
684 684 if hardlink:
685 685 try:
686 686 os_link(src, dst)
687 687 except (IOError, OSError):
688 688 hardlink = False
689 689 shutil.copy(src, dst)
690 690 else:
691 691 shutil.copy(src, dst)
692 692
693 693 class path_auditor(object):
694 694 '''ensure that a filesystem path contains no banned components.
695 695 the following properties of a path are checked:
696 696
697 697 - under top-level .hg
698 698 - starts at the root of a windows drive
699 699 - contains ".."
700 700 - traverses a symlink (e.g. a/symlink_here/b)
701 701 - inside a nested repository'''
702 702
703 703 def __init__(self, root):
704 704 self.audited = set()
705 705 self.auditeddir = set()
706 706 self.root = root
707 707
708 708 def __call__(self, path):
709 709 if path in self.audited:
710 710 return
711 711 normpath = os.path.normcase(path)
712 712 parts = normpath.split(os.sep)
713 713 if (os.path.splitdrive(path)[0] or parts[0] in ('.hg', '')
714 714 or os.pardir in parts):
715 715 raise Abort(_("path contains illegal component: %s") % path)
716 716 def check(prefix):
717 717 curpath = os.path.join(self.root, prefix)
718 718 try:
719 719 st = os.lstat(curpath)
720 720 except OSError, err:
721 721 # EINVAL can be raised for invalid path syntax under win32; such
722 722 # errors must be ignored, since patterns are checked this way too.
723 723 if err.errno not in (errno.ENOENT, errno.EINVAL):
724 724 raise
725 725 else:
726 726 if stat.S_ISLNK(st.st_mode):
727 727 raise Abort(_('path %r traverses symbolic link %r') %
728 728 (path, prefix))
729 729 elif (stat.S_ISDIR(st.st_mode) and
730 730 os.path.isdir(os.path.join(curpath, '.hg'))):
731 731 raise Abort(_('path %r is inside repo %r') %
732 732 (path, prefix))
733 733
734 734 prefixes = []
735 735 for c in strutil.rfindall(normpath, os.sep):
736 736 prefix = normpath[:c]
737 737 if prefix in self.auditeddir:
738 738 break
739 739 check(prefix)
740 740 prefixes.append(prefix)
741 741
742 742 self.audited.add(path)
743 743 # only add prefixes to the cache after checking everything: we don't
744 744 # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
745 745 self.auditeddir.update(prefixes)
746 746
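# Illustrative behavior (hypothetical repo root, not in the original
# source):
#
#     audit = path_auditor('/repo')
#     audit('src/a.py')   # passes; cached in audit.audited
#     audit('.hg/hgrc')   # raises Abort: under the top-level .hg
#     audit('a/../b')     # raises Abort: contains '..'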
747 747 def _makelock_file(info, pathname):
748 748 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
749 749 os.write(ld, info)
750 750 os.close(ld)
751 751
752 752 def _readlock_file(pathname):
753 753 return posixfile(pathname).read()
754 754
755 755 def nlinks(pathname):
756 756 """Return number of hardlinks for the given file."""
757 757 return os.lstat(pathname).st_nlink
758 758
759 759 if hasattr(os, 'link'):
760 760 os_link = os.link
761 761 else:
762 762 def os_link(src, dst):
763 763 raise OSError(0, _("Hardlinks not supported"))
764 764
765 765 def fstat(fp):
766 766 '''stat file object that may not have fileno method.'''
767 767 try:
768 768 return os.fstat(fp.fileno())
769 769 except AttributeError:
770 770 return os.stat(fp.name)
771 771
772 772 posixfile = file
773 773
774 774 def is_win_9x():
775 775 '''return true if run on windows 95, 98 or me.'''
776 776 try:
777 777 return sys.getwindowsversion()[3] == 1
778 778 except AttributeError:
779 779 return os.name == 'nt' and 'command' in os.environ.get('comspec', '')
780 780
781 781 getuser_fallback = None
782 782
783 783 def getuser():
784 784 '''return name of current user'''
785 785 try:
786 786 return getpass.getuser()
787 787 except ImportError:
788 788 # import of pwd will fail on windows - try fallback
789 789 if getuser_fallback:
790 790 return getuser_fallback()
791 791 # raised if win32api not available
792 792 raise Abort(_('user name not available - set USERNAME '
793 793 'environment variable'))
794 794
795 795 def username(uid=None):
796 796 """Return the name of the user with the given uid.
797 797
798 798 If uid is None, return the name of the current user."""
799 799 try:
800 800 import pwd
801 801 if uid is None:
802 802 uid = os.getuid()
803 803 try:
804 804 return pwd.getpwuid(uid)[0]
805 805 except KeyError:
806 806 return str(uid)
807 807 except ImportError:
808 808 return None
809 809
810 810 def groupname(gid=None):
811 811 """Return the name of the group with the given gid.
812 812
813 813 If gid is None, return the name of the current group."""
814 814 try:
815 815 import grp
816 816 if gid is None:
817 817 gid = os.getgid()
818 818 try:
819 819 return grp.getgrgid(gid)[0]
820 820 except KeyError:
821 821 return str(gid)
822 822 except ImportError:
823 823 return None
824 824
825 825 # File system features
826 826
827 827 def checkfolding(path):
828 828 """
829 829 Check whether the given path is on a case-sensitive filesystem
830 830
831 831 Requires a path (like /foo/.hg) ending with a foldable final
832 832 directory component.
833 833 """
834 834 s1 = os.stat(path)
835 835 d, b = os.path.split(path)
836 836 p2 = os.path.join(d, b.upper())
837 837 if path == p2:
838 838 p2 = os.path.join(d, b.lower())
839 839 try:
840 840 s2 = os.stat(p2)
841 841 if s2 == s1:
842 842 return False
843 843 return True
844 844 except:
845 845 return True
846 846
847 847 def checkexec(path):
848 848 """
849 849 Check whether the given path is on a filesystem with UNIX-like exec flags
850 850
851 851 Requires a directory (like /foo/.hg)
852 852 """
853 853 try:
854 854 EXECFLAGS = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
855 855 fh, fn = tempfile.mkstemp("", "", path)
856 856 os.close(fh)
857 857 m = os.stat(fn).st_mode
858 858 # VFAT on Linux can flip the mode but does not persist it across a
859 859 # FS remount; often detectable because new files get the exec bit on.
860 860 new_file_has_exec = m & EXECFLAGS
861 861 os.chmod(fn, m ^ EXECFLAGS)
862 862 exec_flags_cannot_flip = (os.stat(fn).st_mode == m)
863 863 os.unlink(fn)
864 864 except (IOError,OSError):
865 865 # we don't care, the user probably won't be able to commit anyway
866 866 return False
867 867 return not (new_file_has_exec or exec_flags_cannot_flip)
868 868
869 869 def execfunc(path, fallback):
870 870 '''return an is_exec() function with default to fallback'''
871 871 if checkexec(path):
872 872 return lambda x: is_exec(os.path.join(path, x))
873 873 return fallback
874 874
875 875 def checklink(path):
876 876 """check whether the given path is on a symlink-capable filesystem"""
877 877 # mktemp is not racy because symlink creation will fail if the
878 878 # file already exists
879 879 name = tempfile.mktemp(dir=path)
880 880 try:
881 881 os.symlink(".", name)
882 882 os.unlink(name)
883 883 return True
884 884 except (OSError, AttributeError):
885 885 return False
886 886
887 887 def linkfunc(path, fallback):
888 888 '''return an is_link() function with default to fallback'''
889 889 if checklink(path):
890 890 return lambda x: os.path.islink(os.path.join(path, x))
891 891 return fallback
892 892
893 893 _umask = os.umask(0)
894 894 os.umask(_umask)
895 895
896 896 def needbinarypatch():
897 897 """return True if patches should be applied in binary mode by default."""
898 898 return os.name == 'nt'
899 899
900 900 # Platform specific variants
901 901 if os.name == 'nt':
902 902 import msvcrt
903 903 nulldev = 'NUL:'
904 904
905 905 class winstdout:
906 906 '''stdout on windows misbehaves if sent through a pipe'''
907 907
908 908 def __init__(self, fp):
909 909 self.fp = fp
910 910
911 911 def __getattr__(self, key):
912 912 return getattr(self.fp, key)
913 913
914 914 def close(self):
915 915 try:
916 916 self.fp.close()
917 917 except: pass
918 918
919 919 def write(self, s):
920 920 try:
921 921 return self.fp.write(s)
922 922 except IOError, inst:
923 923 if inst.errno != 0: raise
924 924 self.close()
925 925 raise IOError(errno.EPIPE, 'Broken pipe')
926 926
927 927 def flush(self):
928 928 try:
929 929 return self.fp.flush()
930 930 except IOError, inst:
931 931 if inst.errno != errno.EINVAL: raise
932 932 self.close()
933 933 raise IOError(errno.EPIPE, 'Broken pipe')
934 934
935 935 sys.stdout = winstdout(sys.stdout)
936 936
937 937 def system_rcpath():
938 938 try:
939 939 return system_rcpath_win32()
940 940 except:
941 941 return [r'c:\mercurial\mercurial.ini']
942 942
943 943 def user_rcpath():
944 944 '''return os-specific hgrc search path to the user dir'''
945 945 try:
946 946 userrc = user_rcpath_win32()
947 947 except:
948 948 userrc = os.path.join(os.path.expanduser('~'), 'mercurial.ini')
949 949 path = [userrc]
950 950 userprofile = os.environ.get('USERPROFILE')
951 951 if userprofile:
952 952 path.append(os.path.join(userprofile, 'mercurial.ini'))
953 953 return path
954 954
955 955 def parse_patch_output(output_line):
956 956 """parses the output produced by patch and returns the file name"""
957 957 pf = output_line[14:]
958 958 if pf[0] == '`':
959 959 pf = pf[1:-1] # Remove the quotes
960 960 return pf
961 961
962 962 def testpid(pid):
963 963 '''return False if pid dead, True if running or not known'''
964 964 return True
965 965
966 966 def set_exec(f, mode):
967 967 pass
968 968
969 969 def set_link(f, mode):
970 970 pass
971 971
972 972 def set_binary(fd):
973 973 msvcrt.setmode(fd.fileno(), os.O_BINARY)
974 974
975 975 def pconvert(path):
976 976 return path.replace("\\", "/")
977 977
978 978 def localpath(path):
979 979 return path.replace('/', '\\')
980 980
981 981 def normpath(path):
982 982 return pconvert(os.path.normpath(path))
983 983
984 984 makelock = _makelock_file
985 985 readlock = _readlock_file
986 986
987 987 def samestat(s1, s2):
988 988 return False
989 989
990 990 # A sequence of backslashes is special iff it precedes a double quote:
991 991 # - if there's an even number of backslashes, the double quote is not
992 992 # quoted (i.e. it ends the quoted region)
993 993 # - if there's an odd number of backslashes, the double quote is quoted
994 994 # - in both cases, every pair of backslashes is unquoted into a single
995 995 # backslash
996 996 # (See http://msdn2.microsoft.com/en-us/library/a1y7w461.aspx )
997 997 # So, to quote a string, we must surround it in double quotes, double
998 998 # the number of backslashes that precede double quotes and add another
999 999 # backslash before every double quote (being careful with the double
1000 1000 # quote we've appended to the end)
1001 1001 _quotere = None
1002 1002 def shellquote(s):
1003 1003 global _quotere
1004 1004 if _quotere is None:
1005 1005 _quotere = re.compile(r'(\\*)("|\\$)')
1006 1006 return '"%s"' % _quotere.sub(r'\1\1\\\2', s)
1007 1007
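# Hedged examples of the quoting rules described above (illustrative,
# not part of the original source):
#
#     shellquote('foo bar')   # -> "foo bar"  (just wrapped in quotes)
#     shellquote('a"b')       # -> "a\"b"     (embedded quote is escaped,
#                             #    per the backslash rules above)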
1008 1008 def quotecommand(cmd):
1009 1009 """Build a command string suitable for os.popen* calls."""
1010 1010 # The extra quotes are needed because popen* runs the command
1011 1011 # through the current COMSPEC. cmd.exe suppresses enclosing quotes.
1012 1012 return '"' + cmd + '"'
1013 1013
1014 1014 def explain_exit(code):
1015 1015 return _("exited with status %d") % code, code
1016 1016
1017 1017 # if you change this stub into a real check, please try to implement the
1018 1018 # username and groupname functions above, too.
1019 1019 def isowner(fp, st=None):
1020 1020 return True
1021 1021
1022 1022 def find_in_path(name, path, default=None):
1023 1023 '''find name in search path. path can be string (will be split
1024 1024 with os.pathsep), or iterable thing that returns strings. if name
1025 1025 found, return path to name. else return default. name is looked up
1026 1026 using cmd.exe rules, using PATHEXT.'''
1027 1027 if isinstance(path, str):
1028 1028 path = path.split(os.pathsep)
1029 1029
1030 1030 pathext = os.environ.get('PATHEXT', '.COM;.EXE;.BAT;.CMD')
1031 1031 pathext = pathext.lower().split(os.pathsep)
1032 1032 isexec = os.path.splitext(name)[1].lower() in pathext
1033 1033
1034 1034 for p in path:
1035 1035 p_name = os.path.join(p, name)
1036 1036
1037 1037 if isexec and os.path.exists(p_name):
1038 1038 return p_name
1039 1039
1040 1040 for ext in pathext:
1041 1041 p_name_ext = p_name + ext
1042 1042 if os.path.exists(p_name_ext):
1043 1043 return p_name_ext
1044 1044 return default
1045 1045
1046 1046 def set_signal_handler():
1047 1047 try:
1048 1048 set_signal_handler_win32()
1049 1049 except NameError:
1050 1050 pass
1051 1051
1052 1052 try:
1053 1053 # override functions with win32 versions if possible
1054 1054 from util_win32 import *
1055 1055 if not is_win_9x():
1056 1056 posixfile = posixfile_nt
1057 1057 except ImportError:
1058 1058 pass
1059 1059
1060 1060 else:
1061 1061 nulldev = '/dev/null'
1062 1062
1063 1063 def rcfiles(path):
1064 1064 rcs = [os.path.join(path, 'hgrc')]
1065 1065 rcdir = os.path.join(path, 'hgrc.d')
1066 1066 try:
1067 1067 rcs.extend([os.path.join(rcdir, f)
1068 1068 for f, kind in osutil.listdir(rcdir)
1069 1069 if f.endswith(".rc")])
1070 1070 except OSError:
1071 1071 pass
1072 1072 return rcs
1073 1073
1074 1074 def system_rcpath():
1075 1075 path = []
1076 1076 # old mod_python does not set sys.argv
1077 1077 if len(getattr(sys, 'argv', [])) > 0:
1078 1078 path.extend(rcfiles(os.path.dirname(sys.argv[0]) +
1079 1079 '/../etc/mercurial'))
1080 1080 path.extend(rcfiles('/etc/mercurial'))
1081 1081 return path
1082 1082
1083 1083 def user_rcpath():
1084 1084 return [os.path.expanduser('~/.hgrc')]
1085 1085
1086 1086 def parse_patch_output(output_line):
1087 1087 """parses the output produced by patch and returns the file name"""
1088 1088 pf = output_line[14:]
1089 1089 if os.sys.platform == 'OpenVMS':
1090 1090 if pf[0] == '`':
1091 1091 pf = pf[1:-1] # Remove the quotes
1092 1092 else:
1093 1093 if pf.startswith("'") and pf.endswith("'") and " " in pf:
1094 1094 pf = pf[1:-1] # Remove the quotes
1095 1095 return pf
1096 1096
1097 1097 def is_exec(f):
1098 1098 """check whether a file is executable"""
1099 1099 return (os.lstat(f).st_mode & 0100 != 0)
1100 1100
1101 1101 def set_exec(f, mode):
1102 1102 s = os.lstat(f).st_mode
1103 1103 if stat.S_ISLNK(s) or (s & 0100 != 0) == mode:
1104 1104 return
1105 1105 if mode:
1106 1106 # Turn on +x for every +r bit when making a file executable
1107 1107 # and obey umask.
1108 1108 os.chmod(f, s | (s & 0444) >> 2 & ~_umask)
1109 1109 else:
1110 1110 os.chmod(f, s & 0666)
1111 1111
1112 1112 def set_link(f, mode):
1113 1113 """make a file a symbolic link/regular file
1114 1114
1115 1115 if a file is changed to a link, its contents become the link data;
1116 1116 if a link is changed to a file, its link data becomes its contents
1117 1117 """
1118 1118
1119 1119 m = os.path.islink(f)
1120 1120 if m == bool(mode):
1121 1121 return
1122 1122
1123 1123 if mode: # switch file to link
1124 1124 data = file(f).read()
1125 1125 os.unlink(f)
1126 1126 os.symlink(data, f)
1127 1127 else:
1128 1128 data = os.readlink(f)
1129 1129 os.unlink(f)
1130 1130 file(f, "w").write(data)
1131 1131
1132 1132 def set_binary(fd):
1133 1133 pass
1134 1134
1135 1135 def pconvert(path):
1136 1136 return path
1137 1137
1138 1138 def localpath(path):
1139 1139 return path
1140 1140
1141 1141 normpath = os.path.normpath
1142 1142 samestat = os.path.samestat
1143 1143
1144 1144 def makelock(info, pathname):
1145 1145 try:
1146 1146 os.symlink(info, pathname)
1147 1147 except OSError, why:
1148 1148 if why.errno == errno.EEXIST:
1149 1149 raise
1150 1150 else:
1151 1151 _makelock_file(info, pathname)
1152 1152
1153 1153 def readlock(pathname):
1154 1154 try:
1155 1155 return os.readlink(pathname)
1156 1156 except OSError, why:
1157 1157 if why.errno in (errno.EINVAL, errno.ENOSYS):
1158 1158 return _readlock_file(pathname)
1159 1159 else:
1160 1160 raise
1161 1161
1162 1162 def shellquote(s):
1163 1163 if os.sys.platform == 'OpenVMS':
1164 1164 return '"%s"' % s
1165 1165 else:
1166 1166 return "'%s'" % s.replace("'", "'\\''")
1167 1167
1168 1168 def quotecommand(cmd):
1169 1169 return cmd
1170 1170
1171 1171 def testpid(pid):
1172 1172 '''return False if pid dead, True if running or not sure'''
1173 1173 if os.sys.platform == 'OpenVMS':
1174 1174 return True
1175 1175 try:
1176 1176 os.kill(pid, 0)
1177 1177 return True
1178 1178 except OSError, inst:
1179 1179 return inst.errno != errno.ESRCH
1180 1180
1181 1181 def explain_exit(code):
1182 1182 """return a 2-tuple (desc, code) describing a process's status"""
1183 1183 if os.WIFEXITED(code):
1184 1184 val = os.WEXITSTATUS(code)
1185 1185 return _("exited with status %d") % val, val
1186 1186 elif os.WIFSIGNALED(code):
1187 1187 val = os.WTERMSIG(code)
1188 1188 return _("killed by signal %d") % val, val
1189 1189 elif os.WIFSTOPPED(code):
1190 1190 val = os.WSTOPSIG(code)
1191 1191 return _("stopped by signal %d") % val, val
1192 1192 raise ValueError(_("invalid exit code"))
1193 1193
1194 1194 def isowner(fp, st=None):
1195 1195 """Return True if the file object fp belongs to the current user.
1196 1196
1197 1197 The return value of util.fstat(fp) may be passed as the st argument.
1198 1198 """
1199 1199 if st is None:
1200 1200 st = fstat(fp)
1201 1201 return st.st_uid == os.getuid()
1202 1202
1203 1203 def find_in_path(name, path, default=None):
1204 1204 '''find name in search path. path can be string (will be split
1205 1205 with os.pathsep), or iterable thing that returns strings. if name
1206 1206 found, return path to name. else return default.'''
1207 1207 if isinstance(path, str):
1208 1208 path = path.split(os.pathsep)
1209 1209 for p in path:
1210 1210 p_name = os.path.join(p, name)
1211 1211 if os.path.exists(p_name):
1212 1212 return p_name
1213 1213 return default
1214 1214
1215 1215 def set_signal_handler():
1216 1216 pass
1217 1217
1218 1218 def find_exe(name, default=None):
1219 1219 '''find path of an executable.
1220 1220 if name contains a path component, return it as is. otherwise,
1221 1221 use normal executable search path.'''
1222 1222
1223 1223 if os.sep in name or sys.platform == 'OpenVMS':
1224 1224 # don't check the executable bit. if the file isn't
1225 1225 # executable, whoever tries to actually run it will give a
1226 1226 # much more useful error message.
1227 1227 return name
1228 1228 return find_in_path(name, os.environ.get('PATH', ''), default=default)
1229 1229
1230 1230 def _buildencodefun():
1231 1231 e = '_'
1232 1232 win_reserved = [ord(x) for x in '\\:*?"<>|']
1233 1233 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
1234 1234 for x in (range(32) + range(126, 256) + win_reserved):
1235 1235 cmap[chr(x)] = "~%02x" % x
1236 1236 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
1237 1237 cmap[chr(x)] = e + chr(x).lower()
1238 1238 dmap = {}
1239 1239 for k, v in cmap.iteritems():
1240 1240 dmap[v] = k
1241 1241 def decode(s):
1242 1242 i = 0
1243 1243 while i < len(s):
1244 1244 for l in xrange(1, 4):
1245 1245 try:
1246 1246 yield dmap[s[i:i+l]]
1247 1247 i += l
1248 1248 break
1249 1249 except KeyError:
1250 1250 pass
1251 1251 else:
1252 1252 raise KeyError
1253 1253 return (lambda s: "".join([cmap[c] for c in s]),
1254 1254 lambda s: "".join(list(decode(s))))
1255 1255
1256 1256 encodefilename, decodefilename = _buildencodefun()
1257 1257
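# Worked examples of the filename encoding above (illustrative, not part
# of the original module):
#
#     encodefilename('Foo:Bar')      # -> '_foo~3a_bar'
#     encodefilename('the_file')     # -> 'the__file'  ('_' escapes itself)
#     decodefilename('_foo~3a_bar')  # -> 'Foo:Bar'
#
# Uppercase maps to '_' + lowercase and reserved/non-printable bytes map
# to '~' + two hex digits, so encoded names survive case-folding and
# Windows-reserved characters.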
1258 1258 def encodedopener(openerfn, fn):
1259 1259 def o(path, *args, **kw):
1260 1260 return openerfn(fn(path), *args, **kw)
1261 1261 return o
1262 1262
1263 1263 def mktempcopy(name, emptyok=False):
1264 1264 """Create a temporary file with the same contents as name
1265 1265
1266 1266 The permission bits are copied from the original file.
1267 1267
1268 1268 If the temporary file is going to be truncated immediately, you
1269 1269 can use emptyok=True as an optimization.
1270 1270
1271 1271 Returns the name of the temporary file.
1272 1272 """
1273 1273 d, fn = os.path.split(name)
1274 1274 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1275 1275 os.close(fd)
1276 1276 # Temporary files are created with mode 0600, which is usually not
1277 1277 # what we want. If the original file already exists, just copy
1278 1278 # its mode. Otherwise, manually obey umask.
1279 1279 try:
1280 1280 st_mode = os.lstat(name).st_mode
1281 1281 except OSError, inst:
1282 1282 if inst.errno != errno.ENOENT:
1283 1283 raise
1284 1284 st_mode = 0666 & ~_umask
1285 1285 os.chmod(temp, st_mode)
1286 1286 if emptyok:
1287 1287 return temp
1288 1288 try:
1289 1289 try:
1290 1290 ifp = posixfile(name, "rb")
1291 1291 except IOError, inst:
1292 1292 if inst.errno == errno.ENOENT:
1293 1293 return temp
1294 1294 if not getattr(inst, 'filename', None):
1295 1295 inst.filename = name
1296 1296 raise
1297 1297 ofp = posixfile(temp, "wb")
1298 1298 for chunk in filechunkiter(ifp):
1299 1299 ofp.write(chunk)
1300 1300 ifp.close()
1301 1301 ofp.close()
1302 1302 except:
1303 1303 try: os.unlink(temp)
1304 1304 except: pass
1305 1305 raise
1306 1306 return temp
1307 1307
1308 1308 class atomictempfile(posixfile):
1309 1309 """file-like object that atomically updates a file
1310 1310
1311 1311 All writes will be redirected to a temporary copy of the original
1312 1312 file. When rename is called, the copy is renamed to the original
1313 1313 name, making the changes visible.
1314 1314 """
1315 1315 def __init__(self, name, mode):
1316 1316 self.__name = name
1317 1317 self.temp = mktempcopy(name, emptyok=('w' in mode))
1318 1318 posixfile.__init__(self, self.temp, mode)
1319 1319
1320 1320 def rename(self):
1321 1321 if not self.closed:
1322 1322 posixfile.close(self)
1323 1323 rename(self.temp, localpath(self.__name))
1324 1324
1325 1325 def __del__(self):
1326 1326 if not self.closed:
1327 1327 try:
1328 1328 os.unlink(self.temp)
1329 1329 except: pass
1330 1330 posixfile.close(self)
1331 1331
1332 1332 class opener(object):
1333 1333 """Open files relative to a base directory
1334 1334
1335 1335 This class is used to hide the details of COW semantics and
1336 1336 remote file access from higher level code.
1337 1337 """
1338 1338 def __init__(self, base, audit=True):
1339 1339 self.base = base
1340 1340 if audit:
1341 1341 self.audit_path = path_auditor(base)
1342 1342 else:
1343 1343 self.audit_path = always
1344 1344
1345 1345 def __getattr__(self, name):
1346 1346 if name == '_can_symlink':
1347 1347 self._can_symlink = checklink(self.base)
1348 1348 return self._can_symlink
1349 1349 raise AttributeError(name)
1350 1350
1351 1351 def __call__(self, path, mode="r", text=False, atomictemp=False):
1352 1352 self.audit_path(path)
1353 1353 f = os.path.join(self.base, path)
1354 1354
1355 1355 if not text and "b" not in mode:
1356 1356 mode += "b" # for that other OS
1357 1357
1358 1358 if mode[0] != "r":
1359 1359 try:
1360 1360 nlink = nlinks(f)
1361 1361 except OSError:
1362 1362 nlink = 0
1363 1363 d = os.path.dirname(f)
1364 1364 if not os.path.isdir(d):
1365 1365 os.makedirs(d)
1366 1366 if atomictemp:
1367 1367 return atomictempfile(f, mode)
1368 1368 if nlink > 1:
1369 1369 rename(mktempcopy(f), f)
1370 1370 return posixfile(f, mode)
1371 1371
1372 1372 def symlink(self, src, dst):
1373 1373 self.audit_path(dst)
1374 1374 linkname = os.path.join(self.base, dst)
1375 1375 try:
1376 1376 os.unlink(linkname)
1377 1377 except OSError:
1378 1378 pass
1379 1379
1380 1380 dirname = os.path.dirname(linkname)
1381 1381 if not os.path.exists(dirname):
1382 1382 os.makedirs(dirname)
1383 1383
1384 1384 if self._can_symlink:
1385 1385 try:
1386 1386 os.symlink(src, linkname)
1387 1387 except OSError, err:
1388 1388 raise OSError(err.errno, _('could not symlink to %r: %s') %
1389 1389 (src, err.strerror), linkname)
1390 1390 else:
1391 1391 f = self(dst, "w")
1392 1392 f.write(src)
1393 1393 f.close()
1394 1394
1395 1395 class chunkbuffer(object):
1396 1396 """Allow arbitrary sized chunks of data to be efficiently read from an
1397 1397 iterator over chunks of arbitrary size."""
1398 1398
1399 1399 def __init__(self, in_iter):
1400 1400 """in_iter is the iterator that's iterating over the input chunks.
1401 1401 targetsize (fixed at 2**16 here) is how big a buffer to try to maintain."""
1402 1402 self.iter = iter(in_iter)
1403 1403 self.buf = ''
1404 1404 self.targetsize = 2**16
1405 1405
1406 1406 def read(self, l):
1407 1407 """Read L bytes of data from the iterator of chunks of data.
1408 1408 Returns less than L bytes if the iterator runs dry."""
1409 1409 if l > len(self.buf) and self.iter:
1410 1410 # collect at least l bytes, and at least self.targetsize
1411 1411 targetsize = max(l, self.targetsize)
1412 1412 collector = cStringIO.StringIO()
1413 1413 collector.write(self.buf)
1414 1414 collected = len(self.buf)
1415 1415 for chunk in self.iter:
1416 1416 collector.write(chunk)
1417 1417 collected += len(chunk)
1418 1418 if collected >= targetsize:
1419 1419 break
1420 1420 if collected < targetsize:
1421 1421 self.iter = False
1422 1422 self.buf = collector.getvalue()
1423 1423 if len(self.buf) == l:
1424 s, self.buf = self.buf, ''
1424 s, self.buf = str(self.buf), ''
1425 1425 else:
1426 1426 s, self.buf = self.buf[:l], buffer(self.buf, l)
1427 1427 return s
1428 1428
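# Note on the one-line change above, the point of this commit: after a
# partial read, self.buf is a buffer object (a view into the previous
# string, created two lines below the change), so a subsequent exact-length
# read would hand that buffer to callers, and the revlog would then cache
# a buffer instead of a real str. str() copies the bytes out first:
#
#     b = buffer('abcdef', 2)   # Python 2 view onto 'cdef', not a str
#     isinstance(b, str)        # -> False
#     str(b)                    # -> 'cdef'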
1429 1429 def filechunkiter(f, size=65536, limit=None):
1430 1430 """Create a generator that produces the data in the file, size
1431 1431 (default 65536) bytes at a time, up to optional limit (default is
1432 1432 to read all data). Chunks may be less than size bytes if the
1433 1433 chunk is the last chunk in the file, or the file is a socket or
1434 1434 some other type of file that sometimes reads less data than is
1435 1435 requested."""
1436 1436 assert size >= 0
1437 1437 assert limit is None or limit >= 0
1438 1438 while True:
1439 1439 if limit is None: nbytes = size
1440 1440 else: nbytes = min(limit, size)
1441 1441 s = nbytes and f.read(nbytes)
1442 1442 if not s: break
1443 1443 if limit: limit -= len(s)
1444 1444 yield s
1445 1445
1446 1446 def makedate():
1447 1447 lt = time.localtime()
1448 1448 if lt[8] == 1 and time.daylight:
1449 1449 tz = time.altzone
1450 1450 else:
1451 1451 tz = time.timezone
1452 1452 return time.mktime(lt), tz
1453 1453
1454 1454 def datestr(date=None, format='%a %b %d %H:%M:%S %Y', timezone=True, timezone_format=" %+03d%02d"):
1455 1455 """represent a (unixtime, offset) tuple as a localized time.
1456 1456 unixtime is seconds since the epoch, and offset is the time zone's
1457 1457 number of seconds away from UTC. if timezone is false, do not
1458 1458 append time zone to string."""
1459 1459 t, tz = date or makedate()
1460 1460 s = time.strftime(format, time.gmtime(float(t) - tz))
1461 1461 if timezone:
1462 1462 s += timezone_format % (-tz / 3600, ((-tz % 3600) / 60))
1463 1463 return s
1464 1464
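# Illustrative outputs (assuming the C locale; not part of the original
# source):
#
#     datestr((0, 0))       # -> 'Thu Jan 01 00:00:00 1970 +0000'
#     datestr((0, -3600))   # -> 'Thu Jan 01 01:00:00 1970 +0100'
#
# The offset is seconds west of UTC, so a negative offset means east.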
1465 1465 def strdate(string, format, defaults=[]):
1466 1466 """parse a localized time string and return a (unixtime, offset) tuple.
1467 1467 if the string cannot be parsed, ValueError is raised."""
1468 1468 def timezone(string):
1469 1469 tz = string.split()[-1]
1470 1470 if tz[0] in "+-" and len(tz) == 5 and tz[1:].isdigit():
1471 1471 tz = int(tz)
1472 1472 offset = - 3600 * (tz / 100) - 60 * (tz % 100)
1473 1473 return offset
1474 1474 if tz == "GMT" or tz == "UTC":
1475 1475 return 0
1476 1476 return None
1477 1477
1478 1478 # NOTE: unixtime = localunixtime + offset
1479 1479 offset, date = timezone(string), string
1480 1480 if offset != None:
1481 1481 date = " ".join(string.split()[:-1])
1482 1482
1483 1483 # add missing elements from defaults
1484 1484 for part in defaults:
1485 1485 found = [True for p in part if ("%"+p) in format]
1486 1486 if not found:
1487 1487 date += "@" + defaults[part]
1488 1488 format += "@%" + part[0]
1489 1489
1490 1490 timetuple = time.strptime(date, format)
1491 1491 localunixtime = int(calendar.timegm(timetuple))
1492 1492 if offset is None:
1493 1493 # local timezone
1494 1494 unixtime = int(time.mktime(timetuple))
1495 1495 offset = unixtime - localunixtime
1496 1496 else:
1497 1497 unixtime = localunixtime + offset
1498 1498 return unixtime, offset
1499 1499
1500 1500 def parsedate(string, formats=None, defaults=None):
1501 1501 """parse a localized time string and return a (unixtime, offset) tuple.
1502 1502 The date may be a "unixtime offset" string or in one of the specified
1503 1503 formats."""
1504 1504 if not string:
1505 1505 return 0, 0
1506 1506 if not formats:
1507 1507 formats = defaultdateformats
1508 1508 string = string.strip()
1509 1509 try:
1510 1510 when, offset = map(int, string.split(' '))
1511 1511 except ValueError:
1512 1512 # fill out defaults
1513 1513 if not defaults:
1514 1514 defaults = {}
1515 1515 now = makedate()
1516 1516 for part in "d mb yY HI M S".split():
1517 1517 if part not in defaults:
1518 1518 if part[0] in "HMS":
1519 1519 defaults[part] = "00"
1520 1520 elif part[0] in "dm":
1521 1521 defaults[part] = "1"
1522 1522 else:
1523 1523 defaults[part] = datestr(now, "%" + part[0], False)
1524 1524
1525 1525 for format in formats:
1526 1526 try:
1527 1527 when, offset = strdate(string, format, defaults)
1528 1528 except ValueError:
1529 1529 pass
1530 1530 else:
1531 1531 break
1532 1532 else:
1533 1533 raise Abort(_('invalid date: %r ') % string)
1534 1534 # validate explicit (probably user-specified) date and
1535 1535 # time zone offset. values must fit in signed 32 bits for
1536 1536 # current 32-bit linux runtimes. timezones go from UTC-12
1537 1537 # to UTC+14
1538 1538 if abs(when) > 0x7fffffff:
1539 1539 raise Abort(_('date exceeds 32 bits: %d') % when)
1540 1540 if offset < -50400 or offset > 43200:
1541 1541 raise Abort(_('impossible time zone offset: %d') % offset)
1542 1542 return when, offset
1543 1543
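# Sketches of parsedate behavior (illustrative, not in the original
# source):
#
#     parsedate('1165432709 0')       # -> (1165432709, 0), internal form
#     parsedate('1165432709 -3600')   # -> (1165432709, -3600)
#     parsedate('')                   # -> (0, 0)
#
# Anything else is tried against defaultdateformats, with missing fields
# (seconds, day, year, ...) filled in from the current date via defaults.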
1544 1544 def matchdate(date):
1545 1545 """Return a function that matches a given date match specifier
1546 1546
1547 1547 Formats include:
1548 1548
1549 1549 '{date}' match a given date to the accuracy provided
1550 1550
1551 1551 '<{date}' on or before a given date
1552 1552
1553 1553 '>{date}' on or after a given date
1554 1554
1555 1555 """
1556 1556
1557 1557 def lower(date):
1558 1558 return parsedate(date, extendeddateformats)[0]
1559 1559
1560 1560 def upper(date):
1561 1561 d = dict(mb="12", HI="23", M="59", S="59")
1562 1562 for days in "31 30 29".split():
1563 1563 try:
1564 1564 d["d"] = days
1565 1565 return parsedate(date, extendeddateformats, d)[0]
1566 1566 except:
1567 1567 pass
1568 1568 d["d"] = "28"
1569 1569 return parsedate(date, extendeddateformats, d)[0]
1570 1570
1571 1571 if date[0] == "<":
1572 1572 when = upper(date[1:])
1573 1573 return lambda x: x <= when
1574 1574 elif date[0] == ">":
1575 1575 when = lower(date[1:])
1576 1576 return lambda x: x >= when
1577 1577 elif date[0] == "-":
1578 1578 try:
1579 1579 days = int(date[1:])
1580 1580 except ValueError:
1581 1581 raise Abort(_("invalid day spec: %s") % date[1:])
1582 1582 when = makedate()[0] - days * 3600 * 24
1583 1583 return lambda x: x >= when
1584 1584 elif " to " in date:
1585 1585 a, b = date.split(" to ")
1586 1586 start, stop = lower(a), upper(b)
1587 1587 return lambda x: x >= start and x <= stop
1588 1588 else:
1589 1589 start, stop = lower(date), upper(date)
1590 1590 return lambda x: x >= start and x <= stop
1591 1591
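# Hypothetical usage (not part of the original module):
#
#     m = matchdate('-7')            # last 7 days:
#                                    # x >= makedate()[0] - 7 * 3600 * 24
#     m = matchdate('>2006-05-05')   # on or after that date
#     m = matchdate('2006-05')       # anywhere within May 2006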
1592 1592 def shortuser(user):
1593 1593 """Return a short representation of a user name or email address."""
1594 1594 f = user.find('@')
1595 1595 if f >= 0:
1596 1596 user = user[:f]
1597 1597 f = user.find('<')
1598 1598 if f >= 0:
1599 1599 user = user[f+1:]
1600 1600 f = user.find(' ')
1601 1601 if f >= 0:
1602 1602 user = user[:f]
1603 1603 f = user.find('.')
1604 1604 if f >= 0:
1605 1605 user = user[:f]
1606 1606 return user
1607 1607
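# A worked example (illustrative, not in the original source):
#
#     shortuser('John Doe <john.doe@example.com>')   # -> 'john'
#
# Everything from the '@' on is dropped, then everything up to '<',
# then anything after a space or '.'.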
1608 1608 def ellipsis(text, maxlength=400):
1609 1609 """Trim string to at most maxlength (default: 400) characters."""
1610 1610 if len(text) <= maxlength:
1611 1611 return text
1612 1612 else:
1613 1613 return "%s..." % (text[:maxlength-3])
1614 1614
1615 1615 def walkrepos(path):
1616 1616 '''yield every hg repository under path, recursively.'''
1617 1617 def errhandler(err):
1618 1618 if err.filename == path:
1619 1619 raise err
1620 1620
1621 1621 for root, dirs, files in os.walk(path, onerror=errhandler):
1622 1622 for d in dirs:
1623 1623 if d == '.hg':
1624 1624 yield root
1625 1625 dirs[:] = []
1626 1626 break
1627 1627
1628 1628 _rcpath = None
1629 1629
1630 1630 def os_rcpath():
1631 1631 '''return default os-specific hgrc search path'''
1632 1632 path = system_rcpath()
1633 1633 path.extend(user_rcpath())
1634 1634 path = [os.path.normpath(f) for f in path]
1635 1635 return path
1636 1636
1637 1637 def rcpath():
1638 1638 '''return hgrc search path. if env var HGRCPATH is set, use it.
1639 1639 for each item in path, if directory, use files ending in .rc,
1640 1640 else use item.
1641 1641 make HGRCPATH empty to only look in .hg/hgrc of current repo.
1642 1642 if no HGRCPATH, use default os-specific path.'''
1643 1643 global _rcpath
1644 1644 if _rcpath is None:
1645 1645 if 'HGRCPATH' in os.environ:
1646 1646 _rcpath = []
1647 1647 for p in os.environ['HGRCPATH'].split(os.pathsep):
1648 1648 if not p: continue
1649 1649 if os.path.isdir(p):
1650 1650 for f, kind in osutil.listdir(p):
1651 1651 if f.endswith('.rc'):
1652 1652 _rcpath.append(os.path.join(p, f))
1653 1653 else:
1654 1654 _rcpath.append(p)
1655 1655 else:
1656 1656 _rcpath = os_rcpath()
1657 1657 return _rcpath
1658 1658
1659 1659 def bytecount(nbytes):
1660 1660 '''return byte count formatted as readable string, with units'''
1661 1661
1662 1662 units = (
1663 1663 (100, 1<<30, _('%.0f GB')),
1664 1664 (10, 1<<30, _('%.1f GB')),
1665 1665 (1, 1<<30, _('%.2f GB')),
1666 1666 (100, 1<<20, _('%.0f MB')),
1667 1667 (10, 1<<20, _('%.1f MB')),
1668 1668 (1, 1<<20, _('%.2f MB')),
1669 1669 (100, 1<<10, _('%.0f KB')),
1670 1670 (10, 1<<10, _('%.1f KB')),
1671 1671 (1, 1<<10, _('%.2f KB')),
1672 1672 (1, 1, _('%.0f bytes')),
1673 1673 )
1674 1674
1675 1675 for multiplier, divisor, format in units:
1676 1676 if nbytes >= divisor * multiplier:
1677 1677 return format % (nbytes / float(divisor))
1678 1678 return units[-1][2] % nbytes
1679 1679
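# Sample outputs (illustrative, not part of the original source):
#
#     bytecount(100)       # -> '100 bytes'
#     bytecount(2048)      # -> '2.00 KB'
#     bytecount(1 << 20)   # -> '1.00 MB'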
1680 1680 def drop_scheme(scheme, path):
1681 1681 sc = scheme + ':'
1682 1682 if path.startswith(sc):
1683 1683 path = path[len(sc):]
1684 1684 if path.startswith('//'):
1685 1685 path = path[2:]
1686 1686 return path
1687 1687
1688 1688 def uirepr(s):
1689 1689 # Avoid double backslash in Windows path repr()
1690 1690 return repr(s).replace('\\\\', '\\')