windows: revlog.lazyparser not always safe to use....
Vadim Gelfer -
r2250:45aef5dd default
@@ -1,1246 +1,1254 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s" % flag))
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
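One property of the hash worth calling out: because the parent ids are sorted before digesting, a node id does not depend on parent order. A minimal sketch, using made-up 20-byte parent ids:

    p1, p2 = "a" * 20, "b" * 20          # fake 20-byte parent node ids
    assert hash("text", p1, p2) == hash("text", p2, p1)
    assert hash("text", p1, p2) != hash("other", p1, p2)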
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
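The two functions are built to round-trip: compress() returns a (header, data) pair so callers can write the pieces separately, and decompress() dispatches on the first byte ('\0' means the text itself starts with NUL and was stored raw, 'u' means stored uncompressed, 'x' is the leading byte of a zlib stream). A hedged sketch:

    text = "some revision text " * 10
    h, d = compress(text)                # compressible: zlib, header ""
    assert decompress(h + d) == text
    h, d = compress("tiny")              # under 44 bytes: ('u', text)
    assert h == 'u' and decompress(h + d) == "tiny"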
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
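As a sanity check on the layouts above: the ng format packs to fixed 64-byte records with the nodeid starting at byte 32 (hence ngshaoffset), and v0 packs to 76 bytes with the nodeid at 56. A hedged sketch unpacking one ng record; the all-zero record is just a placeholder:

    import struct
    assert struct.calcsize(indexformatng) == 64    # 8 + 6*4 + 20 + 12 pad
    assert struct.calcsize(indexformatv0) == 76    # 4*4 + 3*20
    record = "\0" * 64                             # placeholder entry
    (offset_flags, comp_len, uncomp_len, base_rev,
     link_rev, p1_rev, p2_rev, nodeid) = struct.unpack(indexformatng, record)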
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90
91 # lazyparser is not safe to use on windows if win32 extensions are not
92 # available: it keeps a file handle open, which makes it impossible to
93 # break hardlinks on locally cloned repos (see the sketch after load()).
94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 hasattr(util, 'win32api'))
96
90 97 def __init__(self, dataf, size, indexformat, shaoffset):
91 98 self.dataf = dataf
92 99 self.format = indexformat
93 100 self.s = struct.calcsize(indexformat)
94 101 self.indexformat = indexformat
95 102 self.datasize = size
96 103 self.l = size/self.s
97 104 self.index = [None] * self.l
98 105 self.map = {nullid: -1}
99 106 self.allmap = 0
100 107 self.all = 0
101 108 self.mapfind_count = 0
102 109 self.shaoffset = shaoffset
103 110
104 111 def loadmap(self):
105 112 """
106 113 during a commit, we need to make sure the rev being added is
107 114 not a duplicate. This requires loading the entire index,
108 115 which is fairly slow. loadmap can load up just the node map,
109 116 which takes much less time.
110 117 """
111 118 if self.allmap: return
112 119 start = 0
113 120 end = self.datasize
114 121 self.allmap = 1
115 122 cur = 0
116 123 count = 0
117 124 blocksize = self.s * 256
118 125 self.dataf.seek(0)
119 126 while cur < end:
120 127 data = self.dataf.read(blocksize)
121 128 off = 0
122 129 for x in xrange(256):
123 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
124 131 self.map[n] = count
125 132 count += 1
126 133 if count >= self.l:
127 134 break
128 135 off += self.s
129 136 cur += blocksize
130 137
131 138 def loadblock(self, blockstart, blocksize, data=None):
132 139 if self.all: return
133 140 if data is None:
134 141 self.dataf.seek(blockstart)
135 142 data = self.dataf.read(blocksize)
136 143 lend = len(data) / self.s
137 144 i = blockstart / self.s
138 145 off = 0
139 146 for x in xrange(lend):
140 147 if self.index[i + x] == None:
141 148 b = data[off : off + self.s]
142 149 self.index[i + x] = b
143 150 n = b[self.shaoffset:self.shaoffset + 20]
144 151 self.map[n] = i + x
145 152 off += self.s
146 153
147 154 def findnode(self, node):
148 155 """search backwards through the index file for a specific node"""
149 156 if self.allmap: return None
150 157
151 158 # hg log will cause many many searches for the manifest
152 159 # nodes. After we get called a few times, just load the whole
153 160 # thing.
154 161 if self.mapfind_count > 8:
155 162 self.loadmap()
156 163 if node in self.map:
157 164 return node
158 165 return None
159 166 self.mapfind_count += 1
160 167 last = self.l - 1
161 168 while self.index[last] != None:
162 169 if last == 0:
163 170 self.all = 1
164 171 self.allmap = 1
165 172 return None
166 173 last -= 1
167 174 end = (last + 1) * self.s
168 175 blocksize = self.s * 256
169 176 while end >= 0:
170 177 start = max(end - blocksize, 0)
171 178 self.dataf.seek(start)
172 179 data = self.dataf.read(end - start)
173 180 findend = end - start
174 181 while True:
175 182 # we're searching backwards, so we have to make sure
176 183 # we don't find a changeset where this node is a parent
177 184 off = data.rfind(node, 0, findend)
178 185 findend = off
179 186 if off >= 0:
180 187 i = off / self.s
181 188 off = i * self.s
182 189 n = data[off + self.shaoffset:off + self.shaoffset + 20]
183 190 if n == node:
184 191 self.map[n] = i + start / self.s
185 192 return node
186 193 else:
187 194 break
188 195 end -= blocksize
189 196 return None
190 197
191 198 def loadindex(self, i=None, end=None):
192 199 if self.all: return
193 200 all = False
194 201 if i == None:
195 202 blockstart = 0
196 203 blocksize = (512 / self.s) * self.s
197 204 end = self.datasize
198 205 all = True
199 206 else:
200 207 if end:
201 208 blockstart = i * self.s
202 209 end = end * self.s
203 210 blocksize = end - blockstart
204 211 else:
205 212 blockstart = (i & ~(32)) * self.s
206 213 blocksize = self.s * 64
207 214 end = blockstart + blocksize
208 215 while blockstart < end:
209 216 self.loadblock(blockstart, blocksize)
210 217 blockstart += blocksize
211 218 if all: self.all = True
212 219
213 220 class lazyindex(object):
214 221 """a lazy version of the index array"""
215 222 def __init__(self, parser):
216 223 self.p = parser
217 224 def __len__(self):
218 225 return len(self.p.index)
219 226 def load(self, pos):
220 227 if pos < 0:
221 228 pos += len(self.p.index)
222 229 self.p.loadindex(pos)
223 230 return self.p.index[pos]
224 231 def __getitem__(self, pos):
225 232 ret = self.p.index[pos] or self.load(pos)
226 233 if isinstance(ret, str):
227 234 ret = struct.unpack(self.p.indexformat, ret)
228 235 return ret
229 236 def __setitem__(self, pos, item):
230 237 self.p.index[pos] = item
231 238 def __delitem__(self, pos):
232 239 del self.p.index[pos]
233 240 def append(self, e):
234 241 self.p.index.append(e)
235 242
236 243 class lazymap(object):
237 244 """a lazy version of the node map"""
238 245 def __init__(self, parser):
239 246 self.p = parser
240 247 def load(self, key):
241 248 n = self.p.findnode(key)
242 249 if n == None:
243 250 raise KeyError(key)
244 251 def __contains__(self, key):
245 252 if key in self.p.map:
246 253 return True
247 254 self.p.loadmap()
248 255 return key in self.p.map
249 256 def __iter__(self):
250 257 yield nullid
251 258 for i in xrange(self.p.l):
252 259 ret = self.p.index[i]
253 260 if not ret:
254 261 self.p.loadindex(i)
255 262 ret = self.p.index[i]
256 263 if isinstance(ret, str):
257 264 ret = struct.unpack(self.p.indexformat, ret)
258 265 yield ret[-1]
259 266 def __getitem__(self, key):
260 267 try:
261 268 return self.p.map[key]
262 269 except KeyError:
263 270 try:
264 271 self.load(key)
265 272 return self.p.map[key]
266 273 except KeyError:
267 274 raise KeyError("node " + hex(key))
268 275 def __setitem__(self, key, val):
269 276 self.p.map[key] = val
270 277 def __delitem__(self, key):
271 278 del self.p.map[key]
272 279
273 280 class RevlogError(Exception): pass
274 281
275 282 class revlog(object):
276 283 """
277 284 the underlying revision storage object
278 285
279 286 A revlog consists of two parts, an index and the revision data.
280 287
281 288 The index is a file with a fixed record size containing
282 289 information on each revision, including its nodeid (hash), the
283 290 nodeids of its parents, the position and offset of its data within
284 291 the data file, and the revision it's based on. Finally, each entry
285 292 contains a linkrev entry that can serve as a pointer to external
286 293 data.
287 294
288 295 The revision data itself is a linear collection of data chunks.
289 296 Each chunk represents a revision and is usually represented as a
290 297 delta against the previous chunk. To bound lookup time, runs of
291 298 deltas are limited to about 2 times the length of the original
292 299 version data. This makes retrieval of a version proportional to
293 300 its size, or O(1) relative to the number of revisions.
294 301
295 302 Both pieces of the revlog are written to in an append-only
296 303 fashion, which means we never need to rewrite a file to insert or
297 304 remove data, and can use some simple techniques to avoid the need
298 305 for locking while reading.
299 306 """
300 307 def __init__(self, opener, indexfile, datafile,
301 308 defversion=REVLOG_DEFAULT_VERSION):
302 309 """
303 310 create a revlog object
304 311
305 312 opener is a function that abstracts the file opening operation
306 313 and can be used to implement COW semantics or the like.
307 314 """
308 315 self.indexfile = indexfile
309 316 self.datafile = datafile
310 317 self.opener = opener
311 318
312 319 self.indexstat = None
313 320 self.cache = None
314 321 self.chunkcache = None
315 322 self.defversion = defversion
316 323 self.load()
317 324
318 325 def load(self):
319 326 v = self.defversion
320 327 try:
321 328 f = self.opener(self.indexfile)
322 329 i = f.read(4)
323 330 f.seek(0)
324 331 except IOError, inst:
325 332 if inst.errno != errno.ENOENT:
326 333 raise
327 334 i = ""
328 335 else:
329 336 try:
330 337 st = util.fstat(f)
331 338 except AttributeError, inst:
332 339 st = None
333 340 else:
334 341 oldst = self.indexstat
335 342 if (oldst and st.st_dev == oldst.st_dev
336 343 and st.st_ino == oldst.st_ino
337 344 and st.st_mtime == oldst.st_mtime
338 345 and st.st_ctime == oldst.st_ctime):
339 346 return
340 347 self.indexstat = st
341 348 if len(i) > 0:
342 349 v = struct.unpack(versionformat, i)[0]
343 350 flags = v & ~0xFFFF
344 351 fmt = v & 0xFFFF
345 352 if fmt == REVLOGV0:
346 353 if flags:
347 354 raise RevlogError(_("index %s invalid flags %x for format v0" %
348 355 (self.indexfile, flags)))
349 356 elif fmt == REVLOGNG:
350 357 if flags & ~REVLOGNGINLINEDATA:
351 358 raise RevlogError(_("index %s invalid flags %x for revlogng" %
352 359 (self.indexfile, flags)))
353 360 else:
354 361 raise RevlogError(_("index %s invalid format %d" %
355 362 (self.indexfile, fmt)))
356 363 self.version = v
357 364 if v == REVLOGV0:
358 365 self.indexformat = indexformatv0
359 366 shaoffset = v0shaoffset
360 367 else:
361 368 self.indexformat = indexformatng
362 369 shaoffset = ngshaoffset
363 370
364 371 if i:
365 if not self.inlinedata() and st and st.st_size > 10000:
372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 st and st.st_size > 10000):
366 374 # big index, let's parse it on demand
367 375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
368 376 self.index = lazyindex(parser)
369 377 self.nodemap = lazymap(parser)
370 378 else:
371 379 i = f.read()
372 380 self.parseindex(i)
373 381 if self.inlinedata():
374 382 # we've already got the entire data file read in, save it
375 383 # in the chunk data
376 384 self.chunkcache = (0, i)
377 385 if self.version != REVLOGV0:
378 386 e = list(self.index[0])
379 387 type = self.ngtype(e[0])
380 388 e[0] = self.offset_type(0, type)
381 389 self.index[0] = e
382 390 else:
383 391 self.nodemap = { nullid: -1}
384 392 self.index = []
385 393
386 394
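The safe_to_use guard consulted above exists because local clones share revlog files through hardlinks, and breaking such a link means copying the file and renaming the copy over the original. A minimal sketch of the failure mode (break_hardlink is a hypothetical helper, not part of this module): on Windows the rename step fails while any reader still holds the file open, and without the win32 extensions there is no workaround, so the lazy parser, which keeps its index file handle open indefinitely, has to be disabled.

    import os, shutil

    def break_hardlink(path):
        tmp = path + '.tmp'              # hypothetical temp name
        shutil.copyfile(path, tmp)       # make a private copy
        os.rename(tmp, path)             # fails on windows while path is
                                         # held open by another handle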
387 395 def parseindex(self, data):
388 396 s = struct.calcsize(self.indexformat)
389 397 l = len(data)
390 398 self.index = []
391 399 self.nodemap = {nullid: -1}
392 400 inline = self.inlinedata()
393 401 off = 0
394 402 n = 0
395 403 while off < l:
396 404 e = struct.unpack(self.indexformat, data[off:off + s])
397 405 self.index.append(e)
398 406 self.nodemap[e[-1]] = n
399 407 n += 1
400 408 off += s
401 409 if inline:
402 410 off += e[1]
403 411
404 412 def ngoffset(self, q):
405 413 if q & 0xFFFF:
406 414 raise RevlogError(_('%s: incompatible revision flag %x') %
407 415 (self.indexfile, q))
408 416 return long(q >> 16)
409 417
410 418 def ngtype(self, q):
411 419 return int(q & 0xFFFF)
412 420
413 421 def offset_type(self, offset, type):
414 422 return long(long(offset) << 16 | type)
415 423
416 424 def loadindex(self, start, end):
417 425 """load a block of indexes all at once from the lazy parser"""
418 426 if isinstance(self.index, lazyindex):
419 427 self.index.p.loadindex(start, end)
420 428
421 429 def loadindexmap(self):
422 430 """loads both the map and the index from the lazy parser"""
423 431 if isinstance(self.index, lazyindex):
424 432 p = self.index.p
425 433 p.loadindex()
426 434 self.nodemap = p.map
427 435
428 436 def loadmap(self):
429 437 """loads the map from the lazy parser"""
430 438 if isinstance(self.nodemap, lazymap):
431 439 self.nodemap.p.loadmap()
432 440 self.nodemap = self.nodemap.p.map
433 441
434 442 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
435 443 def tip(self): return self.node(len(self.index) - 1)
436 444 def count(self): return len(self.index)
437 445 def node(self, rev):
438 446 return (rev < 0) and nullid or self.index[rev][-1]
439 447 def rev(self, node):
440 448 try:
441 449 return self.nodemap[node]
442 450 except KeyError:
443 451 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
444 452 def linkrev(self, node): return self.index[self.rev(node)][-4]
445 453 def parents(self, node):
446 454 if node == nullid: return (nullid, nullid)
447 455 r = self.rev(node)
448 456 d = self.index[r][-3:-1]
449 457 if self.version == REVLOGV0:
450 458 return d
451 459 return [ self.node(x) for x in d ]
452 460 def start(self, rev):
453 461 if rev < 0:
454 462 return -1
455 463 if self.version != REVLOGV0:
456 464 return self.ngoffset(self.index[rev][0])
457 465 return self.index[rev][0]
458 466
459 467 def end(self, rev): return self.start(rev) + self.length(rev)
460 468
461 469 def size(self, rev):
462 470 """return the length of the uncompressed text for a given revision"""
463 471 l = -1
464 472 if self.version != REVLOGV0:
465 473 l = self.index[rev][2]
466 474 if l >= 0:
467 475 return l
468 476
469 477 t = self.revision(self.node(rev))
470 478 return len(t)
471 479
472 480 # alternate implementation; the advantage of this code is that it
473 481 # will be faster for a single revision. But the results are not
474 482 # cached, so finding the size of every revision will be slower.
475 483 """
476 484 if self.cache and self.cache[1] == rev:
477 485 return len(self.cache[2])
478 486
479 487 base = self.base(rev)
480 488 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
481 489 base = self.cache[1]
482 490 text = self.cache[2]
483 491 else:
484 492 text = self.revision(self.node(base))
485 493
486 494 l = len(text)
487 495 for x in xrange(base + 1, rev + 1):
488 496 l = mdiff.patchedsize(l, self.chunk(x))
489 497 return l
490 498 """
491 499
492 500 def length(self, rev):
493 501 if rev < 0:
494 502 return 0
495 503 else:
496 504 return self.index[rev][1]
497 505 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
498 506
499 507 def reachable(self, rev, stop=None):
500 508 reachable = {}
501 509 visit = [rev]
502 510 reachable[rev] = 1
503 511 if stop:
504 512 stopn = self.rev(stop)
505 513 else:
506 514 stopn = 0
507 515 while visit:
508 516 n = visit.pop(0)
509 517 if n == stop:
510 518 continue
511 519 if n == nullid:
512 520 continue
513 521 for p in self.parents(n):
514 522 if self.rev(p) < stopn:
515 523 continue
516 524 if p not in reachable:
517 525 reachable[p] = 1
518 526 visit.append(p)
519 527 return reachable
520 528
521 529 def nodesbetween(self, roots=None, heads=None):
522 530 """Return a tuple containing three elements. Elements 1 and 2 contain
523 531 a final list of bases and heads after all the unreachable ones have been
524 532 pruned. Element 0 contains a topologically sorted list of all
525 533
526 534 nodes that satisfy these constraints:
527 535 1. All nodes must be descended from a node in roots (the nodes on
528 536 roots are considered descended from themselves).
529 537 2. All nodes must also be ancestors of a node in heads (the nodes in
530 538 heads are considered to be their own ancestors).
531 539
532 540 If roots is unspecified, nullid is assumed as the only root.
533 541 If heads is unspecified, it is taken to be the output of the
534 542 heads method (i.e. a list of all nodes in the repository that
535 543 have no children)."""
536 544 nonodes = ([], [], [])
537 545 if roots is not None:
538 546 roots = list(roots)
539 547 if not roots:
540 548 return nonodes
541 549 lowestrev = min([self.rev(n) for n in roots])
542 550 else:
543 551 roots = [nullid] # Everybody's a descendent of nullid
544 552 lowestrev = -1
545 553 if (lowestrev == -1) and (heads is None):
546 554 # We want _all_ the nodes!
547 555 return ([self.node(r) for r in xrange(0, self.count())],
548 556 [nullid], list(self.heads()))
549 557 if heads is None:
550 558 # All nodes are ancestors, so the latest ancestor is the last
551 559 # node.
552 560 highestrev = self.count() - 1
553 561 # Set ancestors to None to signal that every node is an ancestor.
554 562 ancestors = None
555 563 # Set heads to an empty dictionary for later discovery of heads
556 564 heads = {}
557 565 else:
558 566 heads = list(heads)
559 567 if not heads:
560 568 return nonodes
561 569 ancestors = {}
562 570 # Start at the top and keep marking parents until we're done.
563 571 nodestotag = heads[:]
564 572 # Turn heads into a dictionary so we can remove 'fake' heads.
565 573 # Also, later we will be using it to filter out the heads we can't
566 574 # find from roots.
567 575 heads = dict.fromkeys(heads, 0)
568 576 # Remember where the top was so we can use it as a limit later.
569 577 highestrev = max([self.rev(n) for n in nodestotag])
570 578 while nodestotag:
571 579 # grab a node to tag
572 580 n = nodestotag.pop()
573 581 # Never tag nullid
574 582 if n == nullid:
575 583 continue
576 584 # A node's revision number represents its place in a
577 585 # topologically sorted list of nodes.
578 586 r = self.rev(n)
579 587 if r >= lowestrev:
580 588 if n not in ancestors:
581 589 # If we are possibly a descendent of one of the roots
582 590 # and we haven't already been marked as an ancestor
583 591 ancestors[n] = 1 # Mark as ancestor
584 592 # Add non-nullid parents to list of nodes to tag.
585 593 nodestotag.extend([p for p in self.parents(n) if
586 594 p != nullid])
587 595 elif n in heads: # We've seen it before, is it a fake head?
588 596 # So it is, real heads should not be the ancestors of
589 597 # any other heads.
590 598 heads.pop(n)
591 599 if not ancestors:
592 600 return nonodes
593 601 # Now that we have our set of ancestors, we want to remove any
594 602 # roots that are not ancestors.
595 603
596 604 # If one of the roots was nullid, everything is included anyway.
597 605 if lowestrev > -1:
598 606 # But, since we weren't, let's recompute the lowest rev to not
599 607 # include roots that aren't ancestors.
600 608
601 609 # Filter out roots that aren't ancestors of heads
602 610 roots = [n for n in roots if n in ancestors]
603 611 # Recompute the lowest revision
604 612 if roots:
605 613 lowestrev = min([self.rev(n) for n in roots])
606 614 else:
607 615 # No more roots? Return empty list
608 616 return nonodes
609 617 else:
610 618 # We are descending from nullid, and don't need to care about
611 619 # any other roots.
612 620 lowestrev = -1
613 621 roots = [nullid]
614 622 # Transform our roots list into a 'set' (i.e. a dictionary where the
615 623 # values don't matter.
616 624 descendents = dict.fromkeys(roots, 1)
617 625 # Also, keep the original roots so we can filter out roots that aren't
618 626 # 'real' roots (i.e. are descended from other roots).
619 627 roots = descendents.copy()
620 628 # Our topologically sorted list of output nodes.
621 629 orderedout = []
622 630 # Don't start at nullid since we don't want nullid in our output list,
623 631 # and if nullid shows up in descendents, empty parents will look like
624 632 # they're descendents.
625 633 for r in xrange(max(lowestrev, 0), highestrev + 1):
626 634 n = self.node(r)
627 635 isdescendent = False
628 636 if lowestrev == -1: # Everybody is a descendent of nullid
629 637 isdescendent = True
630 638 elif n in descendents:
631 639 # n is already a descendent
632 640 isdescendent = True
633 641 # This check only needs to be done here because all the roots
634 642 # will start being marked as descendents before the loop.
635 643 if n in roots:
636 644 # If n was a root, check if it's a 'real' root.
637 645 p = tuple(self.parents(n))
638 646 # If any of its parents are descendents, it's not a root.
639 647 if (p[0] in descendents) or (p[1] in descendents):
640 648 roots.pop(n)
641 649 else:
642 650 p = tuple(self.parents(n))
643 651 # A node is a descendent if either of its parents are
644 652 # descendents. (We seeded the descendents list with the roots
645 653 # up there, remember?)
646 654 if (p[0] in descendents) or (p[1] in descendents):
647 655 descendents[n] = 1
648 656 isdescendent = True
649 657 if isdescendent and ((ancestors is None) or (n in ancestors)):
650 658 # Only include nodes that are both descendents and ancestors.
651 659 orderedout.append(n)
652 660 if (ancestors is not None) and (n in heads):
653 661 # We're trying to figure out which heads are reachable
654 662 # from roots.
655 663 # Mark this head as having been reached
656 664 heads[n] = 1
657 665 elif ancestors is None:
658 666 # Otherwise, we're trying to discover the heads.
659 667 # Assume this is a head because if it isn't, the next step
660 668 # will eventually remove it.
661 669 heads[n] = 1
662 670 # But, obviously its parents aren't.
663 671 for p in self.parents(n):
664 672 heads.pop(p, None)
665 673 heads = [n for n in heads.iterkeys() if heads[n] != 0]
666 674 roots = roots.keys()
667 675 assert orderedout
668 676 assert roots
669 677 assert heads
670 678 return (orderedout, roots, heads)
671 679
672 680 def heads(self, start=None):
673 681 """return the list of all nodes that have no children
674 682
675 683 if start is specified, only heads that are descendants of
676 684 start will be returned
677 685
678 686 """
679 687 if start is None:
680 688 start = nullid
681 689 reachable = {start: 1}
682 690 heads = {start: 1}
683 691 startrev = self.rev(start)
684 692
685 693 for r in xrange(startrev + 1, self.count()):
686 694 n = self.node(r)
687 695 for pn in self.parents(n):
688 696 if pn in reachable:
689 697 reachable[n] = 1
690 698 heads[n] = 1
691 699 if pn in heads:
692 700 del heads[pn]
693 701 return heads.keys()
694 702
695 703 def children(self, node):
696 704 """find the children of a given node"""
697 705 c = []
698 706 p = self.rev(node)
699 707 for r in range(p + 1, self.count()):
700 708 n = self.node(r)
701 709 for pn in self.parents(n):
702 710 if pn == node:
703 711 c.append(n)
704 712 continue
705 713 elif pn == nullid:
706 714 continue
707 715 return c
708 716
709 717 def lookup(self, id):
710 718 """locate a node based on revision number or subset of hex nodeid"""
711 719 try:
712 720 rev = int(id)
713 721 if str(rev) != id: raise ValueError
714 722 if rev < 0: rev = self.count() + rev
715 723 if rev < 0 or rev >= self.count(): raise ValueError
716 724 return self.node(rev)
717 725 except (ValueError, OverflowError):
718 726 c = []
719 727 for n in self.nodemap:
720 728 if hex(n).startswith(id):
721 729 c.append(n)
722 730 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
723 731 if len(c) < 1: raise RevlogError(_("No match found"))
724 732 return c[0]
725 733
726 734 return None
727 735
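A hedged usage note for lookup(): it accepts a decimal revision number (negative values count back from the tip via the count() + rev branch) or any unambiguous hex prefix of a node id; rl stands for some revlog instance:

    assert rl.lookup("-1") == rl.tip()
    node = rl.lookup("45aef5dd")         # unique hex prefix (hypothetical)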
728 736 def diff(self, a, b):
729 737 """return a delta between two revisions"""
730 738 return mdiff.textdiff(a, b)
731 739
732 740 def patches(self, t, pl):
733 741 """apply a list of patches to a string"""
734 742 return mdiff.patches(t, pl)
735 743
736 744 def chunk(self, rev, df=None, cachelen=4096):
737 745 start, length = self.start(rev), self.length(rev)
738 746 inline = self.inlinedata()
739 747 if inline:
740 748 start += (rev + 1) * struct.calcsize(self.indexformat)
741 749 end = start + length
742 750 def loadcache(df):
743 751 cache_length = max(cachelen, length) # 4k
744 752 if not df:
745 753 if inline:
746 754 df = self.opener(self.indexfile)
747 755 else:
748 756 df = self.opener(self.datafile)
749 757 df.seek(start)
750 758 self.chunkcache = (start, df.read(cache_length))
751 759
752 760 if not self.chunkcache:
753 761 loadcache(df)
754 762
755 763 cache_start = self.chunkcache[0]
756 764 cache_end = cache_start + len(self.chunkcache[1])
757 765 if start >= cache_start and end <= cache_end:
758 766 # it is cached
759 767 offset = start - cache_start
760 768 else:
761 769 loadcache(df)
762 770 offset = 0
763 771
764 772 #def checkchunk():
765 773 # df = self.opener(self.datafile)
766 774 # df.seek(start)
767 775 # return df.read(length)
768 776 #assert s == checkchunk()
769 777 return decompress(self.chunkcache[1][offset:offset + length])
770 778
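The inline adjustment at the top of chunk() deserves a worked number: in an inline revlog the data chunks are interleaved with the index records inside the .i file, so rev r's bytes sit at the logical start(r) plus the (r + 1) index records that precede them. With made-up figures for revlogng:

    s = struct.calcsize(indexformatng)   # 64 bytes per record
    # if start(2) == 100, the physical offset is 100 + 3 * 64 == 292
    assert 100 + (2 + 1) * s == 292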
771 779 def delta(self, node):
772 780 """return or calculate a delta between a node and its predecessor"""
773 781 r = self.rev(node)
774 782 return self.revdiff(r - 1, r)
775 783
776 784 def revdiff(self, rev1, rev2):
777 785 """return or calculate a delta between two revisions"""
778 786 b1 = self.base(rev1)
779 787 b2 = self.base(rev2)
780 788 if b1 == b2 and rev1 + 1 == rev2:
781 789 return self.chunk(rev2)
782 790 else:
783 791 return self.diff(self.revision(self.node(rev1)),
784 792 self.revision(self.node(rev2)))
785 793
786 794 def revision(self, node):
787 795 """return an uncompressed revision of a given"""
788 796 if node == nullid: return ""
789 797 if self.cache and self.cache[0] == node: return self.cache[2]
790 798
791 799 # look up what we need to read
792 800 text = None
793 801 rev = self.rev(node)
794 802 base = self.base(rev)
795 803
796 804 if self.inlinedata():
797 805 # we probably have the whole chunk cached
798 806 df = None
799 807 else:
800 808 df = self.opener(self.datafile)
801 809
802 810 # do we have useful data cached?
803 811 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
804 812 base = self.cache[1]
805 813 text = self.cache[2]
806 814 self.loadindex(base, rev + 1)
807 815 else:
808 816 self.loadindex(base, rev + 1)
809 817 text = self.chunk(base, df=df)
810 818
811 819 bins = []
812 820 for r in xrange(base + 1, rev + 1):
813 821 bins.append(self.chunk(r, df=df))
814 822
815 823 text = self.patches(text, bins)
816 824
817 825 p1, p2 = self.parents(node)
818 826 if node != hash(text, p1, p2):
819 827 raise RevlogError(_("integrity check failed on %s:%d")
820 828 % (self.datafile, rev))
821 829
822 830 self.cache = (node, rev, text)
823 831 return text
824 832
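Stripped of caching and inline-data handling, revision() is this delta-chain walk; the dist > len(text) * 2 cutoff in addrevision() below keeps the span from base to rev at about twice the text size, which is what makes the walk O(1) in the number of revisions. A simplified sketch, where rl is a revlog:

    def reconstruct(rl, rev):
        base = rl.base(rev)                       # chain starts at a full text
        text = rl.chunk(base)
        deltas = [rl.chunk(r) for r in xrange(base + 1, rev + 1)]
        return mdiff.patches(text, deltas)        # apply deltas in order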
825 833 def checkinlinesize(self, tr, fp=None):
826 834 if not self.inlinedata():
827 835 return
828 836 if not fp:
829 837 fp = self.opener(self.indexfile, 'r')
830 838 fp.seek(0, 2)
831 839 size = fp.tell()
832 840 if size < 131072:
833 841 return
834 842 trinfo = tr.find(self.indexfile)
835 843 if trinfo == None:
836 844 raise RevlogError(_("%s not found in the transaction" %
837 845 self.indexfile))
838 846
839 847 trindex = trinfo[2]
840 848 dataoff = self.start(trindex)
841 849
842 850 tr.add(self.datafile, dataoff)
843 851 df = self.opener(self.datafile, 'w')
844 852 calc = struct.calcsize(self.indexformat)
845 853 for r in xrange(self.count()):
846 854 start = self.start(r) + (r + 1) * calc
847 855 length = self.length(r)
848 856 fp.seek(start)
849 857 d = fp.read(length)
850 858 df.write(d)
851 859 fp.close()
852 860 df.close()
853 861 fp = self.opener(self.indexfile, 'w', atomictemp=True)
854 862 self.version &= ~(REVLOGNGINLINEDATA)
855 863 if self.count():
856 864 x = self.index[0]
857 865 e = struct.pack(self.indexformat, *x)[4:]
858 866 l = struct.pack(versionformat, self.version)
859 867 fp.write(l)
860 868 fp.write(e)
861 869
862 870 for i in xrange(1, self.count()):
863 871 x = self.index[i]
864 872 e = struct.pack(self.indexformat, *x)
865 873 fp.write(e)
866 874
867 875 # if we don't call rename, the temp file will never replace the
868 876 # real index
869 877 fp.rename()
870 878
871 879 tr.replace(self.indexfile, trindex * calc)
872 880 self.chunkcache = None
873 881
874 882 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
875 883 """add a revision to the log
876 884
877 885 text - the revision data to add
878 886 transaction - the transaction object used for rollback
879 887 link - the linkrev data to add
880 888 p1, p2 - the parent nodeids of the revision
881 889 d - an optional precomputed delta
882 890 """
883 891 if text is None: text = ""
884 892 if p1 is None: p1 = self.tip()
885 893 if p2 is None: p2 = nullid
886 894
887 895 node = hash(text, p1, p2)
888 896
889 897 if node in self.nodemap:
890 898 return node
891 899
892 900 n = self.count()
893 901 t = n - 1
894 902
895 903 if n:
896 904 base = self.base(t)
897 905 start = self.start(base)
898 906 end = self.end(t)
899 907 if not d:
900 908 prev = self.revision(self.tip())
901 909 d = self.diff(prev, str(text))
902 910 data = compress(d)
903 911 l = len(data[1]) + len(data[0])
904 912 dist = end - start + l
905 913
906 914 # full versions are inserted when the needed deltas
907 915 # become comparable to the uncompressed text
908 916 if not n or dist > len(text) * 2:
909 917 data = compress(text)
910 918 l = len(data[1]) + len(data[0])
911 919 base = n
912 920 else:
913 921 base = self.base(t)
914 922
915 923 offset = 0
916 924 if t >= 0:
917 925 offset = self.end(t)
918 926
919 927 if self.version == REVLOGV0:
920 928 e = (offset, l, base, link, p1, p2, node)
921 929 else:
922 930 e = (self.offset_type(offset, 0), l, len(text),
923 931 base, link, self.rev(p1), self.rev(p2), node)
924 932
925 933 self.index.append(e)
926 934 self.nodemap[node] = n
927 935 entry = struct.pack(self.indexformat, *e)
928 936
929 937 if not self.inlinedata():
930 938 transaction.add(self.datafile, offset)
931 939 transaction.add(self.indexfile, n * len(entry))
932 940 f = self.opener(self.datafile, "a")
933 941 if data[0]:
934 942 f.write(data[0])
935 943 f.write(data[1])
936 944 f.close()
937 945 f = self.opener(self.indexfile, "a")
938 946 else:
939 947 f = self.opener(self.indexfile, "a+")
940 948 f.seek(0, 2)
941 949 transaction.add(self.indexfile, f.tell(), self.count() - 1)
942 950
943 951 if len(self.index) == 1 and self.version != REVLOGV0:
944 952 l = struct.pack(versionformat, self.version)
945 953 f.write(l)
946 954 entry = entry[4:]
947 955
948 956 f.write(entry)
949 957
950 958 if self.inlinedata():
951 959 f.write(data[0])
952 960 f.write(data[1])
953 961 self.checkinlinesize(transaction, f)
954 962
955 963 self.cache = (node, n, text)
956 964 return node
957 965
958 966 def ancestor(self, a, b):
959 967 """calculate the least common ancestor of nodes a and b"""
960 968
961 969 # start with some short cuts for the linear cases
962 970 if a == b:
963 971 return a
964 972 ra = self.rev(a)
965 973 rb = self.rev(b)
966 974 if ra < rb:
967 975 last = b
968 976 first = a
969 977 else:
970 978 last = a
971 979 first = b
972 980
973 981 # reachable won't include stop in the list, so we have to use a parent
974 982 reachable = self.reachable(last, stop=self.parents(first)[0])
975 983 if first in reachable:
976 984 return first
977 985
978 986 # calculate the distance of every node from root
979 987 dist = {nullid: 0}
980 988 for i in xrange(self.count()):
981 989 n = self.node(i)
982 990 p1, p2 = self.parents(n)
983 991 dist[n] = max(dist[p1], dist[p2]) + 1
984 992
985 993 # traverse ancestors in order of decreasing distance from root
986 994 def ancestors(node):
987 995 # we store negative distances because heap returns smallest member
988 996 h = [(-dist[node], node)]
989 997 seen = {}
990 998 while h:
991 999 d, n = heapq.heappop(h)
992 1000 if n not in seen:
993 1001 seen[n] = 1
994 1002 yield (-d, n)
995 1003 for p in self.parents(n):
996 1004 heapq.heappush(h, (-dist[p], p))
997 1005
998 1006 def generations(node):
999 1007 sg, s = None, {}
1000 1008 for g,n in ancestors(node):
1001 1009 if g != sg:
1002 1010 if sg:
1003 1011 yield sg, s
1004 1012 sg, s = g, {n:1}
1005 1013 else:
1006 1014 s[n] = 1
1007 1015 yield sg, s
1008 1016
1009 1017 x = generations(a)
1010 1018 y = generations(b)
1011 1019 gx = x.next()
1012 1020 gy = y.next()
1013 1021
1014 1022 # increment each ancestor list until it is closer to root than
1015 1023 # the other, or they match
1016 1024 while 1:
1017 1025 #print "ancestor gen %s %s" % (gx[0], gy[0])
1018 1026 if gx[0] == gy[0]:
1019 1027 # find the intersection
1020 1028 i = [ n for n in gx[1] if n in gy[1] ]
1021 1029 if i:
1022 1030 return i[0]
1023 1031 else:
1024 1032 #print "next"
1025 1033 gy = y.next()
1026 1034 gx = x.next()
1027 1035 elif gx[0] < gy[0]:
1028 1036 #print "next y"
1029 1037 gy = y.next()
1030 1038 else:
1031 1039 #print "next x"
1032 1040 gx = x.next()
1033 1041
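A worked example of the generation walk on a four-revision toy graph (parents given as revision numbers, -1 standing in for nullid):

    parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    dist = {-1: 0}
    for r in range(4):
        p1, p2 = parents[r]
        dist[r] = max(dist[p1], dist[p2]) + 1
    assert dist == {-1: 0, 0: 1, 1: 2, 2: 2, 3: 3}
    # ancestor(rev 1, rev 2): generations (2, {1}) and (2, {2}) do not
    # intersect, so both sides step down to (1, {0}), which does -> rev 0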
1034 1042 def group(self, nodelist, lookup, infocollect=None):
1035 1043 """calculate a delta group
1036 1044
1037 1045 Given a list of changeset revs, return a set of deltas and
1038 1046 metadata corresponding to nodes. the first delta is
1039 1047 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1040 1048 have this parent as it has all history before these
1041 1049 changesets. parent is parent[0].
1042 1050 """
1043 1051 revs = [self.rev(n) for n in nodelist]
1044 1052
1045 1053 # if we don't have any revisions touched by these changesets, bail
1046 1054 if not revs:
1047 1055 yield changegroup.closechunk()
1048 1056 return
1049 1057
1050 1058 # add the parent of the first rev
1051 1059 p = self.parents(self.node(revs[0]))[0]
1052 1060 revs.insert(0, self.rev(p))
1053 1061
1054 1062 # build deltas
1055 1063 for d in xrange(0, len(revs) - 1):
1056 1064 a, b = revs[d], revs[d + 1]
1057 1065 nb = self.node(b)
1058 1066
1059 1067 if infocollect is not None:
1060 1068 infocollect(nb)
1061 1069
1062 1070 d = self.revdiff(a, b)
1063 1071 p = self.parents(nb)
1064 1072 meta = nb + p[0] + p[1] + lookup(nb)
1065 1073 yield changegroup.genchunk("%s%s" % (meta, d))
1066 1074
1067 1075 yield changegroup.closechunk()
1068 1076
1069 1077 def addgroup(self, revs, linkmapper, transaction, unique=0):
1070 1078 """
1071 1079 add a delta group
1072 1080
1073 1081 given a set of deltas, add them to the revision log. the
1074 1082 first delta is against its parent, which should be in our
1075 1083 log, the rest are against the previous delta.
1076 1084 """
1077 1085
1078 1086 #track the base of the current delta log
1079 1087 r = self.count()
1080 1088 t = r - 1
1081 1089 node = None
1082 1090
1083 1091 base = prev = -1
1084 1092 start = end = textlen = 0
1085 1093 if r:
1086 1094 end = self.end(t)
1087 1095
1088 1096 ifh = self.opener(self.indexfile, "a+")
1089 1097 ifh.seek(0, 2)
1090 1098 transaction.add(self.indexfile, ifh.tell(), self.count())
1091 1099 if self.inlinedata():
1092 1100 dfh = None
1093 1101 else:
1094 1102 transaction.add(self.datafile, end)
1095 1103 dfh = self.opener(self.datafile, "a")
1096 1104
1097 1105 # loop through our set of deltas
1098 1106 chain = None
1099 1107 for chunk in revs:
1100 1108 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1101 1109 link = linkmapper(cs)
1102 1110 if node in self.nodemap:
1103 1111 # this can happen if two branches make the same change
1104 1112 # if unique:
1105 1113 # raise RevlogError(_("already have %s") % hex(node[:4]))
1106 1114 chain = node
1107 1115 continue
1108 1116 delta = chunk[80:]
1109 1117
1110 1118 for p in (p1, p2):
1111 1119 if not p in self.nodemap:
1112 1120 raise RevlogError(_("unknown parent %s") % short(p))
1113 1121
1114 1122 if not chain:
1115 1123 # retrieve the parent revision of the delta chain
1116 1124 chain = p1
1117 1125 if not chain in self.nodemap:
1118 1126 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1119 1127
1120 1128 # full versions are inserted when the needed deltas become
1121 1129 # comparable to the uncompressed text or when the previous
1122 1130 # version is not the one we have a delta against. We use
1123 1131 # the size of the previous full rev as a proxy for the
1124 1132 # current size.
1125 1133
1126 1134 if chain == prev:
1127 1135 tempd = compress(delta)
1128 1136 cdelta = tempd[0] + tempd[1]
1129 1137 textlen = mdiff.patchedsize(textlen, delta)
1130 1138
1131 1139 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1132 1140 # flush our writes here so we can read it in revision
1133 1141 if dfh:
1134 1142 dfh.flush()
1135 1143 ifh.flush()
1136 1144 text = self.revision(chain)
1137 1145 text = self.patches(text, [delta])
1138 1146 chk = self.addrevision(text, transaction, link, p1, p2)
1139 1147 if chk != node:
1140 1148 raise RevlogError(_("consistency error adding group"))
1141 1149 textlen = len(text)
1142 1150 else:
1143 1151 if self.version == REVLOGV0:
1144 1152 e = (end, len(cdelta), base, link, p1, p2, node)
1145 1153 else:
1146 1154 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1147 1155 link, self.rev(p1), self.rev(p2), node)
1148 1156 self.index.append(e)
1149 1157 self.nodemap[node] = r
1150 1158 if self.inlinedata():
1151 1159 ifh.write(struct.pack(self.indexformat, *e))
1152 1160 ifh.write(cdelta)
1153 1161 self.checkinlinesize(transaction, ifh)
1154 1162 if not self.inlinedata():
1155 1163 dfh = self.opener(self.datafile, "a")
1156 1164 ifh = self.opener(self.indexfile, "a")
1157 1165 else:
1158 1166 if not dfh:
1159 1167 # addrevision switched from inline to conventional
1160 1168 # reopen the index
1161 1169 dfh = self.opener(self.datafile, "a")
1162 1170 ifh = self.opener(self.indexfile, "a")
1163 1171 dfh.write(cdelta)
1164 1172 ifh.write(struct.pack(self.indexformat, *e))
1165 1173
1166 1174 t, r, chain, prev = r, r + 1, node, node
1167 1175 base = self.base(t)
1168 1176 start = self.start(base)
1169 1177 end = self.end(t)
1170 1178
1171 1179 if node is None:
1172 1180 raise RevlogError(_("group to be added is empty"))
1173 1181 return node
1174 1182
1175 1183 def strip(self, rev, minlink):
1176 1184 if self.count() == 0 or rev >= self.count():
1177 1185 return
1178 1186
1179 1187 if isinstance(self.index, lazyindex):
1180 1188 self.loadindexmap()
1181 1189
1182 1190 # When stripping away a revision, we need to make sure it
1183 1191 # does not actually belong to an older changeset.
1184 1192 # The minlink parameter defines the oldest revision
1185 1193 # we're allowed to strip away.
1186 1194 while minlink > self.index[rev][-4]:
1187 1195 rev += 1
1188 1196 if rev >= self.count():
1189 1197 return
1190 1198
1191 1199 # first truncate the files on disk
1192 1200 end = self.start(rev)
1193 1201 if not self.inlinedata():
1194 1202 df = self.opener(self.datafile, "a")
1195 1203 df.truncate(end)
1196 1204 end = rev * struct.calcsize(self.indexformat)
1197 1205 else:
1198 1206 end += rev * struct.calcsize(self.indexformat)
1199 1207
1200 1208 indexf = self.opener(self.indexfile, "a")
1201 1209 indexf.truncate(end)
1202 1210
1203 1211 # then reset internal state in memory to forget those revisions
1204 1212 self.cache = None
1205 1213 self.chunkcache = None
1206 1214 for x in xrange(rev, self.count()):
1207 1215 del self.nodemap[self.node(x)]
1208 1216
1209 1217 del self.index[rev:]
1210 1218
1211 1219 def checksize(self):
1212 1220 expected = 0
1213 1221 if self.count():
1214 1222 expected = self.end(self.count() - 1)
1215 1223
1216 1224 try:
1217 1225 f = self.opener(self.datafile)
1218 1226 f.seek(0, 2)
1219 1227 actual = f.tell()
1220 1228 dd = actual - expected
1221 1229 except IOError, inst:
1222 1230 if inst.errno != errno.ENOENT:
1223 1231 raise
1224 1232 dd = 0
1225 1233
1226 1234 try:
1227 1235 f = self.opener(self.indexfile)
1228 1236 f.seek(0, 2)
1229 1237 actual = f.tell()
1230 1238 s = struct.calcsize(self.indexformat)
1231 1239 i = actual / s
1232 1240 di = actual - (i * s)
1233 1241 if self.inlinedata():
1234 1242 databytes = 0
1235 1243 for r in xrange(self.count()):
1236 1244 databytes += self.length(r)
1237 1245 dd = 0
1238 1246 di = actual - self.count() * s - databytes
1239 1247 except IOError, inst:
1240 1248 if inst.errno != errno.ENOENT:
1241 1249 raise
1242 1250 di = 0
1243 1251
1244 1252 return (dd, di)
1245 1253
1246 1254
@@ -1,880 +1,889 b''
1 1 """
2 2 util.py - Mercurial utility functions and platform specific implementations
3 3
4 4 Copyright 2005 K. Thananchayan <thananck@yahoo.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8
9 9 This contains helper routines that are independent of the SCM core and hide
10 10 platform-specific details from the core.
11 11 """
12 12
13 13 import os, errno
14 14 from i18n import gettext as _
15 15 from demandload import *
16 16 demandload(globals(), "cStringIO errno popen2 re shutil sys tempfile")
17 17 demandload(globals(), "threading time")
18 18
19 19 class SignalInterrupt(Exception):
20 20 """Exception raised on SIGTERM and SIGHUP."""
21 21
22 22 def pipefilter(s, cmd):
23 23 '''filter string S through command CMD, returning its output'''
24 24 (pout, pin) = popen2.popen2(cmd, -1, 'b')
25 25 def writer():
26 26 try:
27 27 pin.write(s)
28 28 pin.close()
29 29 except IOError, inst:
30 30 if inst.errno != errno.EPIPE:
31 31 raise
32 32
33 33 # we should use select instead on UNIX, but this will work on most
34 34 # systems, including Windows
35 35 w = threading.Thread(target=writer)
36 36 w.start()
37 37 f = pout.read()
38 38 pout.close()
39 39 w.join()
40 40 return f
41 41
42 42 def tempfilter(s, cmd):
43 43 '''filter string S through a pair of temporary files with CMD.
44 44 CMD is used as a template to create the real command to be run,
45 45 with the strings INFILE and OUTFILE replaced by the real names of
46 46 the temporary files generated.'''
47 47 inname, outname = None, None
48 48 try:
49 49 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
50 50 fp = os.fdopen(infd, 'wb')
51 51 fp.write(s)
52 52 fp.close()
53 53 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
54 54 os.close(outfd)
55 55 cmd = cmd.replace('INFILE', inname)
56 56 cmd = cmd.replace('OUTFILE', outname)
57 57 code = os.system(cmd)
58 58 if code: raise Abort(_("command '%s' failed: %s") %
59 59 (cmd, explain_exit(code)))
60 60 return open(outname, 'rb').read()
61 61 finally:
62 62 try:
63 63 if inname: os.unlink(inname)
64 64 except: pass
65 65 try:
66 66 if outname: os.unlink(outname)
67 67 except: pass
68 68
69 69 filtertable = {
70 70 'tempfile:': tempfilter,
71 71 'pipe:': pipefilter,
72 72 }
73 73
74 74 def filter(s, cmd):
75 75 "filter a string through a command that transforms its input to its output"
76 76 for name, fn in filtertable.iteritems():
77 77 if cmd.startswith(name):
78 78 return fn(s, cmd[len(name):].lstrip())
79 79 return pipefilter(s, cmd)
80 80
81 81 def find_in_path(name, path, default=None):
82 82 '''find name in search path. path can be string (will be split
83 83 with os.pathsep), or iterable thing that returns strings. if name
84 84 found, return path to name. else return default.'''
85 85 if isinstance(path, str):
86 86 path = path.split(os.pathsep)
87 87 for p in path:
88 88 p_name = os.path.join(p, name)
89 89 if os.path.exists(p_name):
90 90 return p_name
91 91 return default
92 92
93 93 def patch(strip, patchname, ui):
94 94 """apply the patch <patchname> to the working directory.
95 95 a list of patched files is returned"""
96 96 patcher = find_in_path('gpatch', os.environ.get('PATH', ''), 'patch')
97 97 fp = os.popen('"%s" -p%d < "%s"' % (patcher, strip, patchname))
98 98 files = {}
99 99 for line in fp:
100 100 line = line.rstrip()
101 101 ui.status("%s\n" % line)
102 102 if line.startswith('patching file '):
103 103 pf = parse_patch_output(line)
104 104 files.setdefault(pf, 1)
105 105 code = fp.close()
106 106 if code:
107 107 raise Abort(_("patch command failed: %s") % explain_exit(code)[0])
108 108 return files.keys()
109 109
110 110 def binary(s):
111 111 """return true if a string is binary data using diff's heuristic"""
112 112 if s and '\0' in s[:4096]:
113 113 return True
114 114 return False
115 115
116 116 def unique(g):
117 117 """return the uniq elements of iterable g"""
118 118 seen = {}
119 119 for f in g:
120 120 if f not in seen:
121 121 seen[f] = 1
122 122 yield f
123 123
124 124 class Abort(Exception):
125 125 """Raised if a command needs to print an error and exit."""
126 126
127 127 def always(fn): return True
128 128 def never(fn): return False
129 129
130 130 def patkind(name, dflt_pat='glob'):
131 131 """Split a string into an optional pattern kind prefix and the
132 132 actual pattern."""
133 133 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
134 134 if name.startswith(prefix + ':'): return name.split(':', 1)
135 135 return dflt_pat, name
136 136
137 137 def globre(pat, head='^', tail='$'):
138 138 "convert a glob pattern into a regexp"
139 139 i, n = 0, len(pat)
140 140 res = ''
141 141 group = False
142 142 def peek(): return i < n and pat[i]
143 143 while i < n:
144 144 c = pat[i]
145 145 i = i+1
146 146 if c == '*':
147 147 if peek() == '*':
148 148 i += 1
149 149 res += '.*'
150 150 else:
151 151 res += '[^/]*'
152 152 elif c == '?':
153 153 res += '.'
154 154 elif c == '[':
155 155 j = i
156 156 if j < n and pat[j] in '!]':
157 157 j += 1
158 158 while j < n and pat[j] != ']':
159 159 j += 1
160 160 if j >= n:
161 161 res += '\\['
162 162 else:
163 163 stuff = pat[i:j].replace('\\','\\\\')
164 164 i = j + 1
165 165 if stuff[0] == '!':
166 166 stuff = '^' + stuff[1:]
167 167 elif stuff[0] == '^':
168 168 stuff = '\\' + stuff
169 169 res = '%s[%s]' % (res, stuff)
170 170 elif c == '{':
171 171 group = True
172 172 res += '(?:'
173 173 elif c == '}' and group:
174 174 res += ')'
175 175 group = False
176 176 elif c == ',' and group:
177 177 res += '|'
178 178 elif c == '\\':
179 179 p = peek()
180 180 if p:
181 181 i += 1
182 182 res += re.escape(p)
183 183 else:
184 184 res += re.escape(c)
185 185 else:
186 186 res += re.escape(c)
187 187 return head + res + tail
188 188
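A few hedged spot checks of the translation rules above; note that a single '*' stops at path separators while '**' crosses them:

    import re
    assert re.match(globre("*.py"), "revlog.py")
    assert not re.match(globre("*.py"), "mercurial/revlog.py")
    assert re.match(globre("**.py"), "mercurial/revlog.py")
    assert re.match(globre("util{.py,_win32.py}"), "util_win32.py")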
189 189 _globchars = {'[': 1, '{': 1, '*': 1, '?': 1}
190 190
191 191 def pathto(n1, n2):
192 192 '''return the relative path from one place to another.
193 193 this returns a path in the form used by the local filesystem, not hg.'''
194 194 if not n1: return localpath(n2)
195 195 a, b = n1.split('/'), n2.split('/')
196 196 a.reverse()
197 197 b.reverse()
198 198 while a and b and a[-1] == b[-1]:
199 199 a.pop()
200 200 b.pop()
201 201 b.reverse()
202 202 return os.sep.join((['..'] * len(a)) + b)
203 203
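For example, walking from a/b/c to a/d/e pops the shared leading 'a', climbs out of b/c, then descends:

    assert pathto("a/b/c", "a/d/e") == os.sep.join(["..", "..", "d", "e"])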
204 204 def canonpath(root, cwd, myname):
205 205 """return the canonical path of myname, given cwd and root"""
206 206 if root == os.sep:
207 207 rootsep = os.sep
208 208 else:
209 209 rootsep = root + os.sep
210 210 name = myname
211 211 if not os.path.isabs(name):
212 212 name = os.path.join(root, cwd, name)
213 213 name = os.path.normpath(name)
214 214 if name.startswith(rootsep):
215 215 name = name[len(rootsep):]
216 216 audit_path(name)
217 217 return pconvert(name)
218 218 elif name == root:
219 219 return ''
220 220 else:
221 221 # Determine whether `name' is in the hierarchy at or beneath `root',
222 222 # by iterating name=dirname(name) until that causes no change (can't
223 223 # check name == '/', because that doesn't work on windows). For each
224 224 # `name', compare dev/inode numbers. If they match, the list `rel'
225 225 # holds the reversed list of components making up the relative file
226 226 # name we want.
227 227 root_st = os.stat(root)
228 228 rel = []
229 229 while True:
230 230 try:
231 231 name_st = os.stat(name)
232 232 except OSError:
233 233 break
234 234 if samestat(name_st, root_st):
235 235 rel.reverse()
236 236 name = os.path.join(*rel)
237 237 audit_path(name)
238 238 return pconvert(name)
239 239 dirname, basename = os.path.split(name)
240 240 rel.append(basename)
241 241 if dirname == name:
242 242 break
243 243 name = dirname
244 244
245 245 raise Abort('%s not under root' % myname)
246 246
247 247 def matcher(canonroot, cwd='', names=['.'], inc=[], exc=[], head='', src=None):
248 248 return _matcher(canonroot, cwd, names, inc, exc, head, 'glob', src)
249 249
250 250 def cmdmatcher(canonroot, cwd='', names=['.'], inc=[], exc=[], head='', src=None):
251 251 if os.name == 'nt':
252 252 dflt_pat = 'glob'
253 253 else:
254 254 dflt_pat = 'relpath'
255 255 return _matcher(canonroot, cwd, names, inc, exc, head, dflt_pat, src)
256 256
257 257 def _matcher(canonroot, cwd, names, inc, exc, head, dflt_pat, src):
258 258 """build a function to match a set of file patterns
259 259
260 260 arguments:
261 261 canonroot - the canonical root of the tree you're matching against
262 262 cwd - the current working directory, if relevant
263 263 names - patterns to find
264 264 inc - patterns to include
265 265 exc - patterns to exclude
266 266 head - a regex to prepend to patterns to control whether a match is rooted
267 267
268 268 a pattern is one of:
269 269 'glob:<rooted glob>'
270 270 're:<rooted regexp>'
271 271 'path:<rooted path>'
272 272 'relglob:<relative glob>'
273 273 'relpath:<relative path>'
274 274 'relre:<relative regexp>'
275 275 '<rooted path or regexp>'
276 276
277 277 returns:
278 278 a 3-tuple containing
279 279 - list of explicit non-pattern names passed in
280 280 - a bool match(filename) function
281 281 - a bool indicating if any patterns were passed in
282 282
283 283 todo:
284 284 make head regex a rooted bool
285 285 """
286 286
287 287 def contains_glob(name):
288 288 for c in name:
289 289 if c in _globchars: return True
290 290 return False
291 291
292 292 def regex(kind, name, tail):
293 293 '''convert a pattern into a regular expression'''
294 294 if kind == 're':
295 295 return name
296 296 elif kind == 'path':
297 297 return '^' + re.escape(name) + '(?:/|$)'
298 298 elif kind == 'relglob':
299 299 return head + globre(name, '(?:|.*/)', tail)
300 300 elif kind == 'relpath':
301 301 return head + re.escape(name) + tail
302 302 elif kind == 'relre':
303 303 if name.startswith('^'):
304 304 return name
305 305 return '.*' + name
306 306 return head + globre(name, '', tail)
307 307
308 308 def matchfn(pats, tail):
309 309 """build a matching function from a set of patterns"""
310 310 if not pats:
311 311 return
312 312 matches = []
313 313 for k, p in pats:
314 314 try:
315 315 pat = '(?:%s)' % regex(k, p, tail)
316 316 matches.append(re.compile(pat).match)
317 317 except re.error:
318 318 if src: raise Abort("%s: invalid pattern (%s): %s" % (src, k, p))
319 319 else: raise Abort("invalid pattern (%s): %s" % (k, p))
320 320
321 321 def buildfn(text):
322 322 for m in matches:
323 323 r = m(text)
324 324 if r:
325 325 return r
326 326
327 327 return buildfn
328 328
329 329 def globprefix(pat):
330 330 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
331 331 root = []
332 332 for p in pat.split(os.sep):
333 333 if contains_glob(p): break
334 334 root.append(p)
335 335 return '/'.join(root)
336 336
337 337 pats = []
338 338 files = []
339 339 roots = []
340 340 for kind, name in [patkind(p, dflt_pat) for p in names]:
341 341 if kind in ('glob', 'relpath'):
342 342 name = canonpath(canonroot, cwd, name)
343 343 if name == '':
344 344 kind, name = 'glob', '**'
345 345 if kind in ('glob', 'path', 're'):
346 346 pats.append((kind, name))
347 347 if kind == 'glob':
348 348 root = globprefix(name)
349 349 if root: roots.append(root)
350 350 elif kind == 'relpath':
351 351 files.append((kind, name))
352 352 roots.append(name)
353 353
354 354 patmatch = matchfn(pats, '$') or always
355 355 filematch = matchfn(files, '(?:/|$)') or always
356 356 incmatch = always
357 357 if inc:
358 358 incmatch = matchfn(map(patkind, inc), '(?:/|$)')
359 359 excmatch = lambda fn: False
360 360 if exc:
361 361 excmatch = matchfn(map(patkind, exc), '(?:/|$)')
362 362
363 363 return (roots,
364 364 lambda fn: (incmatch(fn) and not excmatch(fn) and
365 365 (fn.endswith('/') or
366 366 (not pats and not files) or
367 367 (pats and patmatch(fn)) or
368 368 (files and filematch(fn)))),
369 369 (inc or exc or (pats and pats != [('glob', '**')])) and True)
370 370
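A hedged end-to-end use of the matcher machinery (the repo root is made up; with a bare glob and an empty head, only top-level .py files match):

    roots, match, anypats = matcher("/repo", names=["glob:*.py"])
    assert match("setup.py")
    assert not match("doc/README")
    assert anypats                       # real patterns were passed in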
371 371 def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None):
372 372 '''enhanced shell command execution.
373 373 run with environment maybe modified, maybe in different dir.
374 374
375 375 if command fails and onerr is None, return status. if ui object,
376 376 print error message and return status, else raise onerr object as
377 377 exception.'''
378 378 oldenv = {}
379 379 for k in environ:
380 380 oldenv[k] = os.environ.get(k)
381 381 if cwd is not None:
382 382 oldcwd = os.getcwd()
383 383 try:
384 384 for k, v in environ.iteritems():
385 385 os.environ[k] = str(v)
386 386 if cwd is not None and oldcwd != cwd:
387 387 os.chdir(cwd)
388 388 rc = os.system(cmd)
389 389 if rc and onerr:
390 390 errmsg = '%s %s' % (os.path.basename(cmd.split(None, 1)[0]),
391 391 explain_exit(rc)[0])
392 392 if errprefix:
393 393 errmsg = '%s: %s' % (errprefix, errmsg)
394 394 try:
395 395 onerr.warn(errmsg + '\n')
396 396 except AttributeError:
397 397 raise onerr(errmsg)
398 398 return rc
399 399 finally:
400 400 for k, v in oldenv.iteritems():
401 401 if v is None:
402 402 del os.environ[k]
403 403 else:
404 404 os.environ[k] = v
405 405 if cwd is not None and oldcwd != cwd:
406 406 os.chdir(oldcwd)
407 407
408 408 def rename(src, dst):
409 409 """forcibly rename a file"""
410 410 try:
411 411 os.rename(src, dst)
412 412 except OSError, err:
413 413 # on windows, rename to existing file is not allowed, so we
414 414 # must delete destination first. but if file is open, unlink
415 415 # schedules it for delete but does not delete it. rename
416 416 # happens immediately even for open files, so we create
417 417 # temporary file, delete it, rename destination to that name,
418 418 # then delete that. then rename is safe to do.
419 419 fd, temp = tempfile.mkstemp(dir=os.path.dirname(dst) or '.')
420 420 os.close(fd)
421 421 os.unlink(temp)
422 422 os.rename(dst, temp)
423 423 os.unlink(temp)
424 424 os.rename(src, dst)
425 425
426 426 def unlink(f):
427 427 """unlink and remove the directory if it is empty"""
428 428 os.unlink(f)
429 429 # try removing directories that might now be empty
430 430 try:
431 431 os.removedirs(os.path.dirname(f))
432 432 except OSError:
433 433 pass
434 434
435 435 def copyfiles(src, dst, hardlink=None):
436 436 """Copy a directory tree using hardlinks if possible"""
437 437
438 438 if hardlink is None:
439 439 hardlink = (os.stat(src).st_dev ==
440 440 os.stat(os.path.dirname(dst)).st_dev)
441 441
442 442 if os.path.isdir(src):
443 443 os.mkdir(dst)
444 444 for name in os.listdir(src):
445 445 srcname = os.path.join(src, name)
446 446 dstname = os.path.join(dst, name)
447 447 copyfiles(srcname, dstname, hardlink)
448 448 else:
449 449 if hardlink:
450 450 try:
451 451 os_link(src, dst)
452 452 except (IOError, OSError):
453 453 hardlink = False
454 454 shutil.copy(src, dst)
455 455 else:
456 456 shutil.copy(src, dst)
457 457
458 458 def audit_path(path):
459 459 """Abort if path contains dangerous components"""
460 460 parts = os.path.normcase(path).split(os.sep)
461 461 if (os.path.splitdrive(path)[0] or parts[0] in ('.hg', '')
462 462 or os.pardir in parts):
463 463 raise Abort(_("path contains illegal component: %s\n") % path)
464 464
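# audit_path() sketch: each of these paths is rejected, while a plain
# relative path such as 'data/foo.i' passes silently.
for p in ('.hg/hgrc', '/etc/passwd', 'data/../../escape'):
    try:
        audit_path(p)
        assert False, 'expected Abort for %s' % p
    except Abort:
        pass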
465 465 def _makelock_file(info, pathname):
466 466 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
467 467 os.write(ld, info)
468 468 os.close(ld)
469 469
470 470 def _readlock_file(pathname):
471 471 return posixfile(pathname).read()
472 472
473 473 def nlinks(pathname):
474 474 """Return number of hardlinks for the given file."""
475 475 return os.stat(pathname).st_nlink
476 476
477 477 if hasattr(os, 'link'):
478 478 os_link = os.link
479 479 else:
480 480 def os_link(src, dst):
481 481 raise OSError(0, _("Hardlinks not supported"))
482 482
483 483 def fstat(fp):
484 484 '''stat file object that may not have fileno method.'''
485 485 try:
486 486 return os.fstat(fp.fileno())
487 487 except AttributeError:
488 488 return os.stat(fp.name)
489 489
490 490 posixfile = file
491 491
492 def is_win_9x():
493 '''return true if run on windows 95, 98 or me.'''
494 try:
495 return sys.getwindowsversion()[3] == 1
496 except AttributeError:
497 return os.name == 'nt' and 'command' in os.environ.get('comspec', '')
498
492 499 # Platform specific variants
493 500 if os.name == 'nt':
494 501 demandload(globals(), "msvcrt")
495 502 nulldev = 'NUL:'
496 503
497 504 class winstdout:
498 505 '''stdout on windows misbehaves if sent through a pipe'''
499 506
500 507 def __init__(self, fp):
501 508 self.fp = fp
502 509
503 510 def __getattr__(self, key):
504 511 return getattr(self.fp, key)
505 512
506 513 def close(self):
507 514 try:
508 515 self.fp.close()
509 516 except: pass
510 517
511 518 def write(self, s):
512 519 try:
513 520 return self.fp.write(s)
514 521 except IOError, inst:
515 522 if inst.errno != 0: raise
516 523 self.close()
517 524 raise IOError(errno.EPIPE, 'Broken pipe')
518 525
519 526 sys.stdout = winstdout(sys.stdout)
520 527
521 528 def system_rcpath():
522 529 try:
523 530 return system_rcpath_win32()
524 531 except:
525 532 return [r'c:\mercurial\mercurial.ini']
526 533
527 534 def os_rcpath():
528 535 '''return default os-specific hgrc search path'''
529 536 return system_rcpath() + [os.path.join(os.path.expanduser('~'),
530 537 'mercurial.ini')]
531 538
532 539 def parse_patch_output(output_line):
533 540 """parses the output produced by patch and returns the file name"""
534 541 pf = output_line[14:]
535 542 if pf[0] == '`':
536 543 pf = pf[1:-1] # Remove the quotes
537 544 return pf
538 545
539 546 def testpid(pid):
540 547 '''return False if pid dead, True if running or not known'''
541 548 return True
542 549
543 550 def is_exec(f, last):
544 551 return last
545 552
546 553 def set_exec(f, mode):
547 554 pass
548 555
549 556 def set_binary(fd):
550 557 msvcrt.setmode(fd.fileno(), os.O_BINARY)
551 558
552 559 def pconvert(path):
553 560 return path.replace("\\", "/")
554 561
555 562 def localpath(path):
556 563 return path.replace('/', '\\')
557 564
558 565 def normpath(path):
559 566 return pconvert(os.path.normpath(path))
560 567
561 568 makelock = _makelock_file
562 569 readlock = _readlock_file
563 570
564 571 def samestat(s1, s2):
565 572 return False
566 573
567 574 def explain_exit(code):
568 575 return _("exited with status %d") % code, code
569 576
570 577 try:
571 578 # override functions with win32 versions if possible
572 579 from util_win32 import *
580 if not is_win_9x():
581 posixfile = posixfile_nt
573 582 except ImportError:
574 583 pass
575 584
576 585 else:
577 586 nulldev = '/dev/null'
578 587
579 588 def rcfiles(path):
580 589 rcs = [os.path.join(path, 'hgrc')]
581 590 rcdir = os.path.join(path, 'hgrc.d')
582 591 try:
583 592 rcs.extend([os.path.join(rcdir, f) for f in os.listdir(rcdir)
584 593 if f.endswith(".rc")])
585 594 except OSError, inst: pass
586 595 return rcs
587 596
588 597 def os_rcpath():
589 598 '''return default os-specific hgrc search path'''
590 599 path = []
591 600 if len(sys.argv) > 0:
592 601 path.extend(rcfiles(os.path.dirname(sys.argv[0]) +
593 602 '/../etc/mercurial'))
594 603 path.extend(rcfiles('/etc/mercurial'))
595 604 path.append(os.path.expanduser('~/.hgrc'))
596 605 path = [os.path.normpath(f) for f in path]
597 606 return path
598 607
599 608 def parse_patch_output(output_line):
600 609 """parses the output produced by patch and returns the file name"""
601 610 pf = output_line[14:]
602 611 if pf.startswith("'") and pf.endswith("'") and pf.find(" ") >= 0:
603 612 pf = pf[1:-1] # Remove the quotes
604 613 return pf
605 614
606 615 def is_exec(f, last):
607 616 """check whether a file is executable"""
608 617 return (os.stat(f).st_mode & 0100 != 0)
609 618
610 619 def set_exec(f, mode):
611 620 s = os.stat(f).st_mode
612 621 if (s & 0100 != 0) == mode:
613 622 return
614 623 if mode:
615 624 # Turn on +x for every +r bit when making a file executable
616 625 # and obey umask.
617 626 umask = os.umask(0)
618 627 os.umask(umask)
619 628 os.chmod(f, s | (s & 0444) >> 2 & ~umask)
620 629 else:
621 630 os.chmod(f, s & 0666)
622 631
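# worked example of the chmod arithmetic in set_exec, assuming umask 022
# and a file mode of 0644 (values are illustrative):
s = 0644
assert (s | (s & 0444) >> 2 & ~022) == 0755   # umask 022: +x for each +r
assert (s | (s & 0444) >> 2 & ~027) == 0754   # umask 027: others lose x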
623 632 def set_binary(fd):
624 633 pass
625 634
626 635 def pconvert(path):
627 636 return path
628 637
629 638 def localpath(path):
630 639 return path
631 640
632 641 normpath = os.path.normpath
633 642 samestat = os.path.samestat
634 643
635 644 def makelock(info, pathname):
636 645 try:
637 646 os.symlink(info, pathname)
638 647 except OSError, why:
639 648 if why.errno == errno.EEXIST:
640 649 raise
641 650 else:
642 651 _makelock_file(info, pathname)
643 652
644 653 def readlock(pathname):
645 654 try:
646 655 return os.readlink(pathname)
647 656 except OSError, why:
648 657 if why.errno == errno.EINVAL:
649 658 return _readlock_file(pathname)
650 659 else:
651 660 raise
652 661
653 662 def testpid(pid):
654 663 '''return False if pid dead, True if running or not sure'''
655 664 try:
656 665 os.kill(pid, 0)
657 666 return True
658 667 except OSError, inst:
659 668 return inst.errno != errno.ESRCH
660 669
661 670 def explain_exit(code):
662 671 """return a 2-tuple (desc, code) describing a process's status"""
663 672 if os.WIFEXITED(code):
664 673 val = os.WEXITSTATUS(code)
665 674 return _("exited with status %d") % val, val
666 675 elif os.WIFSIGNALED(code):
667 676 val = os.WTERMSIG(code)
668 677 return _("killed by signal %d") % val, val
669 678 elif os.WIFSTOPPED(code):
670 679 val = os.WSTOPSIG(code)
671 680 return _("stopped by signal %d") % val, val
672 681 raise ValueError(_("invalid exit code"))
673 682
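# explain_exit() sketch (assumes the POSIX wait-status encoding, where
# 0x0100 means a normal exit with status 1, and no active translation):
#
# assert explain_exit(0x0100) == ('exited with status 1', 1)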
674 683 def opener(base, audit=True):
675 684 """
676 685 return a function that opens files relative to base
677 686
678 687 this function is used to hide the details of COW semantics and
679 688 remote file access from higher level code.
680 689 """
681 690 p = base
682 691 audit_p = audit
683 692
684 693 def mktempcopy(name):
685 694 d, fn = os.path.split(name)
686 695 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
687 696 os.close(fd)
688 697 ofp = posixfile(temp, "wb")
689 698 try:
690 699 try:
691 700 ifp = posixfile(name, "rb")
692 701 except IOError, inst:
693 702 if not getattr(inst, 'filename', None):
694 703 inst.filename = name
695 704 raise
696 705 for chunk in filechunkiter(ifp):
697 706 ofp.write(chunk)
698 707 ifp.close()
699 708 ofp.close()
700 709 except:
701 710 try: os.unlink(temp)
702 711 except: pass
703 712 raise
704 713 st = os.lstat(name)
705 714 os.chmod(temp, st.st_mode)
706 715 return temp
707 716
708 717 class atomictempfile(posixfile):
709 718 """the file will only be copied when rename is called"""
710 719 def __init__(self, name, mode):
711 720 self.__name = name
712 721 self.temp = mktempcopy(name)
713 722 posixfile.__init__(self, self.temp, mode)
714 723 def rename(self):
715 724 if not self.closed:
716 725 posixfile.close(self)
717 726 rename(self.temp, self.__name)
718 727 def __del__(self):
719 728 if not self.closed:
720 729 try:
721 730 os.unlink(self.temp)
722 731 except: pass
723 732 posixfile.close(self)
724 733
725 734 class atomicfile(atomictempfile):
726 735 """the file will only be copied on close"""
727 736 def __init__(self, name, mode):
728 737 atomictempfile.__init__(self, name, mode)
729 738 def close(self):
730 739 self.rename()
731 740 def __del__(self):
732 741 self.rename()
733 742
734 743 def o(path, mode="r", text=False, atomic=False, atomictemp=False):
735 744 if audit_p:
736 745 audit_path(path)
737 746 f = os.path.join(p, path)
738 747
739 748 if not text:
740 749 mode += "b" # for that other OS
741 750
742 751 if mode[0] != "r":
743 752 try:
744 753 nlink = nlinks(f)
745 754 except OSError:
746 755 d = os.path.dirname(f)
747 756 if not os.path.isdir(d):
748 757 os.makedirs(d)
749 758 else:
750 759 if atomic:
751 760 return atomicfile(f, mode)
752 761 elif atomictemp:
753 762 return atomictempfile(f, mode)
754 763 if nlink > 1:
755 764 rename(mktempcopy(f), f)
756 765 return posixfile(f, mode)
757 766
758 767 return o
759 768
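# opener() usage sketch (the base path is hypothetical):
#
# o = opener('/repo/.hg')
# f = o('data/foo.i', 'a')                # append; extra hardlinks broken
# g = o('dirstate', 'w', atomictemp=True) # published only by g.rename()
#
# every path is vetted by audit_path() unless opener(..., audit=False).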
760 769 class chunkbuffer(object):
761 770 """Allow reads of arbitrary size from an iterator that yields
762 771 chunks of arbitrary size."""
763 772
764 773 def __init__(self, in_iter, targetsize = 2**16):
765 774 """in_iter is the iterator that's iterating over the input chunks.
766 775 targetsize is how big a buffer to try to maintain."""
767 776 self.in_iter = iter(in_iter)
768 777 self.buf = ''
769 778 self.targetsize = int(targetsize)
770 779 if self.targetsize <= 0:
771 780 raise ValueError(_("targetsize must be greater than 0, was %d") %
772 781 targetsize)
773 782 self.iterempty = False
774 783
775 784 def fillbuf(self):
776 785 """Ignore target size; read every chunk from iterator until empty."""
777 786 if not self.iterempty:
778 787 collector = cStringIO.StringIO()
779 788 collector.write(self.buf)
780 789 for ch in self.in_iter:
781 790 collector.write(ch)
782 791 self.buf = collector.getvalue()
783 792 self.iterempty = True
784 793
785 794 def read(self, l):
786 795 """Read up to l bytes of data from the iterator of chunks.
787 796 Returns less than l bytes if the iterator runs dry."""
788 797 if l > len(self.buf) and not self.iterempty:
789 798 # Clamp to a multiple of self.targetsize
790 799 targetsize = self.targetsize * ((l // self.targetsize) + 1)
791 800 collector = cStringIO.StringIO()
792 801 collector.write(self.buf)
793 802 collected = len(self.buf)
794 803 for chunk in self.in_iter:
795 804 collector.write(chunk)
796 805 collected += len(chunk)
797 806 if collected >= targetsize:
798 807 break
799 808 if collected < targetsize:
800 809 self.iterempty = True
801 810 self.buf = collector.getvalue()
802 811 s, self.buf = self.buf[:l], buffer(self.buf, l)
803 812 return s
804 813
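# chunkbuffer sketch: reads of arbitrary size over arbitrary chunks.
cb = chunkbuffer(['abc', 'defgh', 'ij'])
assert cb.read(4) == 'abcd'
assert cb.read(100) == 'efghij'       # iterator ran dry: short read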
805 814 def filechunkiter(f, size = 65536):
806 815 """Create a generator that produces all the data in the file,
807 816 size (default 65536) bytes at a time. Chunks may be less than
808 817 size bytes if the chunk is the last chunk in the file, or the file
809 818 is a socket or some other type of file that sometimes reads less
810 819 data than is requested."""
811 820 s = f.read(size)
812 821 while len(s) > 0:
813 822 yield s
814 823 s = f.read(size)
815 824
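# filechunkiter() sketch using an in-memory file, four bytes at a time:
import cStringIO
chunks = list(filechunkiter(cStringIO.StringIO('abcdefghij'), size=4))
assert chunks == ['abcd', 'efgh', 'ij']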
816 825 def makedate():
817 826 lt = time.localtime()
818 827 if lt[8] == 1 and time.daylight:
819 828 tz = time.altzone
820 829 else:
821 830 tz = time.timezone
822 831 return time.mktime(lt), tz
823 832
824 833 def datestr(date=None, format='%a %b %d %H:%M:%S %Y', timezone=True):
825 834 """represent a (unixtime, offset) tuple as a localized time.
826 835 unixtime is seconds since the epoch, and offset is the time zone's
827 836 number of seconds away from UTC. if timezone is false, do not
828 837 append time zone to string."""
829 838 t, tz = date or makedate()
830 839 s = time.strftime(format, time.gmtime(float(t) - tz))
831 840 if timezone:
832 841 s += " %+03d%02d" % (-tz / 3600, ((-tz % 3600) / 60))
833 842 return s
834 843
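# datestr() examples: the epoch rendered at UTC and one hour east of it.
assert datestr((0, 0)) == 'Thu Jan 01 00:00:00 1970 +0000'
assert datestr((0, -3600)) == 'Thu Jan 01 01:00:00 1970 +0100'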
835 844 def shortuser(user):
836 845 """Return a short representation of a user name or email address."""
837 846 f = user.find('@')
838 847 if f >= 0:
839 848 user = user[:f]
840 849 f = user.find('<')
841 850 if f >= 0:
842 851 user = user[f+1:]
843 852 return user
844 853
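# shortuser() examples:
assert shortuser('John Doe <john@example.com>') == 'john'
assert shortuser('mpm@selenic.com') == 'mpm'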
845 854 def walkrepos(path):
846 855 '''yield every hg repository under path, recursively.'''
847 856 def errhandler(err):
848 857 if err.filename == path:
849 858 raise err
850 859
851 860 for root, dirs, files in os.walk(path, onerror=errhandler):
852 861 for d in dirs:
853 862 if d == '.hg':
854 863 yield root
855 864 dirs[:] = []
856 865 break
857 866
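# walkrepos() sketch (hypothetical root); the walk does not descend
# into a repository once its '.hg' is found, so nested repos are skipped:
#
# for repo in walkrepos('/srv/hg'):
#     print repo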
858 867 _rcpath = None
859 868
860 869 def rcpath():
861 870 '''return hgrc search path. if env var HGRCPATH is set, use it.
862 871 for each item in path, if directory, use files ending in .rc,
863 872 else use item.
864 873 make HGRCPATH empty to only look in .hg/hgrc of current repo.
865 874 if no HGRCPATH, use default os-specific path.'''
866 875 global _rcpath
867 876 if _rcpath is None:
868 877 if 'HGRCPATH' in os.environ:
869 878 _rcpath = []
870 879 for p in os.environ['HGRCPATH'].split(os.pathsep):
871 880 if not p: continue
872 881 if os.path.isdir(p):
873 882 for f in os.listdir(p):
874 883 if f.endswith('.rc'):
875 884 _rcpath.append(os.path.join(p, f))
876 885 else:
877 886 _rcpath.append(p)
878 887 else:
879 888 _rcpath = os_rcpath()
880 889 return _rcpath
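# HGRCPATH sketch: an empty value means only .hg/hgrc is consulted.
# rcpath() caches its result in _rcpath, so set the variable first:
#
# os.environ['HGRCPATH'] = ''
# assert rcpath() == []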
@@ -1,284 +1,284 b''
1 1 # util_win32.py - utility functions that use win32 API
2 2 #
3 3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of
7 7 # the GNU General Public License, incorporated herein by reference.
8 8
9 9 # Mark Hammond's win32all package allows better functionality on
10 10 # Windows. this module overrides definitions in util.py. if not
11 11 # available, import of this module will fail, and generic code will be
12 12 # used.
13 13
14 14 import win32api
15 15
16 16 from demandload import *
17 17 from i18n import gettext as _
18 18 demandload(globals(), 'errno os pywintypes win32con win32file win32process')
19 19 demandload(globals(), 'cStringIO winerror')
20 20
21 21 class WinError:
22 22 winerror_map = {
23 23 winerror.ERROR_ACCESS_DENIED: errno.EACCES,
24 24 winerror.ERROR_ACCOUNT_DISABLED: errno.EACCES,
25 25 winerror.ERROR_ACCOUNT_RESTRICTION: errno.EACCES,
26 26 winerror.ERROR_ALREADY_ASSIGNED: errno.EBUSY,
27 27 winerror.ERROR_ALREADY_EXISTS: errno.EEXIST,
28 28 winerror.ERROR_ARITHMETIC_OVERFLOW: errno.ERANGE,
29 29 winerror.ERROR_BAD_COMMAND: errno.EIO,
30 30 winerror.ERROR_BAD_DEVICE: errno.ENODEV,
31 31 winerror.ERROR_BAD_DRIVER_LEVEL: errno.ENXIO,
32 32 winerror.ERROR_BAD_EXE_FORMAT: errno.ENOEXEC,
33 33 winerror.ERROR_BAD_FORMAT: errno.ENOEXEC,
34 34 winerror.ERROR_BAD_LENGTH: errno.EINVAL,
35 35 winerror.ERROR_BAD_PATHNAME: errno.ENOENT,
36 36 winerror.ERROR_BAD_PIPE: errno.EPIPE,
37 37 winerror.ERROR_BAD_UNIT: errno.ENODEV,
38 38 winerror.ERROR_BAD_USERNAME: errno.EINVAL,
39 39 winerror.ERROR_BROKEN_PIPE: errno.EPIPE,
40 40 winerror.ERROR_BUFFER_OVERFLOW: errno.ENAMETOOLONG,
41 41 winerror.ERROR_BUSY: errno.EBUSY,
42 42 winerror.ERROR_BUSY_DRIVE: errno.EBUSY,
43 43 winerror.ERROR_CALL_NOT_IMPLEMENTED: errno.ENOSYS,
44 44 winerror.ERROR_CANNOT_MAKE: errno.EACCES,
45 45 winerror.ERROR_CANTOPEN: errno.EIO,
46 46 winerror.ERROR_CANTREAD: errno.EIO,
47 47 winerror.ERROR_CANTWRITE: errno.EIO,
48 48 winerror.ERROR_CRC: errno.EIO,
49 49 winerror.ERROR_CURRENT_DIRECTORY: errno.EACCES,
50 50 winerror.ERROR_DEVICE_IN_USE: errno.EBUSY,
51 51 winerror.ERROR_DEV_NOT_EXIST: errno.ENODEV,
52 52 winerror.ERROR_DIRECTORY: errno.EINVAL,
53 53 winerror.ERROR_DIR_NOT_EMPTY: errno.ENOTEMPTY,
54 54 winerror.ERROR_DISK_CHANGE: errno.EIO,
55 55 winerror.ERROR_DISK_FULL: errno.ENOSPC,
56 56 winerror.ERROR_DRIVE_LOCKED: errno.EBUSY,
57 57 winerror.ERROR_ENVVAR_NOT_FOUND: errno.EINVAL,
58 58 winerror.ERROR_EXE_MARKED_INVALID: errno.ENOEXEC,
59 59 winerror.ERROR_FILENAME_EXCED_RANGE: errno.ENAMETOOLONG,
60 60 winerror.ERROR_FILE_EXISTS: errno.EEXIST,
61 61 winerror.ERROR_FILE_INVALID: errno.ENODEV,
62 62 winerror.ERROR_FILE_NOT_FOUND: errno.ENOENT,
63 63 winerror.ERROR_GEN_FAILURE: errno.EIO,
64 64 winerror.ERROR_HANDLE_DISK_FULL: errno.ENOSPC,
65 65 winerror.ERROR_INSUFFICIENT_BUFFER: errno.ENOMEM,
66 66 winerror.ERROR_INVALID_ACCESS: errno.EACCES,
67 67 winerror.ERROR_INVALID_ADDRESS: errno.EFAULT,
68 68 winerror.ERROR_INVALID_BLOCK: errno.EFAULT,
69 69 winerror.ERROR_INVALID_DATA: errno.EINVAL,
70 70 winerror.ERROR_INVALID_DRIVE: errno.ENODEV,
71 71 winerror.ERROR_INVALID_EXE_SIGNATURE: errno.ENOEXEC,
72 72 winerror.ERROR_INVALID_FLAGS: errno.EINVAL,
73 73 winerror.ERROR_INVALID_FUNCTION: errno.ENOSYS,
74 74 winerror.ERROR_INVALID_HANDLE: errno.EBADF,
75 75 winerror.ERROR_INVALID_LOGON_HOURS: errno.EACCES,
76 76 winerror.ERROR_INVALID_NAME: errno.EINVAL,
77 77 winerror.ERROR_INVALID_OWNER: errno.EINVAL,
78 78 winerror.ERROR_INVALID_PARAMETER: errno.EINVAL,
79 79 winerror.ERROR_INVALID_PASSWORD: errno.EPERM,
80 80 winerror.ERROR_INVALID_PRIMARY_GROUP: errno.EINVAL,
81 81 winerror.ERROR_INVALID_SIGNAL_NUMBER: errno.EINVAL,
82 82 winerror.ERROR_INVALID_TARGET_HANDLE: errno.EIO,
83 83 winerror.ERROR_INVALID_WORKSTATION: errno.EACCES,
84 84 winerror.ERROR_IO_DEVICE: errno.EIO,
85 85 winerror.ERROR_IO_INCOMPLETE: errno.EINTR,
86 86 winerror.ERROR_LOCKED: errno.EBUSY,
87 87 winerror.ERROR_LOCK_VIOLATION: errno.EACCES,
88 88 winerror.ERROR_LOGON_FAILURE: errno.EACCES,
89 89 winerror.ERROR_MAPPED_ALIGNMENT: errno.EINVAL,
90 90 winerror.ERROR_META_EXPANSION_TOO_LONG: errno.E2BIG,
91 91 winerror.ERROR_MORE_DATA: errno.EPIPE,
92 92 winerror.ERROR_NEGATIVE_SEEK: errno.ESPIPE,
93 93 winerror.ERROR_NOACCESS: errno.EFAULT,
94 94 winerror.ERROR_NONE_MAPPED: errno.EINVAL,
95 95 winerror.ERROR_NOT_ENOUGH_MEMORY: errno.ENOMEM,
96 96 winerror.ERROR_NOT_READY: errno.EAGAIN,
97 97 winerror.ERROR_NOT_SAME_DEVICE: errno.EXDEV,
98 98 winerror.ERROR_NO_DATA: errno.EPIPE,
99 99 winerror.ERROR_NO_MORE_SEARCH_HANDLES: errno.EIO,
100 100 winerror.ERROR_NO_PROC_SLOTS: errno.EAGAIN,
101 101 winerror.ERROR_NO_SUCH_PRIVILEGE: errno.EACCES,
102 102 winerror.ERROR_OPEN_FAILED: errno.EIO,
103 103 winerror.ERROR_OPEN_FILES: errno.EBUSY,
104 104 winerror.ERROR_OPERATION_ABORTED: errno.EINTR,
105 105 winerror.ERROR_OUTOFMEMORY: errno.ENOMEM,
106 106 winerror.ERROR_PASSWORD_EXPIRED: errno.EACCES,
107 107 winerror.ERROR_PATH_BUSY: errno.EBUSY,
108 108 winerror.ERROR_PATH_NOT_FOUND: errno.ENOENT,
109 109 winerror.ERROR_PIPE_BUSY: errno.EBUSY,
110 110 winerror.ERROR_PIPE_CONNECTED: errno.EPIPE,
111 111 winerror.ERROR_PIPE_LISTENING: errno.EPIPE,
112 112 winerror.ERROR_PIPE_NOT_CONNECTED: errno.EPIPE,
113 113 winerror.ERROR_PRIVILEGE_NOT_HELD: errno.EACCES,
114 114 winerror.ERROR_READ_FAULT: errno.EIO,
115 115 winerror.ERROR_SEEK: errno.EIO,
116 116 winerror.ERROR_SEEK_ON_DEVICE: errno.ESPIPE,
117 117 winerror.ERROR_SHARING_BUFFER_EXCEEDED: errno.ENFILE,
118 118 winerror.ERROR_SHARING_VIOLATION: errno.EACCES,
119 119 winerror.ERROR_STACK_OVERFLOW: errno.ENOMEM,
120 120 winerror.ERROR_SWAPERROR: errno.ENOENT,
121 121 winerror.ERROR_TOO_MANY_MODULES: errno.EMFILE,
122 122 winerror.ERROR_TOO_MANY_OPEN_FILES: errno.EMFILE,
123 123 winerror.ERROR_UNRECOGNIZED_MEDIA: errno.ENXIO,
124 124 winerror.ERROR_UNRECOGNIZED_VOLUME: errno.ENODEV,
125 125 winerror.ERROR_WAIT_NO_CHILDREN: errno.ECHILD,
126 126 winerror.ERROR_WRITE_FAULT: errno.EIO,
127 127 winerror.ERROR_WRITE_PROTECT: errno.EROFS,
128 128 }
129 129
130 130 def __init__(self, err):
131 131 self.win_errno, self.win_function, self.win_strerror = err
132 132 if self.win_strerror.endswith('.'):
133 133 self.win_strerror = self.win_strerror[:-1]
134 134
135 135 class WinIOError(WinError, IOError):
136 136 def __init__(self, err, filename=None):
137 137 WinError.__init__(self, err)
138 138 IOError.__init__(self, self.winerror_map.get(self.win_errno, 0),
139 139 self.win_strerror)
140 140 self.filename = filename
141 141
142 142 class WinOSError(WinError, OSError):
143 143 def __init__(self, err):
144 144 WinError.__init__(self, err)
145 145 OSError.__init__(self, self.winerror_map.get(self.win_errno, 0),
146 146 self.win_strerror)
147 147
148 148 def os_link(src, dst):
149 149 # NB will only succeed on NTFS
150 150 try:
151 151 win32file.CreateHardLink(dst, src)
152 152 except pywintypes.error, details:
153 153 raise WinOSError(details)
154 154
155 155 def nlinks(pathname):
156 156 """Return number of hardlinks for the given file."""
157 157 try:
158 158 fh = win32file.CreateFile(pathname,
159 159 win32file.GENERIC_READ, win32file.FILE_SHARE_READ,
160 160 None, win32file.OPEN_EXISTING, 0, None)
161 161 res = win32file.GetFileInformationByHandle(fh)
162 162 fh.Close()
163 163 return res[7]
164 164 except pywintypes.error:
165 165 return os.stat(pathname).st_nlink
166 166
167 167 def testpid(pid):
168 168 '''return True if pid is still running or unable to
169 169 determine, False otherwise'''
170 170 try:
171 171 handle = win32api.OpenProcess(
172 172 win32con.PROCESS_QUERY_INFORMATION, False, pid)
173 173 if handle:
174 174 status = win32process.GetExitCodeProcess(handle)
175 175 return status == win32con.STILL_ACTIVE
176 176 except pywintypes.error, details:
177 177 return details[0] != winerror.ERROR_INVALID_PARAMETER
178 178 return True
179 179
180 180 def system_rcpath_win32():
181 181 '''return default os-specific hgrc search path'''
182 182 proc = win32api.GetCurrentProcess()
183 183 filename = win32process.GetModuleFileNameEx(proc, 0)
184 184 return [os.path.join(os.path.dirname(filename), 'mercurial.ini')]
185 185
186 class posixfile(object):
186 class posixfile_nt(object):
187 187 '''file object with posix-like semantics. on windows, normal
188 188 files can not be deleted or renamed if they are open. must open
189 189 with win32file.FILE_SHARE_DELETE. this flag does not exist on
190 windows <= nt.'''
190 windows < nt, so do not use this class there.'''
191 191
192 192 # tried to use win32file._open_osfhandle to pass fd to os.fdopen,
193 193 # but does not work at all. wrap win32 file api instead.
194 194
195 195 def __init__(self, name, mode='rb'):
196 196 access = 0
197 197 if 'r' in mode or '+' in mode:
198 198 access |= win32file.GENERIC_READ
199 199 if 'w' in mode or 'a' in mode:
200 200 access |= win32file.GENERIC_WRITE
201 201 if 'r' in mode:
202 202 creation = win32file.OPEN_EXISTING
203 203 elif 'a' in mode:
204 204 creation = win32file.OPEN_ALWAYS
205 205 else:
206 206 creation = win32file.CREATE_ALWAYS
207 207 try:
208 208 self.handle = win32file.CreateFile(name,
209 209 access,
210 210 win32file.FILE_SHARE_READ |
211 211 win32file.FILE_SHARE_WRITE |
212 212 win32file.FILE_SHARE_DELETE,
213 213 None,
214 214 creation,
215 215 win32file.FILE_ATTRIBUTE_NORMAL,
216 216 0)
217 217 except pywintypes.error, err:
218 218 raise WinIOError(err, name)
219 219 self.closed = False
220 220 self.name = name
221 221 self.mode = mode
222 222
223 223 def __iter__(self):
224 224 for line in self.read().splitlines(True):
225 225 yield line
226 226
227 227 def read(self, count=-1):
228 228 try:
229 229 cs = cStringIO.StringIO()
230 230 while count:
231 231 wincount = int(count)
232 232 if wincount == -1:
233 233 wincount = 1048576
234 234 val, data = win32file.ReadFile(self.handle, wincount)
235 235 if not data: break
236 236 cs.write(data)
237 237 if count != -1:
238 238 count -= len(data)
239 239 return cs.getvalue()
240 240 except pywintypes.error, err:
241 241 raise WinIOError(err)
242 242
243 243 def write(self, data):
244 244 try:
245 245 if 'a' in self.mode:
246 246 win32file.SetFilePointer(self.handle, 0, win32file.FILE_END)
247 247 nwrit = 0
248 248 while nwrit < len(data):
249 249 val, nwrit = win32file.WriteFile(self.handle, data)
250 250 data = data[nwrit:]
251 251 except pywintypes.error, err:
252 252 raise WinIOError(err)
253 253
254 254 def seek(self, pos, whence=0):
255 255 try:
256 256 win32file.SetFilePointer(self.handle, int(pos), whence)
257 257 except pywintypes.error, err:
258 258 raise WinIOError(err)
259 259
260 260 def tell(self):
261 261 try:
262 262 return win32file.SetFilePointer(self.handle, 0,
263 263 win32file.FILE_CURRENT)
264 264 except pywintypes.error, err:
265 265 raise WinIOError(err)
266 266
267 267 def close(self):
268 268 if not self.closed:
269 269 self.handle = None
270 270 self.closed = True
271 271
272 272 def flush(self):
273 273 try:
274 274 win32file.FlushFileBuffers(self.handle)
275 275 except pywintypes.error, err:
276 276 raise WinIOError(err)
277 277
278 278 def truncate(self, pos=0):
279 279 try:
280 280 win32file.SetFilePointer(self.handle, int(pos),
281 281 win32file.FILE_BEGIN)
282 282 win32file.SetEndOfFile(self.handle)
283 283 except pywintypes.error, err:
284 284 raise WinIOError(err)
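# posixfile_nt usage sketch (hypothetical file name): opening with
# FILE_SHARE_DELETE lets other processes delete or rename the file
# while it is open, which plain python file objects on windows forbid.
#
# f = posixfile_nt('00changelog.i', 'rb')
# data = f.read(4096)
# f.close()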