Create an atomic opener that does not automatically rename on close...
mason@suse.com
r2076:d007df6d default
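
The change below swaps checkinlinesize() from the atomic opener, which renames its temporary file over the real file as soon as it is closed, to a new atomictemp opener that publishes the temporary file only when rename() is called explicitly. A minimal sketch of the difference, using the opener defined in the util.py hunk further down (illustrative path and variable names only; note that both atomic paths apply to files that already exist, since they start from a temp copy of the current contents):

    op = opener('/path/to/repo/.hg')       # util.opener from this changeset

    # atomic=True: the temp copy replaces the real file on close()
    f = op('someindex', 'w', atomic=True)
    f.write(data)
    f.close()                              # rename always happens here

    # atomictemp=True: the real file is untouched until rename()
    f = op('someindex', 'w', atomictemp=True)
    f.write(data)
    f.rename()                             # explicit publish; if the process dies
                                           # first, __del__ unlinks the temp file
                                           # and the original survives intact
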
@@ -1,1064 +1,1066 @@ revlog.py
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25
26 26 def flagstr(flag):
27 27 if flag == "inline":
28 28 return REVLOGNGINLINEDATA
29 29 raise RevlogError(_("unknown revlog flag %s") % flag)
30 30
31 31 def hash(text, p1, p2):
32 32 """generate a hash from the given text and its parent hashes
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
45 45 def compress(text):
46 46 """ generate a possibly-compressed representation of text """
47 47 if not text: return ("", text)
48 48 if len(text) < 44:
49 49 if text[0] == '\0': return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 return ("", bin)
56 56
57 57 def decompress(bin):
58 58 """ decompress the given input """
59 59 if not bin: return bin
60 60 t = bin[0]
61 61 if t == '\0': return bin
62 62 if t == 'x': return zlib.decompress(bin)
63 63 if t == 'u': return bin[1:]
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
66 66 indexformatv0 = ">4l20s20s20s"
67 67 # index ng:
68 68 # 6 bytes offset
69 69 # 2 bytes flags
70 70 # 4 bytes compressed length
71 71 # 4 bytes uncompressed length
72 72 # 4 bytes: base rev
73 73 # 4 bytes link rev
74 74 # 4 bytes parent 1 rev
75 75 # 4 bytes parent 2 rev
76 76 # 32 bytes: nodeid
77 77 indexformatng = ">Qiiiiii20s12x"
78 78 versionformat = ">i"
79 79
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83
84 84 By default we parse and load 1000 entries at a time.
85 85
86 86 If no position is specified, we load the whole index, and replace
87 87 the lazy objects in revlog with the underlying objects for
88 88 efficiency in cases where we look at most of the nodes.
89 89 """
90 90 def __init__(self, data, revlog, indexformat):
91 91 self.data = data
92 92 self.s = struct.calcsize(indexformat)
93 93 self.indexformat = indexformat
94 94 self.l = len(data)/self.s
95 95 self.index = [None] * self.l
96 96 self.map = {nullid: -1}
97 97 self.all = 0
98 98 self.revlog = revlog
99 99
100 100 def load(self, pos=None):
101 101 if self.all: return
102 102 if pos is not None:
103 103 block = pos / 1000
104 104 i = block * 1000
105 105 end = min(self.l, i + 1000)
106 106 else:
107 107 self.all = 1
108 108 i = 0
109 109 end = self.l
110 110 self.revlog.index = self.index
111 111 self.revlog.nodemap = self.map
112 112
113 113 while i < end:
114 114 if not self.index[i]:
115 115 d = self.data[i * self.s: (i + 1) * self.s]
116 116 e = struct.unpack(self.indexformat, d)
117 117 self.index[i] = e
118 118 self.map[e[-1]] = i
119 119 i += 1
120 120
121 121 class lazyindex(object):
122 122 """a lazy version of the index array"""
123 123 def __init__(self, parser):
124 124 self.p = parser
125 125 def __len__(self):
126 126 return len(self.p.index)
127 127 def load(self, pos):
128 128 if pos < 0:
129 129 pos += len(self.p.index)
130 130 self.p.load(pos)
131 131 return self.p.index[pos]
132 132 def __getitem__(self, pos):
133 133 return self.p.index[pos] or self.load(pos)
134 134 def __setitem__(self, pos, item):
135 135 self.p.index[pos] = item
136 136 def __delitem__(self, pos):
137 137 del self.p.index[pos]
138 138 def append(self, e):
139 139 self.p.index.append(e)
140 140
141 141 class lazymap(object):
142 142 """a lazy version of the node map"""
143 143 def __init__(self, parser):
144 144 self.p = parser
145 145 def load(self, key):
146 146 if self.p.all: return
147 147 n = self.p.data.find(key)
148 148 if n < 0:
149 149 raise KeyError(key)
150 150 pos = n / self.p.s
151 151 self.p.load(pos)
152 152 def __contains__(self, key):
153 153 self.p.load()
154 154 return key in self.p.map
155 155 def __iter__(self):
156 156 yield nullid
157 157 for i in xrange(self.p.l):
158 158 try:
159 159 yield self.p.index[i][-1]
160 160 except:
161 161 self.p.load(i)
162 162 yield self.p.index[i][-1]
163 163 def __getitem__(self, key):
164 164 try:
165 165 return self.p.map[key]
166 166 except KeyError:
167 167 try:
168 168 self.load(key)
169 169 return self.p.map[key]
170 170 except KeyError:
171 171 raise KeyError("node " + hex(key))
172 172 def __setitem__(self, key, val):
173 173 self.p.map[key] = val
174 174 def __delitem__(self, key):
175 175 del self.p.map[key]
176 176
177 177 class RevlogError(Exception): pass
178 178
179 179 class revlog(object):
180 180 """
181 181 the underlying revision storage object
182 182
183 183 A revlog consists of two parts, an index and the revision data.
184 184
185 185 The index is a file with a fixed record size containing
186 186 information on each revision, including its nodeid (hash), the
187 187 nodeids of its parents, the position and offset of its data within
188 188 the data file, and the revision it's based on. Finally, each entry
189 189 contains a linkrev entry that can serve as a pointer to external
190 190 data.
191 191
192 192 The revision data itself is a linear collection of data chunks.
193 193 Each chunk represents a revision and is usually represented as a
194 194 delta against the previous chunk. To bound lookup time, runs of
195 195 deltas are limited to about 2 times the length of the original
196 196 version data. This makes retrieval of a version proportional to
197 197 its size, or O(1) relative to the number of revisions.
198 198
199 199 Both pieces of the revlog are written to in an append-only
200 200 fashion, which means we never need to rewrite a file to insert or
201 201 remove data, and can use some simple techniques to avoid the need
202 202 for locking while reading.
203 203 """
204 204 def __init__(self, opener, indexfile, datafile, defversion=0):
205 205 """
206 206 create a revlog object
207 207
208 208 opener is a function that abstracts the file opening operation
209 209 and can be used to implement COW semantics or the like.
210 210 """
211 211 self.indexfile = indexfile
212 212 self.datafile = datafile
213 213 self.opener = opener
214 214
215 215 self.indexstat = None
216 216 self.cache = None
217 217 self.chunkcache = None
218 218 self.defversion = defversion
219 219 self.load()
220 220
221 221 def load(self):
222 222 v = self.defversion
223 223 try:
224 224 f = self.opener(self.indexfile)
225 225 i = f.read()
226 226 except IOError, inst:
227 227 if inst.errno != errno.ENOENT:
228 228 raise
229 229 i = ""
230 230 else:
231 231 try:
232 232 st = os.fstat(f.fileno())
233 233 except AttributeError, inst:
234 234 st = None
235 235 else:
236 236 oldst = self.indexstat
237 237 if (oldst and st.st_dev == oldst.st_dev
238 238 and st.st_ino == oldst.st_ino
239 239 and st.st_mtime == oldst.st_mtime
240 240 and st.st_ctime == oldst.st_ctime):
241 241 return
242 242 self.indexstat = st
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 flags = v & ~0xFFFF
246 246 fmt = v & 0xFFFF
247 247 if fmt == 0:
248 248 if flags:
249 249 raise RevlogError(_("index %s invalid flags %x for format v0") %
250 250 (self.indexfile, flags))
251 251 elif fmt == REVLOGNG:
252 252 if flags & ~REVLOGNGINLINEDATA:
253 253 raise RevlogError(_("index %s invalid flags %x for revlogng") %
254 254 (self.indexfile, flags))
255 255 else:
256 256 raise RevlogError(_("index %s invalid format %d") %
257 257 (self.indexfile, fmt))
258 258 self.version = v
259 259 if v == 0:
260 260 self.indexformat = indexformatv0
261 261 else:
262 262 self.indexformat = indexformatng
263 263
264 264 if i:
265 265 if not self.inlinedata() and st and st.st_size > 10000:
266 266 # big index, let's parse it on demand
267 267 parser = lazyparser(i, self, self.indexformat)
268 268 self.index = lazyindex(parser)
269 269 self.nodemap = lazymap(parser)
270 270 else:
271 271 self.parseindex(i)
272 272 if self.inlinedata():
273 273 # we've already got the entire data file read in, save it
274 274 # in the chunk data
275 275 self.chunkcache = (0, i)
276 276 if self.version != 0:
277 277 e = list(self.index[0])
278 278 type = self.ngtype(e[0])
279 279 e[0] = self.offset_type(0, type)
280 280 self.index[0] = e
281 281 else:
282 282 self.nodemap = { nullid: -1}
283 283 self.index = []
284 284
285 285
286 286 def parseindex(self, data):
287 287 s = struct.calcsize(self.indexformat)
288 288 l = len(data)
289 289 self.index = []
290 290 self.nodemap = {nullid: -1}
291 291 inline = self.inlinedata()
292 292 off = 0
293 293 n = 0
294 294 while off < l:
295 295 e = struct.unpack(self.indexformat, data[off:off + s])
296 296 self.index.append(e)
297 297 self.nodemap[e[-1]] = n
298 298 n += 1
299 299 off += s
300 300 if inline:
301 301 off += e[1]
302 302
303 303 def ngoffset(self, q):
304 304 if q & 0xFFFF:
305 305 raise RevlogError(_('%s: incompatible revision flag %x') %
306 306 (self.indexfile, q))
307 307 return long(q >> 16)
308 308
309 309 def ngtype(self, q):
310 310 return int(q & 0xFFFF)
311 311
312 312 def offset_type(self, offset, type):
313 313 return long(long(offset) << 16 | type)
314 314
315 315 def loadindexmap(self):
316 316 """loads both the map and the index from the lazy parser"""
317 317 if isinstance(self.index, lazyindex):
318 318 p = self.index.p
319 319 p.load()
320 320
321 321 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
322 322 def tip(self): return self.node(len(self.index) - 1)
323 323 def count(self): return len(self.index)
324 324 def node(self, rev):
325 325 return (rev < 0) and nullid or self.index[rev][-1]
326 326 def rev(self, node):
327 327 try:
328 328 return self.nodemap[node]
329 329 except KeyError:
330 330 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
331 331 def linkrev(self, node): return self.index[self.rev(node)][-4]
332 332 def parents(self, node):
333 333 if node == nullid: return (nullid, nullid)
334 334 r = self.rev(node)
335 335 d = self.index[r][-3:-1]
336 336 if self.version == 0:
337 337 return d
338 338 return [ self.node(x) for x in d ]
339 339 def start(self, rev):
340 340 if rev < 0:
341 341 return -1
342 342 if self.version != 0:
343 343 return self.ngoffset(self.index[rev][0])
344 344 return self.index[rev][0]
345 345 def end(self, rev): return self.start(rev) + self.length(rev)
346 346
347 347 def length(self, rev):
348 348 if rev < 0:
349 349 return 0
350 350 else:
351 351 return self.index[rev][1]
352 352 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
353 353
354 354 def reachable(self, rev, stop=None):
355 355 reachable = {}
356 356 visit = [rev]
357 357 reachable[rev] = 1
358 358 if stop:
359 359 stopn = self.rev(stop)
360 360 else:
361 361 stopn = 0
362 362 while visit:
363 363 n = visit.pop(0)
364 364 if n == stop:
365 365 continue
366 366 if n == nullid:
367 367 continue
368 368 for p in self.parents(n):
369 369 if self.rev(p) < stopn:
370 370 continue
371 371 if p not in reachable:
372 372 reachable[p] = 1
373 373 visit.append(p)
374 374 return reachable
375 375
376 376 def nodesbetween(self, roots=None, heads=None):
377 377 """Return a tuple containing three elements. Elements 1 and 2 contain
378 378 a final list of bases and heads after all the unreachable ones have been
379 379 pruned. Element 0 contains a topologically sorted list of all
380 380
381 381 nodes that satisfy these constraints:
382 382 1. All nodes must be descended from a node in roots (the nodes in
383 383 roots are considered descended from themselves).
384 384 2. All nodes must also be ancestors of a node in heads (the nodes in
385 385 heads are considered to be their own ancestors).
386 386
387 387 If roots is unspecified, nullid is assumed as the only root.
388 388 If heads is unspecified, it is taken to be the output of the
389 389 heads method (i.e. a list of all nodes in the repository that
390 390 have no children)."""
391 391 nonodes = ([], [], [])
392 392 if roots is not None:
393 393 roots = list(roots)
394 394 if not roots:
395 395 return nonodes
396 396 lowestrev = min([self.rev(n) for n in roots])
397 397 else:
398 398 roots = [nullid] # Everybody's a descendent of nullid
399 399 lowestrev = -1
400 400 if (lowestrev == -1) and (heads is None):
401 401 # We want _all_ the nodes!
402 402 return ([self.node(r) for r in xrange(0, self.count())],
403 403 [nullid], list(self.heads()))
404 404 if heads is None:
405 405 # All nodes are ancestors, so the latest ancestor is the last
406 406 # node.
407 407 highestrev = self.count() - 1
408 408 # Set ancestors to None to signal that every node is an ancestor.
409 409 ancestors = None
410 410 # Set heads to an empty dictionary for later discovery of heads
411 411 heads = {}
412 412 else:
413 413 heads = list(heads)
414 414 if not heads:
415 415 return nonodes
416 416 ancestors = {}
417 417 # Start at the top and keep marking parents until we're done.
418 418 nodestotag = heads[:]
419 419 # Turn heads into a dictionary so we can remove 'fake' heads.
420 420 # Also, later we will be using it to filter out the heads we can't
421 421 # find from roots.
422 422 heads = dict.fromkeys(heads, 0)
423 423 # Remember where the top was so we can use it as a limit later.
424 424 highestrev = max([self.rev(n) for n in nodestotag])
425 425 while nodestotag:
426 426 # grab a node to tag
427 427 n = nodestotag.pop()
428 428 # Never tag nullid
429 429 if n == nullid:
430 430 continue
431 431 # A node's revision number represents its place in a
432 432 # topologically sorted list of nodes.
433 433 r = self.rev(n)
434 434 if r >= lowestrev:
435 435 if n not in ancestors:
436 436 # If we are possibly a descendent of one of the roots
437 437 # and we haven't already been marked as an ancestor
438 438 ancestors[n] = 1 # Mark as ancestor
439 439 # Add non-nullid parents to list of nodes to tag.
440 440 nodestotag.extend([p for p in self.parents(n) if
441 441 p != nullid])
442 442 elif n in heads: # We've seen it before, is it a fake head?
443 443 # So it is, real heads should not be the ancestors of
444 444 # any other heads.
445 445 heads.pop(n)
446 446 if not ancestors:
447 447 return nonodes
448 448 # Now that we have our set of ancestors, we want to remove any
449 449 # roots that are not ancestors.
450 450
451 451 # If one of the roots was nullid, everything is included anyway.
452 452 if lowestrev > -1:
453 453 # But, since we weren't, let's recompute the lowest rev to not
454 454 # include roots that aren't ancestors.
455 455
456 456 # Filter out roots that aren't ancestors of heads
457 457 roots = [n for n in roots if n in ancestors]
458 458 # Recompute the lowest revision
459 459 if roots:
460 460 lowestrev = min([self.rev(n) for n in roots])
461 461 else:
462 462 # No more roots? Return empty list
463 463 return nonodes
464 464 else:
465 465 # We are descending from nullid, and don't need to care about
466 466 # any other roots.
467 467 lowestrev = -1
468 468 roots = [nullid]
469 469 # Transform our roots list into a 'set' (i.e. a dictionary where the
470 470 # values don't matter.)
471 471 descendents = dict.fromkeys(roots, 1)
472 472 # Also, keep the original roots so we can filter out roots that aren't
473 473 # 'real' roots (i.e. are descended from other roots).
474 474 roots = descendents.copy()
475 475 # Our topologically sorted list of output nodes.
476 476 orderedout = []
477 477 # Don't start at nullid since we don't want nullid in our output list,
478 478 # and if nullid shows up in descendents, empty parents will look like
479 479 # they're descendents.
480 480 for r in xrange(max(lowestrev, 0), highestrev + 1):
481 481 n = self.node(r)
482 482 isdescendent = False
483 483 if lowestrev == -1: # Everybody is a descendent of nullid
484 484 isdescendent = True
485 485 elif n in descendents:
486 486 # n is already a descendent
487 487 isdescendent = True
488 488 # This check only needs to be done here because all the roots
489 489 # will start being marked as descendents before the loop.
490 490 if n in roots:
491 491 # If n was a root, check if it's a 'real' root.
492 492 p = tuple(self.parents(n))
493 493 # If any of its parents are descendents, it's not a root.
494 494 if (p[0] in descendents) or (p[1] in descendents):
495 495 roots.pop(n)
496 496 else:
497 497 p = tuple(self.parents(n))
498 498 # A node is a descendent if either of its parents are
499 499 # descendents. (We seeded the descendents dict with the roots
500 500 # up there, remember?)
501 501 if (p[0] in descendents) or (p[1] in descendents):
502 502 descendents[n] = 1
503 503 isdescendent = True
504 504 if isdescendent and ((ancestors is None) or (n in ancestors)):
505 505 # Only include nodes that are both descendents and ancestors.
506 506 orderedout.append(n)
507 507 if (ancestors is not None) and (n in heads):
508 508 # We're trying to figure out which heads are reachable
509 509 # from roots.
510 510 # Mark this head as having been reached
511 511 heads[n] = 1
512 512 elif ancestors is None:
513 513 # Otherwise, we're trying to discover the heads.
514 514 # Assume this is a head because if it isn't, the next step
515 515 # will eventually remove it.
516 516 heads[n] = 1
517 517 # But, obviously its parents aren't.
518 518 for p in self.parents(n):
519 519 heads.pop(p, None)
520 520 heads = [n for n in heads.iterkeys() if heads[n] != 0]
521 521 roots = roots.keys()
522 522 assert orderedout
523 523 assert roots
524 524 assert heads
525 525 return (orderedout, roots, heads)
526 526
527 527 def heads(self, start=None):
528 528 """return the list of all nodes that have no children
529 529
530 530 if start is specified, only heads that are descendants of
531 531 start will be returned
532 532
533 533 """
534 534 if start is None:
535 535 start = nullid
536 536 reachable = {start: 1}
537 537 heads = {start: 1}
538 538 startrev = self.rev(start)
539 539
540 540 for r in xrange(startrev + 1, self.count()):
541 541 n = self.node(r)
542 542 for pn in self.parents(n):
543 543 if pn in reachable:
544 544 reachable[n] = 1
545 545 heads[n] = 1
546 546 if pn in heads:
547 547 del heads[pn]
548 548 return heads.keys()
549 549
550 550 def children(self, node):
551 551 """find the children of a given node"""
552 552 c = []
553 553 p = self.rev(node)
554 554 for r in range(p + 1, self.count()):
555 555 n = self.node(r)
556 556 for pn in self.parents(n):
557 557 if pn == node:
558 558 c.append(n)
559 559 continue
560 560 elif pn == nullid:
561 561 continue
562 562 return c
563 563
564 564 def lookup(self, id):
565 565 """locate a node based on revision number or subset of hex nodeid"""
566 566 try:
567 567 rev = int(id)
568 568 if str(rev) != id: raise ValueError
569 569 if rev < 0: rev = self.count() + rev
570 570 if rev < 0 or rev >= self.count(): raise ValueError
571 571 return self.node(rev)
572 572 except (ValueError, OverflowError):
573 573 c = []
574 574 for n in self.nodemap:
575 575 if hex(n).startswith(id):
576 576 c.append(n)
577 577 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
578 578 if len(c) < 1: raise RevlogError(_("No match found"))
579 579 return c[0]
580 580
581 581 return None
582 582
583 583 def diff(self, a, b):
584 584 """return a delta between two revisions"""
585 585 return mdiff.textdiff(a, b)
586 586
587 587 def patches(self, t, pl):
588 588 """apply a list of patches to a string"""
589 589 return mdiff.patches(t, pl)
590 590
591 591 def chunk(self, rev, df=None, cachelen=4096):
592 592 start, length = self.start(rev), self.length(rev)
593 593 inline = self.inlinedata()
594 594 if inline:
595 595 start += (rev + 1) * struct.calcsize(self.indexformat)
596 596 end = start + length
597 597 def loadcache(df):
598 598 cache_length = max(cachelen, length) # 4k
599 599 if not df:
600 600 if inline:
601 601 df = self.opener(self.indexfile)
602 602 else:
603 603 df = self.opener(self.datafile)
604 604 df.seek(start)
605 605 self.chunkcache = (start, df.read(cache_length))
606 606
607 607 if not self.chunkcache:
608 608 loadcache(df)
609 609
610 610 cache_start = self.chunkcache[0]
611 611 cache_end = cache_start + len(self.chunkcache[1])
612 612 if start >= cache_start and end <= cache_end:
613 613 # it is cached
614 614 offset = start - cache_start
615 615 else:
616 616 loadcache(df)
617 617 offset = 0
618 618
619 619 #def checkchunk():
620 620 # df = self.opener(self.datafile)
621 621 # df.seek(start)
622 622 # return df.read(length)
623 623 #assert s == checkchunk()
624 624 return decompress(self.chunkcache[1][offset:offset + length])
625 625
626 626 def delta(self, node):
627 627 """return or calculate a delta between a node and its predecessor"""
628 628 r = self.rev(node)
629 629 return self.revdiff(r - 1, r)
630 630
631 631 def revdiff(self, rev1, rev2):
632 632 """return or calculate a delta between two revisions"""
633 633 b1 = self.base(rev1)
634 634 b2 = self.base(rev2)
635 635 if b1 == b2 and rev1 + 1 == rev2:
636 636 return self.chunk(rev2)
637 637 else:
638 638 return self.diff(self.revision(self.node(rev1)),
639 639 self.revision(self.node(rev2)))
640 640
641 641 def revision(self, node):
642 642 """return an uncompressed revision of a given"""
643 643 if node == nullid: return ""
644 644 if self.cache and self.cache[0] == node: return self.cache[2]
645 645
646 646 # look up what we need to read
647 647 text = None
648 648 rev = self.rev(node)
649 649 base = self.base(rev)
650 650
651 651 if self.inlinedata():
652 652 # we probably have the whole chunk cached
653 653 df = None
654 654 else:
655 655 df = self.opener(self.datafile)
656 656
657 657 # do we have useful data cached?
658 658 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
659 659 base = self.cache[1]
660 660 text = self.cache[2]
661 661 else:
662 662 text = self.chunk(base, df=df)
663 663
664 664 bins = []
665 665 for r in xrange(base + 1, rev + 1):
666 666 bins.append(self.chunk(r, df=df))
667 667
668 668 text = self.patches(text, bins)
669 669
670 670 p1, p2 = self.parents(node)
671 671 if node != hash(text, p1, p2):
672 672 raise RevlogError(_("integrity check failed on %s:%d")
673 673 % (self.datafile, rev))
674 674
675 675 self.cache = (node, rev, text)
676 676 return text
677 677
678 678 def checkinlinesize(self, tr, fp=None):
679 679 if not self.inlinedata():
680 680 return
681 681 if not fp:
682 682 fp = self.opener(self.indexfile, 'r')
683 683 size = fp.tell()
684 684 if size < 131072:
685 685 return
686 686 tr.add(self.datafile, 0)
687 687 df = self.opener(self.datafile, 'w')
688 688 calc = struct.calcsize(self.indexformat)
689 689 for r in xrange(self.count()):
690 690 start = self.start(r) + (r + 1) * calc
691 691 length = self.length(r)
692 692 fp.seek(start)
693 693 d = fp.read(length)
694 694 df.write(d)
695 695 fp.close()
696 696 df.close()
697 fp = self.opener(self.indexfile, 'w', atomic=True)
697 fp = self.opener(self.indexfile, 'w', atomictemp=True)
698 698 self.version &= ~(REVLOGNGINLINEDATA)
699 699 if self.count():
700 700 x = self.index[0]
701 701 e = struct.pack(self.indexformat, *x)[4:]
702 702 l = struct.pack(versionformat, self.version)
703 703 fp.write(l)
704 704 fp.write(e)
705 705
706 706 for i in xrange(1, self.count()):
707 707 x = self.index[i]
708 708 e = struct.pack(self.indexformat, *x)
709 709 fp.write(e)
710 710
711 fp.close()
711 # if we don't call rename, the temp file will never replace the
712 # real index
713 fp.rename()
712 714 self.chunkcache = None
713 715
714 716 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
715 717 """add a revision to the log
716 718
717 719 text - the revision data to add
718 720 transaction - the transaction object used for rollback
719 721 link - the linkrev data to add
720 722 p1, p2 - the parent nodeids of the revision
721 723 d - an optional precomputed delta
722 724 """
723 725 if text is None: text = ""
724 726 if p1 is None: p1 = self.tip()
725 727 if p2 is None: p2 = nullid
726 728
727 729 node = hash(text, p1, p2)
728 730
729 731 if node in self.nodemap:
730 732 return node
731 733
732 734 n = self.count()
733 735 t = n - 1
734 736
735 737 if n:
736 738 base = self.base(t)
737 739 start = self.start(base)
738 740 end = self.end(t)
739 741 if not d:
740 742 prev = self.revision(self.tip())
741 743 d = self.diff(prev, str(text))
742 744 data = compress(d)
743 745 l = len(data[1]) + len(data[0])
744 746 dist = end - start + l
745 747
746 748 # full versions are inserted when the needed deltas
747 749 # become comparable to the uncompressed text
748 750 if not n or dist > len(text) * 2:
749 751 data = compress(text)
750 752 l = len(data[1]) + len(data[0])
751 753 base = n
752 754 else:
753 755 base = self.base(t)
754 756
755 757 offset = 0
756 758 if t >= 0:
757 759 offset = self.end(t)
758 760
759 761 if self.version == 0:
760 762 e = (offset, l, base, link, p1, p2, node)
761 763 else:
762 764 e = (self.offset_type(offset, 0), l, len(text),
763 765 base, link, self.rev(p1), self.rev(p2), node)
764 766
765 767 self.index.append(e)
766 768 self.nodemap[node] = n
767 769 entry = struct.pack(self.indexformat, *e)
768 770
769 771 if not self.inlinedata():
770 772 transaction.add(self.datafile, offset)
771 773 transaction.add(self.indexfile, n * len(entry))
772 774 f = self.opener(self.datafile, "a")
773 775 if data[0]:
774 776 f.write(data[0])
775 777 f.write(data[1])
776 778 f = self.opener(self.indexfile, "a")
777 779 else:
778 780 f = self.opener(self.indexfile, "a+")
779 781 transaction.add(self.indexfile, f.tell())
780 782
781 783 if len(self.index) == 1 and self.version != 0:
782 784 l = struct.pack(versionformat, self.version)
783 785 f.write(l)
784 786 entry = entry[4:]
785 787
786 788 f.write(entry)
787 789
788 790 if self.inlinedata():
789 791 f.write(data[0])
790 792 f.write(data[1])
791 793 self.checkinlinesize(transaction, f)
792 794
793 795 self.cache = (node, n, text)
794 796 return node
795 797
796 798 def ancestor(self, a, b):
797 799 """calculate the least common ancestor of nodes a and b"""
798 800 # calculate the distance of every node from root
799 801 dist = {nullid: 0}
800 802 for i in xrange(self.count()):
801 803 n = self.node(i)
802 804 p1, p2 = self.parents(n)
803 805 dist[n] = max(dist[p1], dist[p2]) + 1
804 806
805 807 # traverse ancestors in order of decreasing distance from root
806 808 def ancestors(node):
807 809 # we store negative distances because heap returns smallest member
808 810 h = [(-dist[node], node)]
809 811 seen = {}
810 812 while h:
811 813 d, n = heapq.heappop(h)
812 814 if n not in seen:
813 815 seen[n] = 1
814 816 yield (-d, n)
815 817 for p in self.parents(n):
816 818 heapq.heappush(h, (-dist[p], p))
817 819
818 820 def generations(node):
819 821 sg, s = None, {}
820 822 for g,n in ancestors(node):
821 823 if g != sg:
822 824 if sg:
823 825 yield sg, s
824 826 sg, s = g, {n:1}
825 827 else:
826 828 s[n] = 1
827 829 yield sg, s
828 830
829 831 x = generations(a)
830 832 y = generations(b)
831 833 gx = x.next()
832 834 gy = y.next()
833 835
834 836 # increment each ancestor list until it is closer to root than
835 837 # the other, or they match
836 838 while 1:
837 839 #print "ancestor gen %s %s" % (gx[0], gy[0])
838 840 if gx[0] == gy[0]:
839 841 # find the intersection
840 842 i = [ n for n in gx[1] if n in gy[1] ]
841 843 if i:
842 844 return i[0]
843 845 else:
844 846 #print "next"
845 847 gy = y.next()
846 848 gx = x.next()
847 849 elif gx[0] < gy[0]:
848 850 #print "next y"
849 851 gy = y.next()
850 852 else:
851 853 #print "next x"
852 854 gx = x.next()
853 855
854 856 def group(self, nodelist, lookup, infocollect=None):
855 857 """calculate a delta group
856 858
857 859 Given a list of changeset revs, return a set of deltas and
858 860 metadata corresponding to nodes. the first delta is
859 861 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
860 862 have this parent as it has all history before these
861 863 changesets. parent is parent[0]
862 864 """
863 865 revs = [self.rev(n) for n in nodelist]
864 866
865 867 # if we don't have any revisions touched by these changesets, bail
866 868 if not revs:
867 869 yield changegroup.closechunk()
868 870 return
869 871
870 872 # add the parent of the first rev
871 873 p = self.parents(self.node(revs[0]))[0]
872 874 revs.insert(0, self.rev(p))
873 875
874 876 # build deltas
875 877 for d in xrange(0, len(revs) - 1):
876 878 a, b = revs[d], revs[d + 1]
877 879 nb = self.node(b)
878 880
879 881 if infocollect is not None:
880 882 infocollect(nb)
881 883
882 884 d = self.revdiff(a, b)
883 885 p = self.parents(nb)
884 886 meta = nb + p[0] + p[1] + lookup(nb)
885 887 yield changegroup.genchunk("%s%s" % (meta, d))
886 888
887 889 yield changegroup.closechunk()
888 890
889 891 def addgroup(self, revs, linkmapper, transaction, unique=0):
890 892 """
891 893 add a delta group
892 894
893 895 given a set of deltas, add them to the revision log. the
894 896 first delta is against its parent, which should be in our
895 897 log, the rest are against the previous delta.
896 898 """
897 899
898 900 #track the base of the current delta log
899 901 r = self.count()
900 902 t = r - 1
901 903 node = None
902 904
903 905 base = prev = -1
904 906 start = end = measure = 0
905 907 if r:
906 908 end = self.end(t)
907 909
908 910 ifh = self.opener(self.indexfile, "a+")
909 911 transaction.add(self.indexfile, ifh.tell())
910 912 if self.inlinedata():
911 913 dfh = None
912 914 else:
913 915 transaction.add(self.datafile, end)
914 916 dfh = self.opener(self.datafile, "a")
915 917
916 918 # loop through our set of deltas
917 919 chain = None
918 920 for chunk in revs:
919 921 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
920 922 link = linkmapper(cs)
921 923 if node in self.nodemap:
922 924 # this can happen if two branches make the same change
923 925 # if unique:
924 926 # raise RevlogError(_("already have %s") % hex(node[:4]))
925 927 chain = node
926 928 continue
927 929 delta = chunk[80:]
928 930
929 931 for p in (p1, p2):
930 932 if not p in self.nodemap:
931 933 raise RevlogError(_("unknown parent %s") % short(p))
932 934
933 935 if not chain:
934 936 # retrieve the parent revision of the delta chain
935 937 chain = p1
936 938 if not chain in self.nodemap:
937 939 raise RevlogError(_("unknown base %s") % short(chain[:4]))
938 940
939 941 # full versions are inserted when the needed deltas become
940 942 # comparable to the uncompressed text or when the previous
941 943 # version is not the one we have a delta against. We use
942 944 # the size of the previous full rev as a proxy for the
943 945 # current size.
944 946
945 947 if chain == prev:
946 948 tempd = compress(delta)
947 949 cdelta = tempd[0] + tempd[1]
948 950
949 951 if chain != prev or (end - start + len(cdelta)) > measure * 2:
950 952 # flush our writes here so we can read it in revision
951 953 if dfh:
952 954 dfh.flush()
953 955 ifh.flush()
954 956 text = self.revision(chain)
955 957 text = self.patches(text, [delta])
956 958 chk = self.addrevision(text, transaction, link, p1, p2)
957 959 if chk != node:
958 960 raise RevlogError(_("consistency error adding group"))
959 961 measure = len(text)
960 962 else:
961 963 if self.version == 0:
962 964 e = (end, len(cdelta), base, link, p1, p2, node)
963 965 else:
964 966 e = (self.offset_type(end, 0), len(cdelta), -1, base,
965 967 link, self.rev(p1), self.rev(p2), node)
966 968 self.index.append(e)
967 969 self.nodemap[node] = r
968 970 if self.inlinedata():
969 971 ifh.write(struct.pack(self.indexformat, *e))
970 972 ifh.write(cdelta)
971 973 self.checkinlinesize(transaction, ifh)
972 974 if not self.inlinedata():
973 975 dfh = self.opener(self.datafile, "a")
974 976 ifh = self.opener(self.indexfile, "a")
975 977 else:
976 978 if not dfh:
977 979 # addrevision switched from inline to conventional
978 980 # reopen the index
979 981 dfh = self.opener(self.datafile, "a")
980 982 ifh = self.opener(self.indexfile, "a")
981 983 dfh.write(cdelta)
982 984 ifh.write(struct.pack(self.indexformat, *e))
983 985
984 986 t, r, chain, prev = r, r + 1, node, node
985 987 base = self.base(t)
986 988 start = self.start(base)
987 989 end = self.end(t)
988 990
989 991 if node is None:
990 992 raise RevlogError(_("group to be added is empty"))
991 993 return node
992 994
993 995 def strip(self, rev, minlink):
994 996 if self.count() == 0 or rev >= self.count():
995 997 return
996 998
997 999 if isinstance(self.index, lazyindex):
998 1000 self.loadindexmap()
999 1001
1000 1002 # When stripping away a revision, we need to make sure it
1001 1003 # does not actually belong to an older changeset.
1002 1004 # The minlink parameter defines the oldest revision
1003 1005 # we're allowed to strip away.
1004 1006 while minlink > self.index[rev][-4]:
1005 1007 rev += 1
1006 1008 if rev >= self.count():
1007 1009 return
1008 1010
1009 1011 # first truncate the files on disk
1010 1012 end = self.start(rev)
1011 1013 if not self.inlinedata():
1012 1014 df = self.opener(self.datafile, "a")
1013 1015 df.truncate(end)
1014 1016 end = rev * struct.calcsize(self.indexformat)
1015 1017 else:
1016 1018 end += rev * struct.calcsize(self.indexformat)
1017 1019
1018 1020 indexf = self.opener(self.indexfile, "a")
1019 1021 indexf.truncate(end)
1020 1022
1021 1023 # then reset internal state in memory to forget those revisions
1022 1024 self.cache = None
1023 1025 self.chunkcache = None
1024 1026 for x in xrange(rev, self.count()):
1025 1027 del self.nodemap[self.node(x)]
1026 1028
1027 1029 del self.index[rev:]
1028 1030
1029 1031 def checksize(self):
1030 1032 expected = 0
1031 1033 if self.count():
1032 1034 expected = self.end(self.count() - 1)
1033 1035
1034 1036 try:
1035 1037 f = self.opener(self.datafile)
1036 1038 f.seek(0, 2)
1037 1039 actual = f.tell()
1038 1040 dd = actual - expected
1039 1041 except IOError, inst:
1040 1042 if inst.errno != errno.ENOENT:
1041 1043 raise
1042 1044 dd = 0
1043 1045
1044 1046 try:
1045 1047 f = self.opener(self.indexfile)
1046 1048 f.seek(0, 2)
1047 1049 actual = f.tell()
1048 1050 s = struct.calcsize(self.indexformat)
1049 1051 i = actual / s
1050 1052 di = actual - (i * s)
1051 1053 if self.inlinedata():
1052 1054 databytes = 0
1053 1055 for r in xrange(self.count()):
1054 1056 databytes += self.length(r)
1055 1057 dd = 0
1056 1058 di = actual - self.count() * s - databytes
1057 1059 except IOError, inst:
1058 1060 if inst.errno != errno.ENOENT:
1059 1061 raise
1060 1062 di = 0
1061 1063
1062 1064 return (dd, di)
1063 1065
1064 1066
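For context on the checkinlinesize() rewrite above: in an inline revlog the fixed-size index entries and the revision data are interleaved in one file, so the data for chunk r starts after r + 1 index entries (including the entry for r itself) plus all earlier chunks. A small standalone sketch of that offset computation (assumed helper names; the real code does this arithmetic inline in chunk() and checkinlinesize()):

    import struct

    indexformatng = ">Qiiiiii20s12x"            # from revlog.py above
    entrysize = struct.calcsize(indexformatng)  # 64 bytes per ng index entry

    def inline_start(start, rev):
        # start is the data offset as it would be in a separate data file;
        # inline, it is shifted by the rev + 1 index entries written before it
        return start + (rev + 1) * entrysize

Once the combined file grows past 131072 bytes, checkinlinesize() copies each chunk out to a separate data file and rewrites the pure index through the atomictemp opener, calling fp.rename() only after every entry has been written, so a crash midway leaves the original inline index in place.
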
@@ -1,817 +1,832 @@ util.py
1 1 """
2 2 util.py - Mercurial utility functions and platform specific implementations
3 3
4 4 Copyright 2005 K. Thananchayan <thananck@yahoo.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8
9 9 This contains helper routines that are independent of the SCM core and hide
10 10 platform-specific details from the core.
11 11 """
12 12
13 13 import os, errno
14 14 from i18n import gettext as _
15 15 from demandload import *
16 16 demandload(globals(), "cStringIO errno popen2 re shutil sys tempfile")
17 17 demandload(globals(), "threading time")
18 18
19 19 def pipefilter(s, cmd):
20 20 '''filter string S through command CMD, returning its output'''
21 21 (pout, pin) = popen2.popen2(cmd, -1, 'b')
22 22 def writer():
23 23 pin.write(s)
24 24 pin.close()
25 25
26 26 # we should use select instead on UNIX, but this will work on most
27 27 # systems, including Windows
28 28 w = threading.Thread(target=writer)
29 29 w.start()
30 30 f = pout.read()
31 31 pout.close()
32 32 w.join()
33 33 return f
34 34
35 35 def tempfilter(s, cmd):
36 36 '''filter string S through a pair of temporary files with CMD.
37 37 CMD is used as a template to create the real command to be run,
38 38 with the strings INFILE and OUTFILE replaced by the real names of
39 39 the temporary files generated.'''
40 40 inname, outname = None, None
41 41 try:
42 42 infd, inname = tempfile.mkstemp(prefix='hgfin')
43 43 fp = os.fdopen(infd, 'wb')
44 44 fp.write(s)
45 45 fp.close()
46 46 outfd, outname = tempfile.mkstemp(prefix='hgfout')
47 47 os.close(outfd)
48 48 cmd = cmd.replace('INFILE', inname)
49 49 cmd = cmd.replace('OUTFILE', outname)
50 50 code = os.system(cmd)
51 51 if code: raise Abort(_("command '%s' failed: %s") %
52 52 (cmd, explain_exit(code)[0]))
53 53 return open(outname, 'rb').read()
54 54 finally:
55 55 try:
56 56 if inname: os.unlink(inname)
57 57 except: pass
58 58 try:
59 59 if outname: os.unlink(outname)
60 60 except: pass
61 61
62 62 filtertable = {
63 63 'tempfile:': tempfilter,
64 64 'pipe:': pipefilter,
65 65 }
66 66
67 67 def filter(s, cmd):
68 68 "filter a string through a command that transforms its input to its output"
69 69 for name, fn in filtertable.iteritems():
70 70 if cmd.startswith(name):
71 71 return fn(s, cmd[len(name):].lstrip())
72 72 return pipefilter(s, cmd)
73 73
74 74 def patch(strip, patchname, ui):
75 75 """apply the patch <patchname> to the working directory.
76 76 a list of patched files is returned"""
77 77 fp = os.popen('patch -p%d < "%s"' % (strip, patchname))
78 78 files = {}
79 79 for line in fp:
80 80 line = line.rstrip()
81 81 ui.status("%s\n" % line)
82 82 if line.startswith('patching file '):
83 83 pf = parse_patch_output(line)
84 84 files.setdefault(pf, 1)
85 85 code = fp.close()
86 86 if code:
87 87 raise Abort(_("patch command failed: %s") % explain_exit(code)[0])
88 88 return files.keys()
89 89
90 90 def binary(s):
91 91 """return true if a string is binary data using diff's heuristic"""
92 92 if s and '\0' in s[:4096]:
93 93 return True
94 94 return False
95 95
96 96 def unique(g):
97 97 """return the uniq elements of iterable g"""
98 98 seen = {}
99 99 for f in g:
100 100 if f not in seen:
101 101 seen[f] = 1
102 102 yield f
103 103
104 104 class Abort(Exception):
105 105 """Raised if a command needs to print an error and exit."""
106 106
107 107 def always(fn): return True
108 108 def never(fn): return False
109 109
110 110 def patkind(name, dflt_pat='glob'):
111 111 """Split a string into an optional pattern kind prefix and the
112 112 actual pattern."""
113 113 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
114 114 if name.startswith(prefix + ':'): return name.split(':', 1)
115 115 return dflt_pat, name
116 116
117 117 def globre(pat, head='^', tail='$'):
118 118 "convert a glob pattern into a regexp"
119 119 i, n = 0, len(pat)
120 120 res = ''
121 121 group = False
122 122 def peek(): return i < n and pat[i]
123 123 while i < n:
124 124 c = pat[i]
125 125 i = i+1
126 126 if c == '*':
127 127 if peek() == '*':
128 128 i += 1
129 129 res += '.*'
130 130 else:
131 131 res += '[^/]*'
132 132 elif c == '?':
133 133 res += '.'
134 134 elif c == '[':
135 135 j = i
136 136 if j < n and pat[j] in '!]':
137 137 j += 1
138 138 while j < n and pat[j] != ']':
139 139 j += 1
140 140 if j >= n:
141 141 res += '\\['
142 142 else:
143 143 stuff = pat[i:j].replace('\\','\\\\')
144 144 i = j + 1
145 145 if stuff[0] == '!':
146 146 stuff = '^' + stuff[1:]
147 147 elif stuff[0] == '^':
148 148 stuff = '\\' + stuff
149 149 res = '%s[%s]' % (res, stuff)
150 150 elif c == '{':
151 151 group = True
152 152 res += '(?:'
153 153 elif c == '}' and group:
154 154 res += ')'
155 155 group = False
156 156 elif c == ',' and group:
157 157 res += '|'
158 158 elif c == '\\':
159 159 p = peek()
160 160 if p:
161 161 i += 1
162 162 res += re.escape(p)
163 163 else:
164 164 res += re.escape(c)
165 165 else:
166 166 res += re.escape(c)
167 167 return head + res + tail
168 168
169 169 _globchars = {'[': 1, '{': 1, '*': 1, '?': 1}
170 170
171 171 def pathto(n1, n2):
172 172 '''return the relative path from one place to another.
173 173 this returns a path in the form used by the local filesystem, not hg.'''
174 174 if not n1: return localpath(n2)
175 175 a, b = n1.split('/'), n2.split('/')
176 176 a.reverse()
177 177 b.reverse()
178 178 while a and b and a[-1] == b[-1]:
179 179 a.pop()
180 180 b.pop()
181 181 b.reverse()
182 182 return os.sep.join((['..'] * len(a)) + b)
183 183
184 184 def canonpath(root, cwd, myname):
185 185 """return the canonical path of myname, given cwd and root"""
186 186 if root == os.sep:
187 187 rootsep = os.sep
188 188 else:
189 189 rootsep = root + os.sep
190 190 name = myname
191 191 if not name.startswith(os.sep):
192 192 name = os.path.join(root, cwd, name)
193 193 name = os.path.normpath(name)
194 194 if name.startswith(rootsep):
195 195 name = name[len(rootsep):]
196 196 audit_path(name)
197 197 return pconvert(name)
198 198 elif name == root:
199 199 return ''
200 200 else:
201 201 raise Abort('%s not under root' % myname)
202 202
203 203 def matcher(canonroot, cwd='', names=['.'], inc=[], exc=[], head='', src=None):
204 204 return _matcher(canonroot, cwd, names, inc, exc, head, 'glob', src)
205 205
206 206 def cmdmatcher(canonroot, cwd='', names=['.'], inc=[], exc=[], head='', src=None):
207 207 if os.name == 'nt':
208 208 dflt_pat = 'glob'
209 209 else:
210 210 dflt_pat = 'relpath'
211 211 return _matcher(canonroot, cwd, names, inc, exc, head, dflt_pat, src)
212 212
213 213 def _matcher(canonroot, cwd, names, inc, exc, head, dflt_pat, src):
214 214 """build a function to match a set of file patterns
215 215
216 216 arguments:
217 217 canonroot - the canonical root of the tree you're matching against
218 218 cwd - the current working directory, if relevant
219 219 names - patterns to find
220 220 inc - patterns to include
221 221 exc - patterns to exclude
222 222 head - a regex to prepend to patterns to control whether a match is rooted
223 223
224 224 a pattern is one of:
225 225 'glob:<rooted glob>'
226 226 're:<rooted regexp>'
227 227 'path:<rooted path>'
228 228 'relglob:<relative glob>'
229 229 'relpath:<relative path>'
230 230 'relre:<relative regexp>'
231 231 '<rooted path or regexp>'
232 232
233 233 returns:
234 234 a 3-tuple containing
235 235 - list of explicit non-pattern names passed in
236 236 - a bool match(filename) function
237 237 - a bool indicating if any patterns were passed in
238 238
239 239 todo:
240 240 make head regex a rooted bool
241 241 """
242 242
243 243 def contains_glob(name):
244 244 for c in name:
245 245 if c in _globchars: return True
246 246 return False
247 247
248 248 def regex(kind, name, tail):
249 249 '''convert a pattern into a regular expression'''
250 250 if kind == 're':
251 251 return name
252 252 elif kind == 'path':
253 253 return '^' + re.escape(name) + '(?:/|$)'
254 254 elif kind == 'relglob':
255 255 return head + globre(name, '(?:|.*/)', tail)
256 256 elif kind == 'relpath':
257 257 return head + re.escape(name) + tail
258 258 elif kind == 'relre':
259 259 if name.startswith('^'):
260 260 return name
261 261 return '.*' + name
262 262 return head + globre(name, '', tail)
263 263
264 264 def matchfn(pats, tail):
265 265 """build a matching function from a set of patterns"""
266 266 if not pats:
267 267 return
268 268 matches = []
269 269 for k, p in pats:
270 270 try:
271 271 pat = '(?:%s)' % regex(k, p, tail)
272 272 matches.append(re.compile(pat).match)
273 273 except re.error:
274 274 if src: raise Abort("%s: invalid pattern (%s): %s" % (src, k, p))
275 275 else: raise Abort("invalid pattern (%s): %s" % (k, p))
276 276
277 277 def buildfn(text):
278 278 for m in matches:
279 279 r = m(text)
280 280 if r:
281 281 return r
282 282
283 283 return buildfn
284 284
285 285 def globprefix(pat):
286 286 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
287 287 root = []
288 288 for p in pat.split(os.sep):
289 289 if contains_glob(p): break
290 290 root.append(p)
291 291 return '/'.join(root)
292 292
293 293 pats = []
294 294 files = []
295 295 roots = []
296 296 for kind, name in [patkind(p, dflt_pat) for p in names]:
297 297 if kind in ('glob', 'relpath'):
298 298 name = canonpath(canonroot, cwd, name)
299 299 if name == '':
300 300 kind, name = 'glob', '**'
301 301 if kind in ('glob', 'path', 're'):
302 302 pats.append((kind, name))
303 303 if kind == 'glob':
304 304 root = globprefix(name)
305 305 if root: roots.append(root)
306 306 elif kind == 'relpath':
307 307 files.append((kind, name))
308 308 roots.append(name)
309 309
310 310 patmatch = matchfn(pats, '$') or always
311 311 filematch = matchfn(files, '(?:/|$)') or always
312 312 incmatch = always
313 313 if inc:
314 314 incmatch = matchfn(map(patkind, inc), '(?:/|$)')
315 315 excmatch = lambda fn: False
316 316 if exc:
317 317 excmatch = matchfn(map(patkind, exc), '(?:/|$)')
318 318
319 319 return (roots,
320 320 lambda fn: (incmatch(fn) and not excmatch(fn) and
321 321 (fn.endswith('/') or
322 322 (not pats and not files) or
323 323 (pats and patmatch(fn)) or
324 324 (files and filematch(fn)))),
325 325 (inc or exc or (pats and pats != [('glob', '**')])) and True)
326 326
327 327 def system(cmd, environ={}, cwd=None, onerr=None, errprefix=None):
328 328 '''enhanced shell command execution.
329 329 run with environment maybe modified, maybe in different dir.
330 330
331 331 if command fails and onerr is None, return status. if ui object,
332 332 print error message and return status, else raise onerr object as
333 333 exception.'''
334 334 oldenv = {}
335 335 for k in environ:
336 336 oldenv[k] = os.environ.get(k)
337 337 if cwd is not None:
338 338 oldcwd = os.getcwd()
339 339 try:
340 340 for k, v in environ.iteritems():
341 341 os.environ[k] = str(v)
342 342 if cwd is not None and oldcwd != cwd:
343 343 os.chdir(cwd)
344 344 rc = os.system(cmd)
345 345 if rc and onerr:
346 346 errmsg = '%s %s' % (os.path.basename(cmd.split(None, 1)[0]),
347 347 explain_exit(rc)[0])
348 348 if errprefix:
349 349 errmsg = '%s: %s' % (errprefix, errmsg)
350 350 try:
351 351 onerr.warn(errmsg + '\n')
352 352 except AttributeError:
353 353 raise onerr(errmsg)
354 354 return rc
355 355 finally:
356 356 for k, v in oldenv.iteritems():
357 357 if v is None:
358 358 del os.environ[k]
359 359 else:
360 360 os.environ[k] = v
361 361 if cwd is not None and oldcwd != cwd:
362 362 os.chdir(oldcwd)
363 363
364 364 def rename(src, dst):
365 365 """forcibly rename a file"""
366 366 try:
367 367 os.rename(src, dst)
368 368 except:
369 369 os.unlink(dst)
370 370 os.rename(src, dst)
371 371
372 372 def unlink(f):
373 373 """unlink and remove the directory if it is empty"""
374 374 os.unlink(f)
375 375 # try removing directories that might now be empty
376 376 try: os.removedirs(os.path.dirname(f))
377 377 except: pass
378 378
379 379 def copyfiles(src, dst, hardlink=None):
380 380 """Copy a directory tree using hardlinks if possible"""
381 381
382 382 if hardlink is None:
383 383 hardlink = (os.stat(src).st_dev ==
384 384 os.stat(os.path.dirname(dst)).st_dev)
385 385
386 386 if os.path.isdir(src):
387 387 os.mkdir(dst)
388 388 for name in os.listdir(src):
389 389 srcname = os.path.join(src, name)
390 390 dstname = os.path.join(dst, name)
391 391 copyfiles(srcname, dstname, hardlink)
392 392 else:
393 393 if hardlink:
394 394 try:
395 395 os_link(src, dst)
396 396 except:
397 397 hardlink = False
398 398 shutil.copy(src, dst)
399 399 else:
400 400 shutil.copy(src, dst)
401 401
402 402 def audit_path(path):
403 403 """Abort if path contains dangerous components"""
404 404 parts = os.path.normcase(path).split(os.sep)
405 405 if (os.path.splitdrive(path)[0] or parts[0] in ('.hg', '')
406 406 or os.pardir in parts):
407 407 raise Abort(_("path contains illegal component: %s\n") % path)
408 408
409 409 def opener(base, audit=True):
410 410 """
411 411 return a function that opens files relative to base
412 412
413 413 this function is used to hide the details of COW semantics and
414 414 remote file access from higher level code.
415 415 """
416 416 p = base
417 417 audit_p = audit
418 418
419 419 def mktempcopy(name):
420 420 d, fn = os.path.split(name)
421 421 fd, temp = tempfile.mkstemp(prefix=fn, dir=d)
422 422 fp = os.fdopen(fd, "wb")
423 423 try:
424 424 fp.write(file(name, "rb").read())
425 425 except:
426 426 try: os.unlink(temp)
427 427 except: pass
428 428 raise
429 429 fp.close()
430 430 st = os.lstat(name)
431 431 os.chmod(temp, st.st_mode)
432 432 return temp
433 433
434 class atomicfile(file):
435 """the file will only be copied on close"""
436 def __init__(self, name, mode, atomic=False):
434 class atomictempfile(file):
435 """the file will only be copied when rename is called"""
436 def __init__(self, name, mode):
437 437 self.__name = name
438 438 self.temp = mktempcopy(name)
439 439 file.__init__(self, self.temp, mode)
440 def close(self):
440 def rename(self):
441 441 if not self.closed:
442 442 file.close(self)
443 443 rename(self.temp, self.__name)
444 444 def __del__(self):
445 self.close()
445 if not self.closed:
446 try:
447 os.unlink(self.temp)
448 except: pass
449 file.close(self)
446 450
447 def o(path, mode="r", text=False, atomic=False):
451 class atomicfile(atomictempfile):
452 """the file will only be copied on close"""
453 def __init__(self, name, mode):
454 atomictempfile.__init__(self, name, mode)
455 def close(self):
456 self.rename()
457 def __del__(self):
458 self.rename()
459
460 def o(path, mode="r", text=False, atomic=False, atomictemp=False):
448 461 if audit_p:
449 462 audit_path(path)
450 463 f = os.path.join(p, path)
451 464
452 465 if not text:
453 466 mode += "b" # for that other OS
454 467
455 468 if mode[0] != "r":
456 469 try:
457 470 nlink = nlinks(f)
458 471 except OSError:
459 472 d = os.path.dirname(f)
460 473 if not os.path.isdir(d):
461 474 os.makedirs(d)
462 475 else:
463 476 if atomic:
464 477 return atomicfile(f, mode)
478 elif atomictemp:
479 return atomictempfile(f, mode)
465 480 if nlink > 1:
466 481 rename(mktempcopy(f), f)
467 482 return file(f, mode)
468 483
469 484 return o
470 485
471 486 def _makelock_file(info, pathname):
472 487 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
473 488 os.write(ld, info)
474 489 os.close(ld)
475 490
476 491 def _readlock_file(pathname):
477 492 return file(pathname).read()
478 493
479 494 def nlinks(pathname):
480 495 """Return number of hardlinks for the given file."""
481 496 return os.stat(pathname).st_nlink
482 497
483 498 if hasattr(os, 'link'):
484 499 os_link = os.link
485 500 else:
486 501 def os_link(src, dst):
487 502 raise OSError(0, _("Hardlinks not supported"))
488 503
489 504 # Platform specific variants
490 505 if os.name == 'nt':
491 506 demandload(globals(), "msvcrt")
492 507 nulldev = 'NUL:'
493 508
494 509 class winstdout:
495 510 '''stdout on windows misbehaves if sent through a pipe'''
496 511
497 512 def __init__(self, fp):
498 513 self.fp = fp
499 514
500 515 def __getattr__(self, key):
501 516 return getattr(self.fp, key)
502 517
503 518 def close(self):
504 519 try:
505 520 self.fp.close()
506 521 except: pass
507 522
508 523 def write(self, s):
509 524 try:
510 525 return self.fp.write(s)
511 526 except IOError, inst:
512 527 if inst.errno != 0: raise
513 528 self.close()
514 529 raise IOError(errno.EPIPE, 'Broken pipe')
515 530
516 531 sys.stdout = winstdout(sys.stdout)
517 532
518 533 def os_rcpath():
519 534 '''return default os-specific hgrc search path'''
520 535 try:
521 536 import win32api, win32process
522 537 proc = win32api.GetCurrentProcess()
523 538 filename = win32process.GetModuleFileNameEx(proc, 0)
524 539 systemrc = os.path.join(os.path.dirname(filename), 'mercurial.ini')
525 540 except ImportError:
526 541 systemrc = r'c:\mercurial\mercurial.ini'
527 542
528 543 return [systemrc,
529 544 os.path.join(os.path.expanduser('~'), 'mercurial.ini')]
530 545
531 546 def parse_patch_output(output_line):
532 547 """parses the output produced by patch and returns the file name"""
533 548 pf = output_line[14:]
534 549 if pf[0] == '`':
535 550 pf = pf[1:-1] # Remove the quotes
536 551 return pf
537 552
538 553 try: # Mark Hammond's win32all package allows better functionality on Windows
539 554 import win32api, win32con, win32file, pywintypes
540 555
541 556 # create hard links using win32file module
542 557 def os_link(src, dst): # NB will only succeed on NTFS
543 558 win32file.CreateHardLink(dst, src)
544 559
545 560 def nlinks(pathname):
546 561 """Return number of hardlinks for the given file."""
547 562 try:
548 563 fh = win32file.CreateFile(pathname,
549 564 win32file.GENERIC_READ, win32file.FILE_SHARE_READ,
550 565 None, win32file.OPEN_EXISTING, 0, None)
551 566 res = win32file.GetFileInformationByHandle(fh)
552 567 fh.Close()
553 568 return res[7]
554 569 except:
555 570 return os.stat(pathname).st_nlink
556 571
557 572 def testpid(pid):
558 573 '''return True if pid is still running or unable to
559 574 determine, False otherwise'''
560 575 try:
561 576 import win32process, winerror
562 577 handle = win32api.OpenProcess(
563 578 win32con.PROCESS_QUERY_INFORMATION, False, pid)
564 579 if handle:
565 580 status = win32process.GetExitCodeProcess(handle)
566 581 return status == win32con.STILL_ACTIVE
567 582 except pywintypes.error, details:
568 583 return details[0] != winerror.ERROR_INVALID_PARAMETER
569 584 return True
570 585
571 586 except ImportError:
572 587 def testpid(pid):
573 588 '''return False if pid dead, True if running or not known'''
574 589 return True
575 590
576 591 def is_exec(f, last):
577 592 return last
578 593
579 594 def set_exec(f, mode):
580 595 pass
581 596
582 597 def set_binary(fd):
583 598 msvcrt.setmode(fd.fileno(), os.O_BINARY)
584 599
585 600 def pconvert(path):
586 601 return path.replace("\\", "/")
587 602
588 603 def localpath(path):
589 604 return path.replace('/', '\\')
590 605
591 606 def normpath(path):
592 607 return pconvert(os.path.normpath(path))
593 608
594 609 makelock = _makelock_file
595 610 readlock = _readlock_file
596 611
597 612 def explain_exit(code):
598 613 return _("exited with status %d") % code, code
599 614
600 615 else:
601 616 nulldev = '/dev/null'
602 617
603 618 def rcfiles(path):
604 619 rcs = [os.path.join(path, 'hgrc')]
605 620 rcdir = os.path.join(path, 'hgrc.d')
606 621 try:
607 622 rcs.extend([os.path.join(rcdir, f) for f in os.listdir(rcdir)
608 623 if f.endswith(".rc")])
609 624 except OSError, inst: pass
610 625 return rcs
611 626
612 627 def os_rcpath():
613 628 '''return default os-specific hgrc search path'''
614 629 path = []
615 630 if len(sys.argv) > 0:
616 631 path.extend(rcfiles(os.path.dirname(sys.argv[0]) +
617 632 '/../etc/mercurial'))
618 633 path.extend(rcfiles('/etc/mercurial'))
619 634 path.append(os.path.expanduser('~/.hgrc'))
620 635 path = [os.path.normpath(f) for f in path]
621 636 return path
622 637
623 638 def parse_patch_output(output_line):
624 639 """parses the output produced by patch and returns the file name"""
625 640 pf = output_line[14:]
626 641 if pf.startswith("'") and pf.endswith("'") and pf.find(" ") >= 0:
627 642 pf = pf[1:-1] # Remove the quotes
628 643 return pf
629 644
630 645 def is_exec(f, last):
631 646 """check whether a file is executable"""
632 647 return (os.stat(f).st_mode & 0100 != 0)
633 648
634 649 def set_exec(f, mode):
635 650 s = os.stat(f).st_mode
636 651 if (s & 0100 != 0) == mode:
637 652 return
638 653 if mode:
639 654 # Turn on +x for every +r bit when making a file executable
640 655 # and obey umask.
641 656 umask = os.umask(0)
642 657 os.umask(umask)
643 658 os.chmod(f, s | (s & 0444) >> 2 & ~umask)
644 659 else:
645 660 os.chmod(f, s & 0666)
646 661
647 662 def set_binary(fd):
648 663 pass
649 664
650 665 def pconvert(path):
651 666 return path
652 667
653 668 def localpath(path):
654 669 return path
655 670
656 671 normpath = os.path.normpath
657 672
658 673 def makelock(info, pathname):
659 674 try:
660 675 os.symlink(info, pathname)
661 676 except OSError, why:
662 677 if why.errno == errno.EEXIST:
663 678 raise
664 679 else:
665 680 _makelock_file(info, pathname)
666 681
667 682 def readlock(pathname):
668 683 try:
669 684 return os.readlink(pathname)
670 685 except OSError, why:
671 686 if why.errno == errno.EINVAL:
672 687 return _readlock_file(pathname)
673 688 else:
674 689 raise
675 690
676 691 def testpid(pid):
677 692 '''return False if pid dead, True if running or not sure'''
678 693 try:
679 694 os.kill(pid, 0)
680 695 return True
681 696 except OSError, inst:
682 697 return inst.errno != errno.ESRCH
683 698
684 699 def explain_exit(code):
685 700 """return a 2-tuple (desc, code) describing a process's status"""
686 701 if os.WIFEXITED(code):
687 702 val = os.WEXITSTATUS(code)
688 703 return _("exited with status %d") % val, val
689 704 elif os.WIFSIGNALED(code):
690 705 val = os.WTERMSIG(code)
691 706 return _("killed by signal %d") % val, val
692 707 elif os.WIFSTOPPED(code):
693 708 val = os.WSTOPSIG(code)
694 709 return _("stopped by signal %d") % val, val
695 710 raise ValueError(_("invalid exit code"))
696 711
697 712 class chunkbuffer(object):
698 713 """Allow arbitrary sized chunks of data to be efficiently read from an
699 714 iterator over chunks of arbitrary size."""
700 715
701 716 def __init__(self, in_iter, targetsize = 2**16):
702 717 """in_iter is the iterator that's iterating over the input chunks.
703 718 targetsize is how big a buffer to try to maintain."""
704 719 self.in_iter = iter(in_iter)
705 720 self.buf = ''
706 721 self.targetsize = int(targetsize)
707 722 if self.targetsize <= 0:
708 723 raise ValueError(_("targetsize must be greater than 0, was %d") %
709 724 targetsize)
710 725 self.iterempty = False
711 726
712 727 def fillbuf(self):
713 728 """Ignore target size; read every chunk from iterator until empty."""
714 729 if not self.iterempty:
715 730 collector = cStringIO.StringIO()
716 731 collector.write(self.buf)
717 732 for ch in self.in_iter:
718 733 collector.write(ch)
719 734 self.buf = collector.getvalue()
720 735 self.iterempty = True
721 736
722 737 def read(self, l):
723 738 """Read L bytes of data from the iterator of chunks of data.
724 739 Returns less than L bytes if the iterator runs dry."""
725 740 if l > len(self.buf) and not self.iterempty:
726 741 # Clamp to a multiple of self.targetsize
727 742 targetsize = self.targetsize * ((l // self.targetsize) + 1)
728 743 collector = cStringIO.StringIO()
729 744 collector.write(self.buf)
730 745 collected = len(self.buf)
731 746 for chunk in self.in_iter:
732 747 collector.write(chunk)
733 748 collected += len(chunk)
734 749 if collected >= targetsize:
735 750 break
736 751 if collected < targetsize:
737 752 self.iterempty = True
738 753 self.buf = collector.getvalue()
739 754 s, self.buf = self.buf[:l], buffer(self.buf, l)
740 755 return s
741 756
742 757 def filechunkiter(f, size = 65536):
743 758 """Create a generator that produces all the data in the file size
744 759 (default 65536) bytes at a time. Chunks may be less than size
745 760 bytes if the chunk is the last chunk in the file, or the file is a
746 761 socket or some other type of file that sometimes reads less data
747 762 than is requested."""
748 763 s = f.read(size)
749 764 while len(s) > 0:
750 765 yield s
751 766 s = f.read(size)
752 767
753 768 def makedate():
754 769 lt = time.localtime()
755 770 if lt[8] == 1 and time.daylight:
756 771 tz = time.altzone
757 772 else:
758 773 tz = time.timezone
759 774 return time.mktime(lt), tz
760 775
761 776 def datestr(date=None, format='%a %b %d %H:%M:%S %Y', timezone=True):
762 777 """represent a (unixtime, offset) tuple as a localized time.
763 778 unixtime is seconds since the epoch, and offset is the time zone's
764 779 number of seconds away from UTC. if timezone is false, do not
765 780 append time zone to string."""
766 781 t, tz = date or makedate()
767 782 s = time.strftime(format, time.gmtime(float(t) - tz))
768 783 if timezone:
769 784 s += " %+03d%02d" % (-tz / 3600, ((-tz % 3600) / 60))
770 785 return s
771 786
772 787 def shortuser(user):
773 788 """Return a short representation of a user name or email address."""
774 789 f = user.find('@')
775 790 if f >= 0:
776 791 user = user[:f]
777 792 f = user.find('<')
778 793 if f >= 0:
779 794 user = user[f+1:]
780 795 return user
781 796
782 797 def walkrepos(path):
783 798 '''yield every hg repository under path, recursively.'''
784 799 def errhandler(err):
785 800 if err.filename == path:
786 801 raise err
787 802
788 803 for root, dirs, files in os.walk(path, onerror=errhandler):
789 804 for d in dirs:
790 805 if d == '.hg':
791 806 yield root
792 807 dirs[:] = []
793 808 break
794 809
795 810 _rcpath = None
796 811
797 812 def rcpath():
798 813 '''return hgrc search path. if env var HGRCPATH is set, use it.
799 814 for each item in path, if directory, use files ending in .rc,
800 815 else use item.
801 816 make HGRCPATH empty to only look in .hg/hgrc of current repo.
802 817 if no HGRCPATH, use default os-specific path.'''
803 818 global _rcpath
804 819 if _rcpath is None:
805 820 if 'HGRCPATH' in os.environ:
806 821 _rcpath = []
807 822 for p in os.environ['HGRCPATH'].split(os.pathsep):
808 823 if not p: continue
809 824 if os.path.isdir(p):
810 825 for f in os.listdir(p):
811 826 if f.endswith('.rc'):
812 827 _rcpath.append(os.path.join(p, f))
813 828 else:
814 829 _rcpath.append(p)
815 830 else:
816 831 _rcpath = os_rcpath()
817 832 return _rcpath
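
The write-to-a-temp-file-then-rename pattern introduced here is a general technique: os.rename() is atomic on POSIX when source and destination live on the same filesystem, which is exactly why mktempcopy() creates the temp file in the target's own directory. A self-contained sketch of the same idea outside Mercurial (hypothetical class, not part of this changeset):

    import os, tempfile

    class atomictemp(object):
        """write to name via a temp file; publish only when rename() is called"""
        def __init__(self, name, mode='wb'):
            self._name = name
            d, fn = os.path.split(name)
            fd, self._temp = tempfile.mkstemp(prefix=fn, dir=d or '.')
            self._fp = os.fdopen(fd, mode)
            self.write = self._fp.write

        def rename(self):
            # publish: close the temp file and move it over the real name
            self._fp.close()
            os.rename(self._temp, self._name)

        def discard(self):
            # abandon: the target file is left untouched
            self._fp.close()
            try:
                os.unlink(self._temp)
            except OSError:
                pass

Readers of the target name never see a half-written file: they observe either the old contents or, once rename() returns, the complete new contents. The util.rename() helper in the diff above handles Windows, where os.rename() refuses to replace an existing file, by unlinking the destination first.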