revlog: generate trivial deltas against null revision...
Matt Mackall
r5367:7530334b default
@@ -1,250 +1,253 @@ mdiff.py
# mdiff.py - diff and patch routines for mercurial
#
# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import bdiff, mpatch, re, struct, util, md5

def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + '\n' for l in text.split('\n')]
    if lines:
        if lines[-1] == '\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines

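For illustration, splitnewlines differs from str.splitlines in that it splits only on '\n' (never on '\r') and preserves whether the last line was newline-terminated:

    splitnewlines('a\nb\nc\n')   # ['a\n', 'b\n', 'c\n']
    splitnewlines('a\nb')        # ['a\n', 'b']; unterminated last line kept as-is
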
class diffopts(object):
    '''context is the number of context lines
    text treats all files as text
    showfunc enables diff -p output
    git enables the git extended patch format
    nodates removes dates from diff headers
    ignorews ignores all whitespace changes in the diff
    ignorewsamount ignores changes in the amount of whitespace
    ignoreblanklines ignores changes whose lines are all blank'''

    defaults = {
        'context': 3,
        'text': False,
        'showfunc': True,
        'git': False,
        'nodates': False,
        'ignorews': False,
        'ignorewsamount': False,
        'ignoreblanklines': False,
        }

    __slots__ = defaults.keys()

    def __init__(self, **opts):
        for k in self.__slots__:
            v = opts.get(k)
            if v is None:
                v = self.defaults[k]
            setattr(self, k, v)

defaultopts = diffopts()

def wsclean(opts, text):
    if opts.ignorews:
        text = re.sub('[ \t]+', '', text)
    elif opts.ignorewsamount:
        text = re.sub('[ \t]+', ' ', text)
        text = re.sub('[ \t]+\n', '\n', text)
    if opts.ignoreblanklines:
        text = re.sub('\n+', '', text)
    return text

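A quick sketch of what the whitespace options do, using diffopts and wsclean as defined above (wsclean is only used to compare candidate hunk texts, never to produce output):

    wsclean(diffopts(ignorews=True), 'if (x)\t{\n')          # 'if(x){\n'
    wsclean(diffopts(ignorewsamount=True), 'a   b  \n')      # 'a b\n'
    wsclean(diffopts(ignoreblanklines=True), 'a\n\n\nb\n')   # 'ab'
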
def unidiff(a, ad, b, bd, fn, r=None, opts=defaultopts):
    def datetag(date, addtab=True):
        if not opts.git and not opts.nodates:
            return '\t%s\n' % date
        if addtab and ' ' in fn:
            return '\t\n'
        return '\n'

    if not a and not b: return ""
    epoch = util.datestr((0, 0))

    if not opts.text and (util.binary(a) or util.binary(b)):
        def h(v):
            # md5 is used instead of sha1 because md5 is supposedly faster
            return md5.new(v).digest()
        if a and b and len(a) == len(b) and h(a) == h(b):
            return ""
        l = ['Binary file %s has changed\n' % fn]
    elif not a:
        b = splitnewlines(b)
        if a is None:
            l1 = '--- /dev/null%s' % datetag(epoch, False)
        else:
            l1 = "--- %s%s" % ("a/" + fn, datetag(ad))
        l2 = "+++ %s%s" % ("b/" + fn, datetag(bd))
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif not b:
        a = splitnewlines(a)
        l1 = "--- %s%s" % ("a/" + fn, datetag(ad))
        if b is None:
            l2 = '+++ /dev/null%s' % datetag(epoch, False)
        else:
            l2 = "+++ %s%s" % ("b/" + fn, datetag(bd))
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        al = splitnewlines(a)
        bl = splitnewlines(b)
        l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn, opts=opts))
        if not l: return ""
        # difflib uses a space, rather than a tab
        l[0] = "%s%s" % (l[0][:-2], datetag(ad))
        l[1] = "%s%s" % (l[1][:-2], datetag(bd))

    for ln in xrange(len(l)):
        if l[ln][-1] != '\n':
            l[ln] += "\n\ No newline at end of file\n"

    if r:
        l.insert(0, "diff %s %s\n" %
                    (' '.join(["-r %s" % rev for rev in r]), fn))

    return "".join(l)

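Typical use, with invented inputs and dates already formatted as strings:

    old = 'one\ntwo\nthree\n'
    new = 'one\n2\nthree\n'
    print unidiff(old, '1970-01-01', new, '1970-01-01', 'f.txt',
                  opts=diffopts(context=1))

This prints '---'/'+++' header lines for a/f.txt and b/f.txt followed by a single hunk replacing 'two' with '2'.
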
# somewhat self-contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
    def contextend(l, len):
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            # walk backwards from the start of the context
            # to find a line starting with an alphanumeric char.
            for x in xrange(astart, -1, -1):
                t = l1[x].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]

    if opts.showfunc:
        funcre = re.compile('\w')

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i in xrange(len(diff)):
        # The first match is special.
        # We've either found a match starting at line 0 or a match later
        # in the file. If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i-1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        s1 = diff[i]
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof; this check eats them
        # and deals with the special first-match case described above
        if not old and not new:
            continue

        if opts.ignorews or opts.ignorewsamount or opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [ astart, a2, bstart, b2, delta ]

        delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
        delta[len(delta):] = [ '-' + x for x in old ]
        delta[len(delta):] = [ '+' + x for x in new ]

    if hunk:
        for x in yieldhunk(hunk, header):
            yield x

def patchtext(bin):
    pos = 0
    t = []
    while pos < len(bin):
        p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        t.append(bin[pos:pos + l])
        pos += l
    return "".join(t)

def patch(a, bin):
    return mpatch.patches(a, [bin])

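The binary patch format that patchtext walks, and that mpatch.patches applies, is a sequence of fragments: a 12-byte big-endian (start, end, length) header followed by length bytes of data, each meaning 'replace a[start:end] with data'. For example:

    frag = struct.pack(">lll", 0, 3, 3) + 'ONE'
    patchtext(frag)          # 'ONE' (just the replacement text)
    patch('one two', frag)   # 'ONE two'
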
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]

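bdiff.blocks reports matching spans as (a1, a2, b1, b2) tuples; the comprehension above merely reshapes them into difflib-style (a_start, b_start, length) triples, e.g.:

    get_matching_blocks('a\nb\n', 'a\nc\nb\n')
    # roughly [(0, 0, 1), (1, 2, 1), (2, 3, 0)], where the final
    # zero-length entry is the end-of-input sentinel, as in difflib
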
def trivialdiffheader(length):
    return struct.pack(">lll", 0, 0, length)

patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
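trivialdiffheader is the helper this changeset adds: prefixed to a full text, it forms a valid delta that inserts the whole text at position 0. Since the null revision is empty, that is precisely a trivial delta against the null revision:

    text = 'hello\n'
    delta = trivialdiffheader(len(text)) + text
    patch('', delta)   # 'hello\n', the full text rebuilt from an empty base
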
@@ -1,1270 +1,1274 @@ revlog.py
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import binascii, changegroup, errno, ancestor, mdiff, os
15 import binascii, changegroup, errno, ancestor, mdiff, os
16 import sha, struct, util, zlib
16 import sha, struct, util, zlib
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = sha.new
22 _sha = sha.new
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 class RevlogError(Exception):
32 class RevlogError(Exception):
33 pass
33 pass
34 class LookupError(RevlogError):
34 class LookupError(RevlogError):
35 pass
35 pass
36
36
37 def getoffset(q):
37 def getoffset(q):
38 return int(q >> 16)
38 return int(q >> 16)
39
39
40 def gettype(q):
40 def gettype(q):
41 return int(q & 0xFFFF)
41 return int(q & 0xFFFF)
42
42
43 def offset_type(offset, type):
43 def offset_type(offset, type):
44 return long(long(offset) << 16 | type)
44 return long(long(offset) << 16 | type)
45
45
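The first field of an index entry packs a 48-bit offset and a 16-bit type/flags word into one integer; offset_type composes it and getoffset/gettype take it apart:

    q = offset_type(12345, 0)
    getoffset(q), gettype(q)   # (12345, 0)
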
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

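Because the parent hashes are sorted first, the result is symmetric in p1 and p2; for two 20-byte binary nodeids it reduces to:

    hash(text, p1, p2) == _sha(min(p1, p2) + max(p1, p2) + text).digest()
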
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    if len(text) < 44:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    bin = _compress(text)
    if len(bin) > len(text):
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

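compress returns a (header, data) pair whose concatenation is the stored chunk; the first byte then tells decompress what it has ('\0' for data stored as-is, 'u' for uncompressed text, and 'x' is the zlib magic byte). Round trip:

    t = 'some revision text\n' * 10
    chunk = ''.join(compress(t))
    decompress(chunk) == t   # True
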
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if the win32 extensions
    # are not available. it keeps the file handle open, which makes it
    # impossible to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many, many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap[e[4]], nodemap[e[5]], e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes: offset
# 2 bytes: flags
# 4 bytes: compressed length
# 4 bytes: uncompressed length
# 4 bytes: base rev
# 4 bytes: link rev
# 4 bytes: parent 1 rev
# 4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

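Each revlogng index record is struct.calcsize(indexformatng) == 64 bytes. Packing and unpacking an invented entry shows the field order (offset/flags, compressed length, uncompressed length, base rev, link rev, p1 rev, p2 rev, nodeid):

    e = (offset_type(0, 0), 11, 24, 0, 0, -1, -1, '\0' * 20)
    _unpack(indexformatng, _pack(indexformatng, *e)) == e   # True
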
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if lazyparser.safe_to_use and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        s = self.size
        cache = None
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        # if we're not using lazymap, always read the whole index
        data = fp.read()
        l = len(data) - s
        append = index.append
        if inline:
            cache = (0, data)
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                if e[1] < 0:
                    break
                off += e[1] + s
        else:
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                off += s

        e = list(index[0])
        type = gettype(e[0])
        e[0] = offset_type(0, type)
        index[0] = e

        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
        if v & REVLOGNG:
            v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1
        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def count(self):
        return len(self.index) - 1

    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, node):
        return self.index[self.rev(node)][4]
    def parents(self, node):
        d = self.index[self.rev(node)][5:7]
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

    # alternate implementation. The advantage of this code is that it
    # will be faster for a single revision. But, the results are not
    # cached, so finding the size of every revision will be slower.
    """
    if self.cache and self.cache[1] == rev:
        return len(self.cache[2])

    base = self.base(rev)
    if self.cache and self.cache[1] >= base and self.cache[1] < rev:
        base = self.cache[1]
        text = self.cache[2]
    else:
        text = self.revision(self.node(base))

    l = len(text)
    for x in xrange(base + 1, rev + 1):
        l = mdiff.patchedsize(l, self.chunk(x))
    return l
    """

553 def reachable(self, node, stop=None):
553 def reachable(self, node, stop=None):
554 """return a hash of all nodes ancestral to a given node, including
554 """return a hash of all nodes ancestral to a given node, including
555 the node itself, stopping when stop is matched"""
555 the node itself, stopping when stop is matched"""
556 reachable = {}
556 reachable = {}
557 visit = [node]
557 visit = [node]
558 reachable[node] = 1
558 reachable[node] = 1
559 if stop:
559 if stop:
560 stopn = self.rev(stop)
560 stopn = self.rev(stop)
561 else:
561 else:
562 stopn = 0
562 stopn = 0
563 while visit:
563 while visit:
564 n = visit.pop(0)
564 n = visit.pop(0)
565 if n == stop:
565 if n == stop:
566 continue
566 continue
567 if n == nullid:
567 if n == nullid:
568 continue
568 continue
569 for p in self.parents(n):
569 for p in self.parents(n):
570 if self.rev(p) < stopn:
570 if self.rev(p) < stopn:
571 continue
571 continue
572 if p not in reachable:
572 if p not in reachable:
573 reachable[p] = 1
573 reachable[p] = 1
574 visit.append(p)
574 visit.append(p)
575 return reachable
575 return reachable
576
576
577 def nodesbetween(self, roots=None, heads=None):
577 def nodesbetween(self, roots=None, heads=None):
578 """Return a tuple containing three elements. Elements 1 and 2 contain
578 """Return a tuple containing three elements. Elements 1 and 2 contain
579 a final list bases and heads after all the unreachable ones have been
579 a final list bases and heads after all the unreachable ones have been
580 pruned. Element 0 contains a topologically sorted list of all
580 pruned. Element 0 contains a topologically sorted list of all
581
581
582 nodes that satisfy these constraints:
582 nodes that satisfy these constraints:
583 1. All nodes must be descended from a node in roots (the nodes on
583 1. All nodes must be descended from a node in roots (the nodes on
584 roots are considered descended from themselves).
584 roots are considered descended from themselves).
585 2. All nodes must also be ancestors of a node in heads (the nodes in
585 2. All nodes must also be ancestors of a node in heads (the nodes in
586 heads are considered to be their own ancestors).
586 heads are considered to be their own ancestors).
587
587
588 If roots is unspecified, nullid is assumed as the only root.
588 If roots is unspecified, nullid is assumed as the only root.
589 If heads is unspecified, it is taken to be the output of the
589 If heads is unspecified, it is taken to be the output of the
590 heads method (i.e. a list of all nodes in the repository that
590 heads method (i.e. a list of all nodes in the repository that
591 have no children)."""
591 have no children)."""
592 nonodes = ([], [], [])
592 nonodes = ([], [], [])
593 if roots is not None:
593 if roots is not None:
594 roots = list(roots)
594 roots = list(roots)
595 if not roots:
595 if not roots:
596 return nonodes
596 return nonodes
597 lowestrev = min([self.rev(n) for n in roots])
597 lowestrev = min([self.rev(n) for n in roots])
598 else:
598 else:
599 roots = [nullid] # Everybody's a descendent of nullid
599 roots = [nullid] # Everybody's a descendent of nullid
600 lowestrev = nullrev
600 lowestrev = nullrev
601 if (lowestrev == nullrev) and (heads is None):
601 if (lowestrev == nullrev) and (heads is None):
602 # We want _all_ the nodes!
602 # We want _all_ the nodes!
603 return ([self.node(r) for r in xrange(0, self.count())],
603 return ([self.node(r) for r in xrange(0, self.count())],
604 [nullid], list(self.heads()))
604 [nullid], list(self.heads()))
605 if heads is None:
605 if heads is None:
606 # All nodes are ancestors, so the latest ancestor is the last
606 # All nodes are ancestors, so the latest ancestor is the last
607 # node.
607 # node.
608 highestrev = self.count() - 1
608 highestrev = self.count() - 1
609 # Set ancestors to None to signal that every node is an ancestor.
609 # Set ancestors to None to signal that every node is an ancestor.
610 ancestors = None
610 ancestors = None
611 # Set heads to an empty dictionary for later discovery of heads
611 # Set heads to an empty dictionary for later discovery of heads
612 heads = {}
612 heads = {}
613 else:
613 else:
614 heads = list(heads)
614 heads = list(heads)
615 if not heads:
615 if not heads:
616 return nonodes
616 return nonodes
617 ancestors = {}
617 ancestors = {}
618 # Turn heads into a dictionary so we can remove 'fake' heads.
618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 # Also, later we will be using it to filter out the heads we can't
619 # Also, later we will be using it to filter out the heads we can't
620 # find from roots.
620 # find from roots.
621 heads = dict.fromkeys(heads, 0)
621 heads = dict.fromkeys(heads, 0)
622 # Start at the top and keep marking parents until we're done.
622 # Start at the top and keep marking parents until we're done.
623 nodestotag = heads.keys()
623 nodestotag = heads.keys()
624 # Remember where the top was so we can use it as a limit later.
624 # Remember where the top was so we can use it as a limit later.
625 highestrev = max([self.rev(n) for n in nodestotag])
625 highestrev = max([self.rev(n) for n in nodestotag])
626 while nodestotag:
626 while nodestotag:
627 # grab a node to tag
627 # grab a node to tag
628 n = nodestotag.pop()
628 n = nodestotag.pop()
629 # Never tag nullid
629 # Never tag nullid
630 if n == nullid:
630 if n == nullid:
631 continue
631 continue
632 # A node's revision number represents its place in a
632 # A node's revision number represents its place in a
633 # topologically sorted list of nodes.
633 # topologically sorted list of nodes.
634 r = self.rev(n)
634 r = self.rev(n)
635 if r >= lowestrev:
635 if r >= lowestrev:
636 if n not in ancestors:
636 if n not in ancestors:
637 # If we are possibly a descendent of one of the roots
637 # If we are possibly a descendent of one of the roots
638 # and we haven't already been marked as an ancestor
638 # and we haven't already been marked as an ancestor
639 ancestors[n] = 1 # Mark as ancestor
639 ancestors[n] = 1 # Mark as ancestor
640 # Add non-nullid parents to list of nodes to tag.
640 # Add non-nullid parents to list of nodes to tag.
641 nodestotag.extend([p for p in self.parents(n) if
641 nodestotag.extend([p for p in self.parents(n) if
642 p != nullid])
642 p != nullid])
643 elif n in heads: # We've seen it before, is it a fake head?
643 elif n in heads: # We've seen it before, is it a fake head?
644 # So it is, real heads should not be the ancestors of
644 # So it is, real heads should not be the ancestors of
645 # any other heads.
645 # any other heads.
646 heads.pop(n)
646 heads.pop(n)
647 if not ancestors:
647 if not ancestors:
648 return nonodes
648 return nonodes
649 # Now that we have our set of ancestors, we want to remove any
649 # Now that we have our set of ancestors, we want to remove any
650 # roots that are not ancestors.
650 # roots that are not ancestors.
651
651
652 # If one of the roots was nullid, everything is included anyway.
652 # If one of the roots was nullid, everything is included anyway.
653 if lowestrev > nullrev:
653 if lowestrev > nullrev:
654 # But, since we weren't, let's recompute the lowest rev to not
654 # But, since we weren't, let's recompute the lowest rev to not
655 # include roots that aren't ancestors.
655 # include roots that aren't ancestors.
656
656
657 # Filter out roots that aren't ancestors of heads
657 # Filter out roots that aren't ancestors of heads
658 roots = [n for n in roots if n in ancestors]
658 roots = [n for n in roots if n in ancestors]
659 # Recompute the lowest revision
659 # Recompute the lowest revision
660 if roots:
660 if roots:
661 lowestrev = min([self.rev(n) for n in roots])
661 lowestrev = min([self.rev(n) for n in roots])
662 else:
662 else:
663 # No more roots? Return empty list
663 # No more roots? Return empty list
664 return nonodes
664 return nonodes
665 else:
665 else:
666 # We are descending from nullid, and don't need to care about
666 # We are descending from nullid, and don't need to care about
667 # any other roots.
667 # any other roots.
668 lowestrev = nullrev
668 lowestrev = nullrev
669 roots = [nullid]
669 roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents is a
                # descendent.  (We seeded the descendents dict with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached.
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

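    # A sketch of the dict-as-set idiom used above (this code predates the
    # builtin set type; the names n1, n2 are hypothetical):
    #
    #   descendents = dict.fromkeys([n1, n2], 1)   # membership "set"
    #   n1 in descendents                          # O(1) membership test
    #   descendents.pop(n1)                        # remove an element
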
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = self.count()
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]

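    # A hedged usage sketch of heads(), with `rl` an assumed revlog instance
    # and `n`, `m` assumed nodeids:
    #
    #   rl.heads()           # every node without children
    #   rl.heads(start=n)    # only heads that are descendants of n
    #   rl.heads(n, [m])     # additionally treat m as if it had no children
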
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

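    # Note the nullrev special case above: when node is nullid, every rev
    # with two null parents counts as a child. A hedged example, assuming
    # `rl` is a revlog whose only parentless revision is rev 0:
    #
    #   rl.children(nullid)   # -> [rl.node(0)], the root has null parents
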
    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = self.count() + rev
            if rev < 0 or rev >= self.count():
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

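    # The id forms _match resolves, as a hedged sketch (`rl`, `node20` and
    # `hex40` are assumed names):
    #
    #   rl._match(3)         # an int is taken as a revision number
    #   rl._match("3")       # str(rev) is accepted too
    #   rl._match("-1")      # negative revs count back from the tip
    #   rl._match(node20)    # a 20-byte string is tried as a binary nodeid
    #   rl._match(hex40)     # a 40-char string is tried as a hex nodeid
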
    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

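    # Why `len(id) & ~1`: bin() needs whole bytes, i.e. an even number of
    # hex digits. For a hypothetical prefix id = "e1f2a":
    #
    #   bin_id = bin("e1f2")                # compare the first two raw bytes
    #   hex(n).startswith("e1f2a")          # then recheck the odd last digit
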
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))

    def cmp(self, node, text):
        """compare text with a given file revision;
        returns True if text differs from what is stored"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

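    # cmp() works because a nodeid is the SHA-1 of the sorted parents plus
    # the text; a minimal sketch of that scheme, not the exact helper:
    #
    #   import sha
    #   def hash(text, p1, p2):
    #       l = [p1, p2]
    #       l.sort()
    #       return sha.sha(l[0] + l[1] + text).digest()
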
    def chunk(self, rev, df=None):
        def loadcache(df):
            if not df:
                if self._inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self._chunkcache = (start, df.read(cache_length))

        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        end = start + length

        offset = 0
        if not self._chunkcache:
            cache_length = max(65536, length)
            loadcache(df)
        else:
            cache_start = self._chunkcache[0]
            cache_length = len(self._chunkcache[1])
            cache_end = cache_start + cache_length
            if start >= cache_start and end <= cache_end:
                # it is cached
                offset = start - cache_start
            else:
                cache_length = max(65536, length)
                loadcache(df)

        # avoid copying large chunks
        c = self._chunkcache[1]
        if cache_length != length:
            c = c[offset:offset + length]

        return decompress(c)

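    # The cache window in numbers (hedged example): with _chunkcache holding
    # 65536 bytes starting at offset 0,
    #
    #   start=1024, end=1124   # hit:  offset = 1024, slice the cached string
    #   start=70000            # miss: reload >= 64KB starting at 70000
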
    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

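    # revdiff's fast path in numbers (hedged): if base(5) == base(6), then
    #
    #   revdiff(5, 6)   # returns chunk(6) verbatim; it already is the delta
    #   revdiff(2, 6)   # rebuilds both full texts, then diffs them
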
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        if self._inline:
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]
            self._loadindex(base, rev + 1)
        else:
            self._loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

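    # Reconstruction in revision() boils down to patching forward from the
    # chain base, reusing a cached intermediate text when possible; a hedged
    # sketch over the real helpers:
    #
    #   text = chunk(base)                                   # full snapshot
    #   bins = [chunk(r) for r in range(base + 1, rev + 1)]  # the deltas
    #   text = mdiff.patches(text, bins)                     # apply in order
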
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

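    # Inline layout math: in a combined .i file every revision's data sits
    # after all the index entries written so far, hence the expression
    # start(r) + (r + 1) * entrysize above. A hedged example with 64-byte
    # entries:
    #
    #   rev 0 data offset = start(0) + 1 * 64 = 64
    #   rev 1 data offset = start(1) + 2 * 64
    #
    # Past 128KB (131072 bytes) the file is split into separate .i and .d.
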
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

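    # The snapshot heuristic in numbers (hedged example): for a 1000-byte
    # text whose delta chain since the last full version spans 2500 bytes,
    #
    #   dist > len(text) * 2    # 2500 > 2000 -> store a full version
    #
    # which restarts `base` at the new revision and keeps the cost of
    # reconstructing any revision bounded relative to its text size.
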
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes.  The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets.  parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                # a trivial delta against the null revision: send the full
                # text behind a header instead of diffing against ""
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.genchunk("%s%s" % (meta, d))

        yield changegroup.closechunk()

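    # Wire layout of each delta chunk yielded above (hedged summary):
    #
    #   node(20) | p1(20) | p2(20) | cs(20) | delta payload
    #
    # When a == -1 the delta base is the null revision, so the payload is
    # the full text preceded by mdiff.trivialdiffheader(len(text)).
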
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read them back in revision()
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = mdiff.patches(text, [delta])
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), len(cdelta), textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta)
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

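    # addgroup's two paths per chunk (hedged summary): a delta that extends
    # the current chain (chain == prev) is appended as-is; anything else,
    # or a chain grown past twice the text size, goes through _addrevision
    # with the fully rebuilt text. The trigger is the condition above:
    #
    #   chain != prev or (end - start + len(cdelta)) > textlen * 2
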
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if self.count():
            expected = max(0, self.end(self.count() - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
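
    # checksize() interpretation (hedged): dd and di are the byte counts by
    # which the data and index files differ from what the index implies;
    # (0, 0) means both files are exactly the expected length.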