Move hash function back to revlog from node
mpm@selenic.com
r1091:d62130f9 default
--- a/mercurial/node.py
+++ b/mercurial/node.py
@@ -1,36 +1,21 @@
 """
 node.py - basic nodeid manipulation for mercurial
 
 Copyright 2005 Matt Mackall <mpm@selenic.com>
 
 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """
 
 import sha, binascii
 
 nullid = "\0" * 20
 
 def hex(node):
     return binascii.hexlify(node)
 
 def bin(node):
     return binascii.unhexlify(node)
 
 def short(node):
     return hex(node[:6])
-
-def hash(text, p1, p2):
-    """generate a hash from the given text and its parent hashes
-
-    This hash combines both the current file contents and its history
-    in a manner that makes it easy to distinguish nodes with the same
-    content in the revision graph.
-    """
-    l = [p1, p2]
-    l.sort()
-    s = sha.new(l[0])
-    s.update(l[1])
-    s.update(text)
-    return s.digest()
-
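For reference, the function being moved is self-contained enough to exercise on its own. The sketch below is not part of the change; it assumes Python 3 and substitutes hashlib.sha1 for the Python 2 `sha` module. It demonstrates the property the docstring describes: the nodeid depends on both content and history, and sorting the parents first makes it independent of parent order.

    # Standalone sketch of the nodeid scheme (assumptions: Python 3,
    # hashlib.sha1 in place of the old sha module).
    import hashlib

    nullid = b"\0" * 20

    def hash(text, p1, p2):
        # sort the parents so hash(t, a, b) == hash(t, b, a)
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
        s.update(text)
        return s.digest()

    root = hash(b"hello\n", nullid, nullid)
    child = hash(b"hello\n", root, nullid)   # same text, different history
    assert root != child
    assert hash(b"x", root, child) == hash(b"x", child, root)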
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,632 +1,646 @@
 """
 revlog.py - storage back-end for mercurial
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches
 
 Copyright 2005 Matt Mackall <mpm@selenic.com>
 
 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """
 
 import zlib, struct, sha, binascii, heapq
 from mercurial import mdiff
 from node import *
 
+def hash(text, p1, p2):
+    """generate a hash from the given text and its parent hashes
+
+    This hash combines both the current file contents and its history
+    in a manner that makes it easy to distinguish nodes with the same
+    content in the revision graph.
+    """
+    l = [p1, p2]
+    l.sort()
+    s = sha.new(l[0])
+    s.update(l[1])
+    s.update(text)
+    return s.digest()
+
 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text: return text
     if len(text) < 44:
         if text[0] == '\0': return text
         return 'u' + text
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return text
         return 'u' + text
     return bin
 
 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError("unknown compression type %s" % t)
 
 indexformat = ">4l20s20s20s"
 
 class lazyparser:
     """
     this class avoids the need to parse the entirety of large indices
 
     By default we parse and load 1000 entries at a time.
 
     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog
 
     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map
 
         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1
 
 class lazyindex:
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def append(self, e):
         self.p.index.append(e)
 
 class lazymap:
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0: raise KeyError("node " + hex(key))
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
 
 class RevlogError(Exception): pass
 
 class revlog:
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, includings its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None
 
         try:
             i = self.opener(self.indexfile).read()
         except IOError:
             i = ""
 
         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l
 
             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1
 
             self.nodemap = dict(m)
             self.nodemap[nullid] = -1
 
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node): return self.nodemap[node]
     def linkrev(self, node): return self.index[self.nodemap[node]][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.nodemap[node]][4:6]
 
     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]
 
     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable
 
     def heads(self, stop=None):
         """return the list of all nodes that have no children"""
         p = {}
         h = []
         stoprev = 0
         if stop and stop in self.nodemap:
             stoprev = self.rev(stop)
 
         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             if n == stop:
                 break
             if r < stoprev:
                 break
             for pn in self.parents(n):
                 p[pn] = 1
         return h
 
     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c
 
     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise KeyError("Ambiguous identifier")
             if len(c) < 1: raise KeyError("No match found")
             return c[0]
 
         return None
 
     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)
 
     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)
 
     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)
 
     def revision(self, node):
         """return an uncompressed revision of a given"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         # look up what we need to read
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)
 
         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0
 
         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)
 
         if text is None:
             last = self.length(base)
             text = decompress(data[:last])
 
         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s
 
         text = mdiff.patches(text, bins)
 
         if node != hash(text, p1, p2):
             raise IOError("integrity check failed on %s:%d"
                           % (self.datafile, rev))
 
         self.cache = (node, rev, text)
         return text
 
     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log
 
         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid
 
         node = hash(text, p1, p2)
 
         if node in self.nodemap:
             return node
 
         n = self.count()
         t = n - 1
 
         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, text)
             data = compress(d)
             dist = end - start + len(data)
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)
 
         offset = 0
         if t >= 0:
             offset = self.end(t)
 
         e = (offset, len(data), base, link, p1, p2, node)
 
         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)
 
         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)
 
         self.cache = (node, n, text)
         return node
 
     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1
 
         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     r = self.rev(n)
                     yield (-d, r, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))
 
         x = ancestors(a)
         y = ancestors(b)
         lx = x.next()
         ly = y.next()
 
         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             if lx == ly:
                 return lx[2]
             elif lx < ly:
                 ly = y.next()
             elif lx > ly:
                 lx = x.next()
 
     def group(self, linkmap):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. the first delta is
         parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = []
         needed = {}
 
         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1
 
         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))
 
         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])
 
             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""
 
                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]
 
             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d
 
         yield struct.pack(">l", 0)
 
     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group
 
         given a set of deltas, add them to the revision log. the
         first delta is against its parent, which should be in our
         log, the rest are against the previous delta.
         """
 
         #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid
 
         base = prev = -1
         start = end = measure = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()
 
         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")
 
         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 if unique:
                     raise RevlogError("already have %s" % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]
 
             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError("unknown base %s" % short(chain[:4]))
 
             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.
 
             if chain == prev:
                 cdelta = compress(delta)
 
             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError("consistency error adding group")
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))
 
             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)
 
         dfh.close()
         ifh.close()
         return node
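A note on the on-disk chunk format this file relies on: compress() makes every stored chunk self-describing through its first byte, so decompress() needs no out-of-band type or length information. The round-trip sketch below is my own restatement in Python 3 bytes (the original operates on Python 2 str), showing the three cases: a leading '\0' (or an empty chunk) means the data is stored raw, 'u' marks uncompressed data behind a one-byte tag, and 'x' is simply the first byte of a zlib stream.

    # Standalone sketch of the chunk tagging scheme (assumption: Python 3,
    # bytes in and bytes out).
    import zlib

    def compress(text):
        if not text:
            return text
        if len(text) < 44:              # too short for zlib to pay off
            if text[:1] == b'\0':
                return text             # leading NUL already means "raw"
            return b'u' + text
        bin = zlib.compress(text)
        if len(bin) > len(text):        # compression made it bigger
            if text[:1] == b'\0':
                return text
            return b'u' + text
        return bin                      # zlib output begins with 'x' (0x78)

    def decompress(bin):
        if not bin:
            return bin
        t = bin[:1]
        if t == b'\0':
            return bin
        if t == b'x':
            return zlib.decompress(bin)
        if t == b'u':
            return bin[1:]
        raise ValueError("unknown compression type %r" % t)

    for blob in (b"", b"\0binary", b"short text", b"abc" * 100):
        assert decompress(compress(blob)) == blob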