strip: make repair.strip transactional to avoid repository corruption...
Henrik Stuart
r8073:e8a28556 default
@@ -0,0 +1,36 @@
1 #!/bin/sh
2
3 echo "[extensions]" >> $HGRCPATH
4 echo "mq=">> $HGRCPATH
5
6 teststrip() {
7 hg -q up -C $1
8 echo % before update $1, strip $2
9 hg parents
10 chmod -$3 $4
11 hg strip $2 2>&1 | sed 's/\(saving bundle to \).*/\1/' | sed 's/Permission denied.*\.hg\/store\/\(.*\)/Permission denied \.hg\/store\/\1/'
12 echo % after update $1, strip $2
13 chmod +$3 $4
14 hg verify
15 echo % journal contents
16 cat .hg/store/journal | sed 's/\.i[^\n]*/\.i/'
17 ls .hg/store/journal >/dev/null 2>&1 && hg recover
18 ls .hg/strip-backup/* >/dev/null 2>&1 && hg unbundle -q .hg/strip-backup/*
19 rm -rf .hg/strip-backup
20 }
21
22 hg init test
23 cd test
24
25 echo a > a
26 hg -q ci -m "a" -A
27
28 echo b > b
29 hg -q ci -m "b" -A
30
31 echo c > c
32 hg -q ci -m "c" -A
33
34 teststrip 0 1 w .hg/store/data/b.i
35 teststrip 0 1 r .hg/store/data/b.i
36 teststrip 0 1 w .hg/store/00changelog.i
@@ -0,0 +1,83 @@
1 % before update 0, strip 1
2 changeset: 0:cb9a9f314b8b
3 user: test
4 date: Thu Jan 01 00:00:00 1970 +0000
5 summary: a
6
7 saving bundle to
8 transaction abort!
9 failed to truncate data/b.i
10 rollback failed - please run hg recover
11 abort: Permission denied .hg/store/data/b.i
12 % after update 0, strip 1
13 checking changesets
14 checking manifests
15 crosschecking files in changesets and manifests
16 checking files
17 warning: orphan revlog 'data/b.i'
18 1 files, 1 changesets, 1 total revisions
19 1 warnings encountered!
20 % journal contents
21 00changelog.i
22 00manifest.i
23 data/b.i
24 data/c.i
25 rolling back interrupted transaction
26 checking changesets
27 checking manifests
28 crosschecking files in changesets and manifests
29 checking files
30 1 files, 1 changesets, 1 total revisions
31 % before update 0, strip 1
32 changeset: 0:cb9a9f314b8b
33 user: test
34 date: Thu Jan 01 00:00:00 1970 +0000
35 summary: a
36
37 abort: Permission denied .hg/store/data/b.i
38 % after update 0, strip 1
39 checking changesets
40 checking manifests
41 crosschecking files in changesets and manifests
42 checking files
43 3 files, 3 changesets, 3 total revisions
44 % journal contents
45 cat: .hg/store/journal: No such file or directory
46 % before update 0, strip 1
47 changeset: 0:cb9a9f314b8b
48 user: test
49 date: Thu Jan 01 00:00:00 1970 +0000
50 summary: a
51
52 saving bundle to
53 transaction abort!
54 failed to truncate 00changelog.i
55 rollback failed - please run hg recover
56 abort: Permission denied .hg/store/00changelog.i
57 % after update 0, strip 1
58 checking changesets
59 checking manifests
60 crosschecking files in changesets and manifests
61 1: changeset refers to unknown manifest a539ce0c1a22
62 2: changeset refers to unknown manifest e3738bf54399
63 b@1: in changeset but not in manifest
64 c@2: in changeset but not in manifest
65 checking files
66 data/b.i@1: missing revlog!
67 0: empty or missing b
68 data/c.i@2: missing revlog!
69 0: empty or missing c
70 3 files, 3 changesets, 1 total revisions
71 8 integrity errors encountered!
72 (first damaged changeset appears to be 0)
73 % journal contents
74 00changelog.i
75 00manifest.i
76 data/b.i
77 data/c.i
78 rolling back interrupted transaction
79 checking changesets
80 checking manifests
81 crosschecking files in changesets and manifests
82 checking files
83 1 files, 1 changesets, 1 total revisions
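
Note on the recovery path exercised above: the transaction journal records, for each store file about to be modified, the file name and a byte offset, and `hg recover` (like tr.abort()) truncates each journaled file back to its recorded offset. For an ordinary append-only transaction that offset is the pre-transaction length, so rollback undoes the appends; for the transactional strip introduced here the recorded offset is the strip point, so replaying the journal finishes the truncation and leaves a consistent, stripped store, which is what the second verify run shows. Below is a minimal sketch of that journal idea; the class name and the on-disk format are simplified for illustration and are not mercurial/transaction.py.

    import os

    class toyjournal(object):
        """record (file, offset) pairs; undo by truncating back to them"""

        def __init__(self, journalpath):
            self.journalpath = journalpath
            self.entries = []
            self._fp = open(journalpath, "w")

        def add(self, name, offset):
            # note the length this file must have if we roll back
            self.entries.append((name, offset))
            self._fp.write("%s\0%d\n" % (name, offset))
            self._fp.flush()

        def close(self):
            # commit: the journal is no longer needed
            self._fp.close()
            os.unlink(self.journalpath)

        def abort(self):
            # rollback/recover: truncate every journaled file to its offset
            self._fp.close()
            for name, offset in reversed(self.entries):
                f = open(name, "r+b")
                f.truncate(offset)
                f.close()
            os.unlink(self.journalpath)
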
@@ -1,135 +1,149 @@
1 1 # repair.py - functions for repository repair for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 4 # Copyright 2007 Matt Mackall
5 5 #
6 6 # This software may be used and distributed according to the terms
7 7 # of the GNU General Public License, incorporated herein by reference.
8 8
9 9 import changegroup, os
10 10 from node import nullrev, short
11 11 from i18n import _
12 12
13 13 def _bundle(repo, bases, heads, node, suffix, extranodes=None):
14 14 """create a bundle with the specified revisions as a backup"""
15 15 cg = repo.changegroupsubset(bases, heads, 'strip', extranodes)
16 16 backupdir = repo.join("strip-backup")
17 17 if not os.path.isdir(backupdir):
18 18 os.mkdir(backupdir)
19 19 name = os.path.join(backupdir, "%s-%s" % (short(node), suffix))
20 20 repo.ui.warn(_("saving bundle to %s\n") % name)
21 21 return changegroup.writebundle(cg, name, "HG10BZ")
22 22
23 23 def _collectfiles(repo, striprev):
24 24 """find out the filelogs affected by the strip"""
25 25 files = {}
26 26
27 27 for x in xrange(striprev, len(repo)):
28 28 for name in repo[x].files():
29 29 if name in files:
30 30 continue
31 31 files[name] = 1
32 32
33 33 files = files.keys()
34 34 files.sort()
35 35 return files
36 36
37 37 def _collectextranodes(repo, files, link):
38 38 """return the nodes that have to be saved before the strip"""
39 39 def collectone(revlog):
40 40 extra = []
41 41 startrev = count = len(revlog)
42 42 # find the truncation point of the revlog
43 43 for i in xrange(0, count):
44 44 lrev = revlog.linkrev(i)
45 45 if lrev >= link:
46 46 startrev = i + 1
47 47 break
48 48
49 49 # see if any revision after that point has a linkrev less than link
50 50 # (we have to manually save these guys)
51 51 for i in xrange(startrev, count):
52 52 node = revlog.node(i)
53 53 lrev = revlog.linkrev(i)
54 54 if lrev < link:
55 55 extra.append((node, cl.node(lrev)))
56 56
57 57 return extra
58 58
59 59 extranodes = {}
60 60 cl = repo.changelog
61 61 extra = collectone(repo.manifest)
62 62 if extra:
63 63 extranodes[1] = extra
64 64 for fname in files:
65 65 f = repo.file(fname)
66 66 extra = collectone(f)
67 67 if extra:
68 68 extranodes[fname] = extra
69 69
70 70 return extranodes
71 71
72 72 def strip(ui, repo, node, backup="all"):
73 73 cl = repo.changelog
74 74 # TODO delete the undo files, and handle undo of merge sets
75 75 striprev = cl.rev(node)
76 76
77 77 # Some revisions with rev > striprev may not be descendants of striprev.
78 78 # We have to find these revisions and put them in a bundle, so that
79 79 # we can restore them after the truncations.
80 80 # To create the bundle we use repo.changegroupsubset which requires
81 81 # the list of heads and bases of the set of interesting revisions.
82 82 # (head = revision in the set that has no descendant in the set;
83 83 # base = revision in the set that has no ancestor in the set)
84 84 tostrip = {striprev: 1}
85 85 saveheads = {}
86 86 savebases = []
87 87 for r in xrange(striprev + 1, len(cl)):
88 88 parents = cl.parentrevs(r)
89 89 if parents[0] in tostrip or parents[1] in tostrip:
90 90 # r is a descendant of striprev
91 91 tostrip[r] = 1
92 92 # if this is a merge and one of the parents does not descend
93 93 # from striprev, mark that parent as a savehead.
94 94 if parents[1] != nullrev:
95 95 for p in parents:
96 96 if p not in tostrip and p > striprev:
97 97 saveheads[p] = 1
98 98 else:
99 99 # if no parents of this revision will be stripped, mark it as
100 100 # a savebase
101 101 if parents[0] < striprev and parents[1] < striprev:
102 102 savebases.append(cl.node(r))
103 103
104 104 for p in parents:
105 105 if p in saveheads:
106 106 del saveheads[p]
107 107 saveheads[r] = 1
108 108
109 109 saveheads = [cl.node(r) for r in saveheads]
110 110 files = _collectfiles(repo, striprev)
111 111
112 112 extranodes = _collectextranodes(repo, files, striprev)
113 113
114 114 # create a changegroup for all the branches we need to keep
115 115 if backup == "all":
116 116 _bundle(repo, [node], cl.heads(), node, 'backup')
117 117 if saveheads or extranodes:
118 118 chgrpfile = _bundle(repo, savebases, saveheads, node, 'temp',
119 119 extranodes)
120 120
121 cl.strip(striprev)
122 repo.manifest.strip(striprev)
123 for name in files:
124 f = repo.file(name)
125 f.strip(striprev)
121 fs = [repo.file(name) for name in files]
122 mfst = repo.manifest
123
124 tr = repo.transaction()
125 offset = len(tr.entries)
126
127 cl.strip(striprev, tr)
128 mfst.strip(striprev, tr)
129 for f in fs:
130 f.strip(striprev, tr)
131
132 try:
133 for i in xrange(offset, len(tr.entries)):
134 file, troffset, ignore = tr.entries[i]
135 repo.sopener(file, 'a').truncate(troffset)
136 tr.close()
137 except:
138 tr.abort()
139 raise
126 140
127 141 if saveheads or extranodes:
128 142 ui.status(_("adding branch\n"))
129 143 f = open(chgrpfile, "rb")
130 144 gen = changegroup.readbundle(f, chgrpfile)
131 145 repo.addchangegroup(gen, 'strip', 'bundle:' + chgrpfile, True)
132 146 f.close()
133 147 if backup != "strip":
134 148 os.unlink(chgrpfile)
135 149
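
The essence of the repair.py hunk above: changelog, manifest and filelog strip() calls no longer truncate their files directly; they only register truncation points with a transaction, and repair.strip then performs the truncations itself, closing the transaction on success and aborting it (leaving the journal for `hg recover`) on failure. A condensed, self-contained sketch of that control flow follows, using plain files and a toy transaction instead of real revlogs; all names here (toytransaction, toystrip, strippoints) are illustrative, not Mercurial APIs, and the entries are simplified to (file, offset) pairs.

    class toytransaction(object):
        def __init__(self):
            self.entries = []               # simplified: (file, offset) pairs
        def add(self, name, offset):
            self.entries.append((name, offset))
        def close(self):
            self.entries = []
        def abort(self):
            # truncate each registered file to its journaled offset, so the
            # store ends up consistently stripped even after a partial failure
            for name, offset in reversed(self.entries):
                f = open(name, "r+b")
                f.truncate(offset)
                f.close()

    def toystrip(paths, strippoints):
        """register every truncation first, then apply them transactionally"""
        tr = toytransaction()
        offset = len(tr.entries)
        for p in paths:                     # stands in for cl/mfst/filelog.strip
            tr.add(p, strippoints[p])
        try:
            for i in range(offset, len(tr.entries)):
                name, troffset = tr.entries[i]
                f = open(name, "r+b")       # stands in for repo.sopener(file, 'a')
                f.truncate(troffset)
                f.close()
            tr.close()
        except Exception:
            tr.abort()
            raise
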
@@ -1,1373 +1,1371 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 # import stuff from node for others to import from revlog
14 14 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 15 from i18n import _
16 16 import changegroup, errno, ancestor, mdiff, parsers
17 17 import struct, util, zlib, error
18 18
19 19 _pack = struct.pack
20 20 _unpack = struct.unpack
21 21 _compress = zlib.compress
22 22 _decompress = zlib.decompress
23 23 _sha = util.sha1
24 24
25 25 # revlog flags
26 26 REVLOGV0 = 0
27 27 REVLOGNG = 1
28 28 REVLOGNGINLINEDATA = (1 << 16)
29 29 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 30 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 32
33 33 RevlogError = error.RevlogError
34 34 LookupError = error.LookupError
35 35
36 36 def getoffset(q):
37 37 return int(q >> 16)
38 38
39 39 def gettype(q):
40 40 return int(q & 0xFFFF)
41 41
42 42 def offset_type(offset, type):
43 43 return long(long(offset) << 16 | type)
44 44
45 45 nullhash = _sha(nullid)
46 46
47 47 def hash(text, p1, p2):
48 48 """generate a hash from the given text and its parent hashes
49 49
50 50 This hash combines both the current file contents and its history
51 51 in a manner that makes it easy to distinguish nodes with the same
52 52 content in the revision graph.
53 53 """
54 54 # As of now, if one of the parent node is null, p2 is null
55 55 if p2 == nullid:
56 56 # deep copy of a hash is faster than creating one
57 57 s = nullhash.copy()
58 58 s.update(p1)
59 59 else:
60 60 # none of the parent nodes are nullid
61 61 l = [p1, p2]
62 62 l.sort()
63 63 s = _sha(l[0])
64 64 s.update(l[1])
65 65 s.update(text)
66 66 return s.digest()
67 67
68 68 def compress(text):
69 69 """ generate a possibly-compressed representation of text """
70 70 if not text:
71 71 return ("", text)
72 72 l = len(text)
73 73 bin = None
74 74 if l < 44:
75 75 pass
76 76 elif l > 1000000:
77 77 # zlib makes an internal copy, thus doubling memory usage for
78 78 # large files, so lets do this in pieces
79 79 z = zlib.compressobj()
80 80 p = []
81 81 pos = 0
82 82 while pos < l:
83 83 pos2 = pos + 2**20
84 84 p.append(z.compress(text[pos:pos2]))
85 85 pos = pos2
86 86 p.append(z.flush())
87 87 if sum(map(len, p)) < l:
88 88 bin = "".join(p)
89 89 else:
90 90 bin = _compress(text)
91 91 if bin is None or len(bin) > l:
92 92 if text[0] == '\0':
93 93 return ("", text)
94 94 return ('u', text)
95 95 return ("", bin)
96 96
97 97 def decompress(bin):
98 98 """ decompress the given input """
99 99 if not bin:
100 100 return bin
101 101 t = bin[0]
102 102 if t == '\0':
103 103 return bin
104 104 if t == 'x':
105 105 return _decompress(bin)
106 106 if t == 'u':
107 107 return bin[1:]
108 108 raise RevlogError(_("unknown compression type %r") % t)
109 109
110 110 class lazyparser(object):
111 111 """
112 112 this class avoids the need to parse the entirety of large indices
113 113 """
114 114
115 115 # lazyparser is not safe to use on windows if win32 extensions not
116 116 # available. it keeps file handle open, which make it not possible
117 117 # to break hardlinks on local cloned repos.
118 118
119 119 def __init__(self, dataf, size):
120 120 self.dataf = dataf
121 121 self.s = struct.calcsize(indexformatng)
122 122 self.datasize = size
123 123 self.l = size/self.s
124 124 self.index = [None] * self.l
125 125 self.map = {nullid: nullrev}
126 126 self.allmap = 0
127 127 self.all = 0
128 128 self.mapfind_count = 0
129 129
130 130 def loadmap(self):
131 131 """
132 132 during a commit, we need to make sure the rev being added is
133 133 not a duplicate. This requires loading the entire index,
134 134 which is fairly slow. loadmap can load up just the node map,
135 135 which takes much less time.
136 136 """
137 137 if self.allmap:
138 138 return
139 139 end = self.datasize
140 140 self.allmap = 1
141 141 cur = 0
142 142 count = 0
143 143 blocksize = self.s * 256
144 144 self.dataf.seek(0)
145 145 while cur < end:
146 146 data = self.dataf.read(blocksize)
147 147 off = 0
148 148 for x in xrange(256):
149 149 n = data[off + ngshaoffset:off + ngshaoffset + 20]
150 150 self.map[n] = count
151 151 count += 1
152 152 if count >= self.l:
153 153 break
154 154 off += self.s
155 155 cur += blocksize
156 156
157 157 def loadblock(self, blockstart, blocksize, data=None):
158 158 if self.all:
159 159 return
160 160 if data is None:
161 161 self.dataf.seek(blockstart)
162 162 if blockstart + blocksize > self.datasize:
163 163 # the revlog may have grown since we've started running,
164 164 # but we don't have space in self.index for more entries.
165 165 # limit blocksize so that we don't get too much data.
166 166 blocksize = max(self.datasize - blockstart, 0)
167 167 data = self.dataf.read(blocksize)
168 168 lend = len(data) / self.s
169 169 i = blockstart / self.s
170 170 off = 0
171 171 # lazyindex supports __delitem__
172 172 if lend > len(self.index) - i:
173 173 lend = len(self.index) - i
174 174 for x in xrange(lend):
175 175 if self.index[i + x] == None:
176 176 b = data[off : off + self.s]
177 177 self.index[i + x] = b
178 178 n = b[ngshaoffset:ngshaoffset + 20]
179 179 self.map[n] = i + x
180 180 off += self.s
181 181
182 182 def findnode(self, node):
183 183 """search backwards through the index file for a specific node"""
184 184 if self.allmap:
185 185 return None
186 186
187 187 # hg log will cause many many searches for the manifest
188 188 # nodes. After we get called a few times, just load the whole
189 189 # thing.
190 190 if self.mapfind_count > 8:
191 191 self.loadmap()
192 192 if node in self.map:
193 193 return node
194 194 return None
195 195 self.mapfind_count += 1
196 196 last = self.l - 1
197 197 while self.index[last] != None:
198 198 if last == 0:
199 199 self.all = 1
200 200 self.allmap = 1
201 201 return None
202 202 last -= 1
203 203 end = (last + 1) * self.s
204 204 blocksize = self.s * 256
205 205 while end >= 0:
206 206 start = max(end - blocksize, 0)
207 207 self.dataf.seek(start)
208 208 data = self.dataf.read(end - start)
209 209 findend = end - start
210 210 while True:
211 211 # we're searching backwards, so we have to make sure
212 212 # we don't find a changeset where this node is a parent
213 213 off = data.find(node, 0, findend)
214 214 findend = off
215 215 if off >= 0:
216 216 i = off / self.s
217 217 off = i * self.s
218 218 n = data[off + ngshaoffset:off + ngshaoffset + 20]
219 219 if n == node:
220 220 self.map[n] = i + start / self.s
221 221 return node
222 222 else:
223 223 break
224 224 end -= blocksize
225 225 return None
226 226
227 227 def loadindex(self, i=None, end=None):
228 228 if self.all:
229 229 return
230 230 all = False
231 231 if i == None:
232 232 blockstart = 0
233 233 blocksize = (65536 / self.s) * self.s
234 234 end = self.datasize
235 235 all = True
236 236 else:
237 237 if end:
238 238 blockstart = i * self.s
239 239 end = end * self.s
240 240 blocksize = end - blockstart
241 241 else:
242 242 blockstart = (i & ~1023) * self.s
243 243 blocksize = self.s * 1024
244 244 end = blockstart + blocksize
245 245 while blockstart < end:
246 246 self.loadblock(blockstart, blocksize)
247 247 blockstart += blocksize
248 248 if all:
249 249 self.all = True
250 250
251 251 class lazyindex(object):
252 252 """a lazy version of the index array"""
253 253 def __init__(self, parser):
254 254 self.p = parser
255 255 def __len__(self):
256 256 return len(self.p.index)
257 257 def load(self, pos):
258 258 if pos < 0:
259 259 pos += len(self.p.index)
260 260 self.p.loadindex(pos)
261 261 return self.p.index[pos]
262 262 def __getitem__(self, pos):
263 263 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
264 264 def __setitem__(self, pos, item):
265 265 self.p.index[pos] = _pack(indexformatng, *item)
266 266 def __delitem__(self, pos):
267 267 del self.p.index[pos]
268 268 def insert(self, pos, e):
269 269 self.p.index.insert(pos, _pack(indexformatng, *e))
270 270 def append(self, e):
271 271 self.p.index.append(_pack(indexformatng, *e))
272 272
273 273 class lazymap(object):
274 274 """a lazy version of the node map"""
275 275 def __init__(self, parser):
276 276 self.p = parser
277 277 def load(self, key):
278 278 n = self.p.findnode(key)
279 279 if n == None:
280 280 raise KeyError(key)
281 281 def __contains__(self, key):
282 282 if key in self.p.map:
283 283 return True
284 284 self.p.loadmap()
285 285 return key in self.p.map
286 286 def __iter__(self):
287 287 yield nullid
288 288 for i in xrange(self.p.l):
289 289 ret = self.p.index[i]
290 290 if not ret:
291 291 self.p.loadindex(i)
292 292 ret = self.p.index[i]
293 293 if isinstance(ret, str):
294 294 ret = _unpack(indexformatng, ret)
295 295 yield ret[7]
296 296 def __getitem__(self, key):
297 297 try:
298 298 return self.p.map[key]
299 299 except KeyError:
300 300 try:
301 301 self.load(key)
302 302 return self.p.map[key]
303 303 except KeyError:
304 304 raise KeyError("node " + hex(key))
305 305 def __setitem__(self, key, val):
306 306 self.p.map[key] = val
307 307 def __delitem__(self, key):
308 308 del self.p.map[key]
309 309
310 310 indexformatv0 = ">4l20s20s20s"
311 311 v0shaoffset = 56
312 312
313 313 class revlogoldio(object):
314 314 def __init__(self):
315 315 self.size = struct.calcsize(indexformatv0)
316 316
317 317 def parseindex(self, fp, inline):
318 318 s = self.size
319 319 index = []
320 320 nodemap = {nullid: nullrev}
321 321 n = off = 0
322 322 data = fp.read()
323 323 l = len(data)
324 324 while off + s <= l:
325 325 cur = data[off:off + s]
326 326 off += s
327 327 e = _unpack(indexformatv0, cur)
328 328 # transform to revlogv1 format
329 329 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
330 330 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
331 331 index.append(e2)
332 332 nodemap[e[6]] = n
333 333 n += 1
334 334
335 335 return index, nodemap, None
336 336
337 337 def packentry(self, entry, node, version, rev):
338 338 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
339 339 node(entry[5]), node(entry[6]), entry[7])
340 340 return _pack(indexformatv0, *e2)
341 341
342 342 # index ng:
343 343 # 6 bytes offset
344 344 # 2 bytes flags
345 345 # 4 bytes compressed length
346 346 # 4 bytes uncompressed length
347 347 # 4 bytes: base rev
348 348 # 4 bytes link rev
349 349 # 4 bytes parent 1 rev
350 350 # 4 bytes parent 2 rev
351 351 # 32 bytes: nodeid
352 352 indexformatng = ">Qiiiiii20s12x"
353 353 ngshaoffset = 32
354 354 versionformat = ">I"
355 355
356 356 class revlogio(object):
357 357 def __init__(self):
358 358 self.size = struct.calcsize(indexformatng)
359 359
360 360 def parseindex(self, fp, inline):
361 361 try:
362 362 size = util.fstat(fp).st_size
363 363 except AttributeError:
364 364 size = 0
365 365
366 366 if util.openhardlinks() and not inline and size > 1000000:
367 367 # big index, let's parse it on demand
368 368 parser = lazyparser(fp, size)
369 369 index = lazyindex(parser)
370 370 nodemap = lazymap(parser)
371 371 e = list(index[0])
372 372 type = gettype(e[0])
373 373 e[0] = offset_type(0, type)
374 374 index[0] = e
375 375 return index, nodemap, None
376 376
377 377 data = fp.read()
378 378 # call the C implementation to parse the index data
379 379 index, nodemap, cache = parsers.parse_index(data, inline)
380 380 return index, nodemap, cache
381 381
382 382 def packentry(self, entry, node, version, rev):
383 383 p = _pack(indexformatng, *entry)
384 384 if rev == 0:
385 385 p = _pack(versionformat, version) + p[4:]
386 386 return p
387 387
388 388 class revlog(object):
389 389 """
390 390 the underlying revision storage object
391 391
392 392 A revlog consists of two parts, an index and the revision data.
393 393
394 394 The index is a file with a fixed record size containing
395 395 information on each revision, including its nodeid (hash), the
396 396 nodeids of its parents, the position and offset of its data within
397 397 the data file, and the revision it's based on. Finally, each entry
398 398 contains a linkrev entry that can serve as a pointer to external
399 399 data.
400 400
401 401 The revision data itself is a linear collection of data chunks.
402 402 Each chunk represents a revision and is usually represented as a
403 403 delta against the previous chunk. To bound lookup time, runs of
404 404 deltas are limited to about 2 times the length of the original
405 405 version data. This makes retrieval of a version proportional to
406 406 its size, or O(1) relative to the number of revisions.
407 407
408 408 Both pieces of the revlog are written to in an append-only
409 409 fashion, which means we never need to rewrite a file to insert or
410 410 remove data, and can use some simple techniques to avoid the need
411 411 for locking while reading.
412 412 """
413 413 def __init__(self, opener, indexfile):
414 414 """
415 415 create a revlog object
416 416
417 417 opener is a function that abstracts the file opening operation
418 418 and can be used to implement COW semantics or the like.
419 419 """
420 420 self.indexfile = indexfile
421 421 self.datafile = indexfile[:-2] + ".d"
422 422 self.opener = opener
423 423 self._cache = None
424 424 self._chunkcache = None
425 425 self.nodemap = {nullid: nullrev}
426 426 self.index = []
427 427
428 428 v = REVLOG_DEFAULT_VERSION
429 429 if hasattr(opener, "defversion"):
430 430 v = opener.defversion
431 431 if v & REVLOGNG:
432 432 v |= REVLOGNGINLINEDATA
433 433
434 434 i = ""
435 435 try:
436 436 f = self.opener(self.indexfile)
437 437 i = f.read(4)
438 438 f.seek(0)
439 439 if len(i) > 0:
440 440 v = struct.unpack(versionformat, i)[0]
441 441 except IOError, inst:
442 442 if inst.errno != errno.ENOENT:
443 443 raise
444 444
445 445 self.version = v
446 446 self._inline = v & REVLOGNGINLINEDATA
447 447 flags = v & ~0xFFFF
448 448 fmt = v & 0xFFFF
449 449 if fmt == REVLOGV0 and flags:
450 450 raise RevlogError(_("index %s unknown flags %#04x for format v0")
451 451 % (self.indexfile, flags >> 16))
452 452 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
453 453 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
454 454 % (self.indexfile, flags >> 16))
455 455 elif fmt > REVLOGNG:
456 456 raise RevlogError(_("index %s unknown format %d")
457 457 % (self.indexfile, fmt))
458 458
459 459 self._io = revlogio()
460 460 if self.version == REVLOGV0:
461 461 self._io = revlogoldio()
462 462 if i:
463 463 try:
464 464 d = self._io.parseindex(f, self._inline)
465 465 except (ValueError, IndexError), e:
466 466 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
467 467 self.index, self.nodemap, self._chunkcache = d
468 468
469 469 # add the magic null revision at -1 (if it hasn't been done already)
470 470 if (self.index == [] or isinstance(self.index, lazyindex) or
471 471 self.index[-1][7] != nullid) :
472 472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
473 473
474 474 def _loadindex(self, start, end):
475 475 """load a block of indexes all at once from the lazy parser"""
476 476 if isinstance(self.index, lazyindex):
477 477 self.index.p.loadindex(start, end)
478 478
479 479 def _loadindexmap(self):
480 480 """loads both the map and the index from the lazy parser"""
481 481 if isinstance(self.index, lazyindex):
482 482 p = self.index.p
483 483 p.loadindex()
484 484 self.nodemap = p.map
485 485
486 486 def _loadmap(self):
487 487 """loads the map from the lazy parser"""
488 488 if isinstance(self.nodemap, lazymap):
489 489 self.nodemap.p.loadmap()
490 490 self.nodemap = self.nodemap.p.map
491 491
492 492 def tip(self):
493 493 return self.node(len(self.index) - 2)
494 494 def __len__(self):
495 495 return len(self.index) - 1
496 496 def __iter__(self):
497 497 for i in xrange(len(self)):
498 498 yield i
499 499 def rev(self, node):
500 500 try:
501 501 return self.nodemap[node]
502 502 except KeyError:
503 503 raise LookupError(node, self.indexfile, _('no node'))
504 504 def node(self, rev):
505 505 return self.index[rev][7]
506 506 def linkrev(self, rev):
507 507 return self.index[rev][4]
508 508 def parents(self, node):
509 509 i = self.index
510 510 d = i[self.rev(node)]
511 511 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
512 512 def parentrevs(self, rev):
513 513 return self.index[rev][5:7]
514 514 def start(self, rev):
515 515 return int(self.index[rev][0] >> 16)
516 516 def end(self, rev):
517 517 return self.start(rev) + self.length(rev)
518 518 def length(self, rev):
519 519 return self.index[rev][1]
520 520 def base(self, rev):
521 521 return self.index[rev][3]
522 522
523 523 def size(self, rev):
524 524 """return the length of the uncompressed text for a given revision"""
525 525 l = self.index[rev][2]
526 526 if l >= 0:
527 527 return l
528 528
529 529 t = self.revision(self.node(rev))
530 530 return len(t)
531 531
532 532 # alternate implementation, The advantage to this code is it
533 533 # will be faster for a single revision. But, the results are not
534 534 # cached, so finding the size of every revision will be slower.
535 535 """
536 536 if self.cache and self.cache[1] == rev:
537 537 return len(self.cache[2])
538 538
539 539 base = self.base(rev)
540 540 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
541 541 base = self.cache[1]
542 542 text = self.cache[2]
543 543 else:
544 544 text = self.revision(self.node(base))
545 545
546 546 l = len(text)
547 547 for x in xrange(base + 1, rev + 1):
548 548 l = mdiff.patchedsize(l, self.chunk(x))
549 549 return l
550 550 """
551 551
552 552 def reachable(self, node, stop=None):
553 553 """return a hash of all nodes ancestral to a given node, including
554 554 the node itself, stopping when stop is matched"""
555 555 reachable = {}
556 556 visit = [node]
557 557 reachable[node] = 1
558 558 if stop:
559 559 stopn = self.rev(stop)
560 560 else:
561 561 stopn = 0
562 562 while visit:
563 563 n = visit.pop(0)
564 564 if n == stop:
565 565 continue
566 566 if n == nullid:
567 567 continue
568 568 for p in self.parents(n):
569 569 if self.rev(p) < stopn:
570 570 continue
571 571 if p not in reachable:
572 572 reachable[p] = 1
573 573 visit.append(p)
574 574 return reachable
575 575
576 576 def ancestors(self, *revs):
577 577 'Generate the ancestors of revs using a breadth-first visit'
578 578 visit = list(revs)
579 579 seen = util.set([nullrev])
580 580 while visit:
581 581 for parent in self.parentrevs(visit.pop(0)):
582 582 if parent not in seen:
583 583 visit.append(parent)
584 584 seen.add(parent)
585 585 yield parent
586 586
587 587 def descendants(self, *revs):
588 588 'Generate the descendants of revs in topological order'
589 589 seen = util.set(revs)
590 590 for i in xrange(min(revs) + 1, len(self)):
591 591 for x in self.parentrevs(i):
592 592 if x != nullrev and x in seen:
593 593 seen.add(i)
594 594 yield i
595 595 break
596 596
597 597 def findmissing(self, common=None, heads=None):
598 598 '''
599 599 returns the topologically sorted list of nodes from the set:
600 600 missing = (ancestors(heads) \ ancestors(common))
601 601
602 602 where ancestors() is the set of ancestors from heads, heads included
603 603
604 604 if heads is None, the heads of the revlog are used
605 605 if common is None, nullid is assumed to be a common node
606 606 '''
607 607 if common is None:
608 608 common = [nullid]
609 609 if heads is None:
610 610 heads = self.heads()
611 611
612 612 common = [self.rev(n) for n in common]
613 613 heads = [self.rev(n) for n in heads]
614 614
615 615 # we want the ancestors, but inclusive
616 616 has = dict.fromkeys(self.ancestors(*common))
617 617 has[nullrev] = None
618 618 for r in common:
619 619 has[r] = None
620 620
621 621 # take all ancestors from heads that aren't in has
622 622 missing = {}
623 623 visit = [r for r in heads if r not in has]
624 624 while visit:
625 625 r = visit.pop(0)
626 626 if r in missing:
627 627 continue
628 628 else:
629 629 missing[r] = None
630 630 for p in self.parentrevs(r):
631 631 if p not in has:
632 632 visit.append(p)
633 633 missing = missing.keys()
634 634 missing.sort()
635 635 return [self.node(r) for r in missing]
636 636
637 637 def nodesbetween(self, roots=None, heads=None):
638 638 """Return a tuple containing three elements. Elements 1 and 2 contain
639 639 a final list bases and heads after all the unreachable ones have been
640 640 pruned. Element 0 contains a topologically sorted list of all
641 641
642 642 nodes that satisfy these constraints:
643 643 1. All nodes must be descended from a node in roots (the nodes on
644 644 roots are considered descended from themselves).
645 645 2. All nodes must also be ancestors of a node in heads (the nodes in
646 646 heads are considered to be their own ancestors).
647 647
648 648 If roots is unspecified, nullid is assumed as the only root.
649 649 If heads is unspecified, it is taken to be the output of the
650 650 heads method (i.e. a list of all nodes in the repository that
651 651 have no children)."""
652 652 nonodes = ([], [], [])
653 653 if roots is not None:
654 654 roots = list(roots)
655 655 if not roots:
656 656 return nonodes
657 657 lowestrev = min([self.rev(n) for n in roots])
658 658 else:
659 659 roots = [nullid] # Everybody's a descendent of nullid
660 660 lowestrev = nullrev
661 661 if (lowestrev == nullrev) and (heads is None):
662 662 # We want _all_ the nodes!
663 663 return ([self.node(r) for r in self], [nullid], list(self.heads()))
664 664 if heads is None:
665 665 # All nodes are ancestors, so the latest ancestor is the last
666 666 # node.
667 667 highestrev = len(self) - 1
668 668 # Set ancestors to None to signal that every node is an ancestor.
669 669 ancestors = None
670 670 # Set heads to an empty dictionary for later discovery of heads
671 671 heads = {}
672 672 else:
673 673 heads = list(heads)
674 674 if not heads:
675 675 return nonodes
676 676 ancestors = {}
677 677 # Turn heads into a dictionary so we can remove 'fake' heads.
678 678 # Also, later we will be using it to filter out the heads we can't
679 679 # find from roots.
680 680 heads = dict.fromkeys(heads, 0)
681 681 # Start at the top and keep marking parents until we're done.
682 682 nodestotag = heads.keys()
683 683 # Remember where the top was so we can use it as a limit later.
684 684 highestrev = max([self.rev(n) for n in nodestotag])
685 685 while nodestotag:
686 686 # grab a node to tag
687 687 n = nodestotag.pop()
688 688 # Never tag nullid
689 689 if n == nullid:
690 690 continue
691 691 # A node's revision number represents its place in a
692 692 # topologically sorted list of nodes.
693 693 r = self.rev(n)
694 694 if r >= lowestrev:
695 695 if n not in ancestors:
696 696 # If we are possibly a descendent of one of the roots
697 697 # and we haven't already been marked as an ancestor
698 698 ancestors[n] = 1 # Mark as ancestor
699 699 # Add non-nullid parents to list of nodes to tag.
700 700 nodestotag.extend([p for p in self.parents(n) if
701 701 p != nullid])
702 702 elif n in heads: # We've seen it before, is it a fake head?
703 703 # So it is, real heads should not be the ancestors of
704 704 # any other heads.
705 705 heads.pop(n)
706 706 if not ancestors:
707 707 return nonodes
708 708 # Now that we have our set of ancestors, we want to remove any
709 709 # roots that are not ancestors.
710 710
711 711 # If one of the roots was nullid, everything is included anyway.
712 712 if lowestrev > nullrev:
713 713 # But, since we weren't, let's recompute the lowest rev to not
714 714 # include roots that aren't ancestors.
715 715
716 716 # Filter out roots that aren't ancestors of heads
717 717 roots = [n for n in roots if n in ancestors]
718 718 # Recompute the lowest revision
719 719 if roots:
720 720 lowestrev = min([self.rev(n) for n in roots])
721 721 else:
722 722 # No more roots? Return empty list
723 723 return nonodes
724 724 else:
725 725 # We are descending from nullid, and don't need to care about
726 726 # any other roots.
727 727 lowestrev = nullrev
728 728 roots = [nullid]
729 729 # Transform our roots list into a 'set' (i.e. a dictionary where the
730 730 # values don't matter.
731 731 descendents = dict.fromkeys(roots, 1)
732 732 # Also, keep the original roots so we can filter out roots that aren't
733 733 # 'real' roots (i.e. are descended from other roots).
734 734 roots = descendents.copy()
735 735 # Our topologically sorted list of output nodes.
736 736 orderedout = []
737 737 # Don't start at nullid since we don't want nullid in our output list,
738 738 # and if nullid shows up in descedents, empty parents will look like
739 739 # they're descendents.
740 740 for r in xrange(max(lowestrev, 0), highestrev + 1):
741 741 n = self.node(r)
742 742 isdescendent = False
743 743 if lowestrev == nullrev: # Everybody is a descendent of nullid
744 744 isdescendent = True
745 745 elif n in descendents:
746 746 # n is already a descendent
747 747 isdescendent = True
748 748 # This check only needs to be done here because all the roots
749 749 # will start being marked is descendents before the loop.
750 750 if n in roots:
751 751 # If n was a root, check if it's a 'real' root.
752 752 p = tuple(self.parents(n))
753 753 # If any of its parents are descendents, it's not a root.
754 754 if (p[0] in descendents) or (p[1] in descendents):
755 755 roots.pop(n)
756 756 else:
757 757 p = tuple(self.parents(n))
758 758 # A node is a descendent if either of its parents are
759 759 # descendents. (We seeded the dependents list with the roots
760 760 # up there, remember?)
761 761 if (p[0] in descendents) or (p[1] in descendents):
762 762 descendents[n] = 1
763 763 isdescendent = True
764 764 if isdescendent and ((ancestors is None) or (n in ancestors)):
765 765 # Only include nodes that are both descendents and ancestors.
766 766 orderedout.append(n)
767 767 if (ancestors is not None) and (n in heads):
768 768 # We're trying to figure out which heads are reachable
769 769 # from roots.
770 770 # Mark this head as having been reached
771 771 heads[n] = 1
772 772 elif ancestors is None:
773 773 # Otherwise, we're trying to discover the heads.
774 774 # Assume this is a head because if it isn't, the next step
775 775 # will eventually remove it.
776 776 heads[n] = 1
777 777 # But, obviously its parents aren't.
778 778 for p in self.parents(n):
779 779 heads.pop(p, None)
780 780 heads = [n for n in heads.iterkeys() if heads[n] != 0]
781 781 roots = roots.keys()
782 782 assert orderedout
783 783 assert roots
784 784 assert heads
785 785 return (orderedout, roots, heads)
786 786
787 787 def heads(self, start=None, stop=None):
788 788 """return the list of all nodes that have no children
789 789
790 790 if start is specified, only heads that are descendants of
791 791 start will be returned
792 792 if stop is specified, it will consider all the revs from stop
793 793 as if they had no children
794 794 """
795 795 if start is None and stop is None:
796 796 count = len(self)
797 797 if not count:
798 798 return [nullid]
799 799 ishead = [1] * (count + 1)
800 800 index = self.index
801 801 for r in xrange(count):
802 802 e = index[r]
803 803 ishead[e[5]] = ishead[e[6]] = 0
804 804 return [self.node(r) for r in xrange(count) if ishead[r]]
805 805
806 806 if start is None:
807 807 start = nullid
808 808 if stop is None:
809 809 stop = []
810 810 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
811 811 startrev = self.rev(start)
812 812 reachable = {startrev: 1}
813 813 heads = {startrev: 1}
814 814
815 815 parentrevs = self.parentrevs
816 816 for r in xrange(startrev + 1, len(self)):
817 817 for p in parentrevs(r):
818 818 if p in reachable:
819 819 if r not in stoprevs:
820 820 reachable[r] = 1
821 821 heads[r] = 1
822 822 if p in heads and p not in stoprevs:
823 823 del heads[p]
824 824
825 825 return [self.node(r) for r in heads]
826 826
827 827 def children(self, node):
828 828 """find the children of a given node"""
829 829 c = []
830 830 p = self.rev(node)
831 831 for r in range(p + 1, len(self)):
832 832 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
833 833 if prevs:
834 834 for pr in prevs:
835 835 if pr == p:
836 836 c.append(self.node(r))
837 837 elif p == nullrev:
838 838 c.append(self.node(r))
839 839 return c
840 840
841 841 def _match(self, id):
842 842 if isinstance(id, (long, int)):
843 843 # rev
844 844 return self.node(id)
845 845 if len(id) == 20:
846 846 # possibly a binary node
847 847 # odds of a binary node being all hex in ASCII are 1 in 10**25
848 848 try:
849 849 node = id
850 850 self.rev(node) # quick search the index
851 851 return node
852 852 except LookupError:
853 853 pass # may be partial hex id
854 854 try:
855 855 # str(rev)
856 856 rev = int(id)
857 857 if str(rev) != id:
858 858 raise ValueError
859 859 if rev < 0:
860 860 rev = len(self) + rev
861 861 if rev < 0 or rev >= len(self):
862 862 raise ValueError
863 863 return self.node(rev)
864 864 except (ValueError, OverflowError):
865 865 pass
866 866 if len(id) == 40:
867 867 try:
868 868 # a full hex nodeid?
869 869 node = bin(id)
870 870 self.rev(node)
871 871 return node
872 872 except (TypeError, LookupError):
873 873 pass
874 874
875 875 def _partialmatch(self, id):
876 876 if len(id) < 40:
877 877 try:
878 878 # hex(node)[:...]
879 879 l = len(id) / 2 # grab an even number of digits
880 880 bin_id = bin(id[:l*2])
881 881 nl = [n for n in self.nodemap if n[:l] == bin_id]
882 882 nl = [n for n in nl if hex(n).startswith(id)]
883 883 if len(nl) > 0:
884 884 if len(nl) == 1:
885 885 return nl[0]
886 886 raise LookupError(id, self.indexfile,
887 887 _('ambiguous identifier'))
888 888 return None
889 889 except TypeError:
890 890 pass
891 891
892 892 def lookup(self, id):
893 893 """locate a node based on:
894 894 - revision number or str(revision number)
895 895 - nodeid or subset of hex nodeid
896 896 """
897 897 n = self._match(id)
898 898 if n is not None:
899 899 return n
900 900 n = self._partialmatch(id)
901 901 if n:
902 902 return n
903 903
904 904 raise LookupError(id, self.indexfile, _('no match found'))
905 905
906 906 def cmp(self, node, text):
907 907 """compare text with a given file revision"""
908 908 p1, p2 = self.parents(node)
909 909 return hash(text, p1, p2) != node
910 910
911 911 def chunk(self, rev, df=None):
912 912 def loadcache(df):
913 913 if not df:
914 914 if self._inline:
915 915 df = self.opener(self.indexfile)
916 916 else:
917 917 df = self.opener(self.datafile)
918 918 df.seek(start)
919 919 self._chunkcache = (start, df.read(cache_length))
920 920
921 921 start, length = self.start(rev), self.length(rev)
922 922 if self._inline:
923 923 start += (rev + 1) * self._io.size
924 924 end = start + length
925 925
926 926 offset = 0
927 927 if not self._chunkcache:
928 928 cache_length = max(65536, length)
929 929 loadcache(df)
930 930 else:
931 931 cache_start = self._chunkcache[0]
932 932 cache_length = len(self._chunkcache[1])
933 933 cache_end = cache_start + cache_length
934 934 if start >= cache_start and end <= cache_end:
935 935 # it is cached
936 936 offset = start - cache_start
937 937 else:
938 938 cache_length = max(65536, length)
939 939 loadcache(df)
940 940
941 941 # avoid copying large chunks
942 942 c = self._chunkcache[1]
943 943 if cache_length != length:
944 944 c = c[offset:offset + length]
945 945
946 946 return decompress(c)
947 947
948 948 def revdiff(self, rev1, rev2):
949 949 """return or calculate a delta between two revisions"""
950 950 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
951 951 return self.chunk(rev2)
952 952
953 953 return mdiff.textdiff(self.revision(self.node(rev1)),
954 954 self.revision(self.node(rev2)))
955 955
956 956 def revision(self, node):
957 957 """return an uncompressed revision of a given node"""
958 958 if node == nullid:
959 959 return ""
960 960 if self._cache and self._cache[0] == node:
961 961 return str(self._cache[2])
962 962
963 963 # look up what we need to read
964 964 text = None
965 965 rev = self.rev(node)
966 966 base = self.base(rev)
967 967
968 968 # check rev flags
969 969 if self.index[rev][0] & 0xFFFF:
970 970 raise RevlogError(_('incompatible revision flag %x') %
971 971 (self.index[rev][0] & 0xFFFF))
972 972
973 973 df = None
974 974
975 975 # do we have useful data cached?
976 976 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
977 977 base = self._cache[1]
978 978 text = str(self._cache[2])
979 979 self._loadindex(base, rev + 1)
980 980 if not self._inline and rev > base + 1:
981 981 df = self.opener(self.datafile)
982 982 else:
983 983 self._loadindex(base, rev + 1)
984 984 if not self._inline and rev > base:
985 985 df = self.opener(self.datafile)
986 986 text = self.chunk(base, df=df)
987 987
988 988 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
989 989 text = mdiff.patches(text, bins)
990 990 p1, p2 = self.parents(node)
991 991 if node != hash(text, p1, p2):
992 992 raise RevlogError(_("integrity check failed on %s:%d")
993 993 % (self.datafile, rev))
994 994
995 995 self._cache = (node, rev, text)
996 996 return text
997 997
998 998 def checkinlinesize(self, tr, fp=None):
999 999 if not self._inline:
1000 1000 return
1001 1001 if not fp:
1002 1002 fp = self.opener(self.indexfile, 'r')
1003 1003 fp.seek(0, 2)
1004 1004 size = fp.tell()
1005 1005 if size < 131072:
1006 1006 return
1007 1007 trinfo = tr.find(self.indexfile)
1008 1008 if trinfo == None:
1009 1009 raise RevlogError(_("%s not found in the transaction")
1010 1010 % self.indexfile)
1011 1011
1012 1012 trindex = trinfo[2]
1013 1013 dataoff = self.start(trindex)
1014 1014
1015 1015 tr.add(self.datafile, dataoff)
1016 1016 df = self.opener(self.datafile, 'w')
1017 1017 try:
1018 1018 calc = self._io.size
1019 1019 for r in self:
1020 1020 start = self.start(r) + (r + 1) * calc
1021 1021 length = self.length(r)
1022 1022 fp.seek(start)
1023 1023 d = fp.read(length)
1024 1024 df.write(d)
1025 1025 finally:
1026 1026 df.close()
1027 1027
1028 1028 fp.close()
1029 1029 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1030 1030 self.version &= ~(REVLOGNGINLINEDATA)
1031 1031 self._inline = False
1032 1032 for i in self:
1033 1033 e = self._io.packentry(self.index[i], self.node, self.version, i)
1034 1034 fp.write(e)
1035 1035
1036 1036 # if we don't call rename, the temp file will never replace the
1037 1037 # real index
1038 1038 fp.rename()
1039 1039
1040 1040 tr.replace(self.indexfile, trindex * calc)
1041 1041 self._chunkcache = None
1042 1042
1043 1043 def addrevision(self, text, transaction, link, p1, p2, d=None):
1044 1044 """add a revision to the log
1045 1045
1046 1046 text - the revision data to add
1047 1047 transaction - the transaction object used for rollback
1048 1048 link - the linkrev data to add
1049 1049 p1, p2 - the parent nodeids of the revision
1050 1050 d - an optional precomputed delta
1051 1051 """
1052 1052 dfh = None
1053 1053 if not self._inline:
1054 1054 dfh = self.opener(self.datafile, "a")
1055 1055 ifh = self.opener(self.indexfile, "a+")
1056 1056 try:
1057 1057 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1058 1058 finally:
1059 1059 if dfh:
1060 1060 dfh.close()
1061 1061 ifh.close()
1062 1062
1063 1063 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1064 1064 node = hash(text, p1, p2)
1065 1065 if node in self.nodemap:
1066 1066 return node
1067 1067
1068 1068 curr = len(self)
1069 1069 prev = curr - 1
1070 1070 base = self.base(prev)
1071 1071 offset = self.end(prev)
1072 1072
1073 1073 if curr:
1074 1074 if not d:
1075 1075 ptext = self.revision(self.node(prev))
1076 1076 d = mdiff.textdiff(ptext, text)
1077 1077 data = compress(d)
1078 1078 l = len(data[1]) + len(data[0])
1079 1079 dist = l + offset - self.start(base)
1080 1080
1081 1081 # full versions are inserted when the needed deltas
1082 1082 # become comparable to the uncompressed text
1083 1083 if not curr or dist > len(text) * 2:
1084 1084 data = compress(text)
1085 1085 l = len(data[1]) + len(data[0])
1086 1086 base = curr
1087 1087
1088 1088 e = (offset_type(offset, 0), l, len(text),
1089 1089 base, link, self.rev(p1), self.rev(p2), node)
1090 1090 self.index.insert(-1, e)
1091 1091 self.nodemap[node] = curr
1092 1092
1093 1093 entry = self._io.packentry(e, self.node, self.version, curr)
1094 1094 if not self._inline:
1095 1095 transaction.add(self.datafile, offset)
1096 1096 transaction.add(self.indexfile, curr * len(entry))
1097 1097 if data[0]:
1098 1098 dfh.write(data[0])
1099 1099 dfh.write(data[1])
1100 1100 dfh.flush()
1101 1101 ifh.write(entry)
1102 1102 else:
1103 1103 offset += curr * self._io.size
1104 1104 transaction.add(self.indexfile, offset, curr)
1105 1105 ifh.write(entry)
1106 1106 ifh.write(data[0])
1107 1107 ifh.write(data[1])
1108 1108 self.checkinlinesize(transaction, ifh)
1109 1109
1110 1110 self._cache = (node, curr, text)
1111 1111 return node
1112 1112
1113 1113 def ancestor(self, a, b):
1114 1114 """calculate the least common ancestor of nodes a and b"""
1115 1115
1116 1116 def parents(rev):
1117 1117 return [p for p in self.parentrevs(rev) if p != nullrev]
1118 1118
1119 1119 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1120 1120 if c is None:
1121 1121 return nullid
1122 1122
1123 1123 return self.node(c)
1124 1124
1125 1125 def group(self, nodelist, lookup, infocollect=None):
1126 1126 """calculate a delta group
1127 1127
1128 1128 Given a list of changeset revs, return a set of deltas and
1129 1129 metadata corresponding to nodes. the first delta is
1130 1130 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1131 1131 have this parent as it has all history before these
1132 1132 changesets. parent is parent[0]
1133 1133 """
1134 1134 revs = [self.rev(n) for n in nodelist]
1135 1135
1136 1136 # if we don't have any revisions touched by these changesets, bail
1137 1137 if not revs:
1138 1138 yield changegroup.closechunk()
1139 1139 return
1140 1140
1141 1141 # add the parent of the first rev
1142 1142 p = self.parents(self.node(revs[0]))[0]
1143 1143 revs.insert(0, self.rev(p))
1144 1144
1145 1145 # build deltas
1146 1146 for d in xrange(0, len(revs) - 1):
1147 1147 a, b = revs[d], revs[d + 1]
1148 1148 nb = self.node(b)
1149 1149
1150 1150 if infocollect is not None:
1151 1151 infocollect(nb)
1152 1152
1153 1153 p = self.parents(nb)
1154 1154 meta = nb + p[0] + p[1] + lookup(nb)
1155 1155 if a == -1:
1156 1156 d = self.revision(nb)
1157 1157 meta += mdiff.trivialdiffheader(len(d))
1158 1158 else:
1159 1159 d = self.revdiff(a, b)
1160 1160 yield changegroup.chunkheader(len(meta) + len(d))
1161 1161 yield meta
1162 1162 if len(d) > 2**20:
1163 1163 pos = 0
1164 1164 while pos < len(d):
1165 1165 pos2 = pos + 2 ** 18
1166 1166 yield d[pos:pos2]
1167 1167 pos = pos2
1168 1168 else:
1169 1169 yield d
1170 1170
1171 1171 yield changegroup.closechunk()
1172 1172
1173 1173 def addgroup(self, revs, linkmapper, transaction):
1174 1174 """
1175 1175 add a delta group
1176 1176
1177 1177 given a set of deltas, add them to the revision log. the
1178 1178 first delta is against its parent, which should be in our
1179 1179 log, the rest are against the previous delta.
1180 1180 """
1181 1181
1182 1182 #track the base of the current delta log
1183 1183 r = len(self)
1184 1184 t = r - 1
1185 1185 node = None
1186 1186
1187 1187 base = prev = nullrev
1188 1188 start = end = textlen = 0
1189 1189 if r:
1190 1190 end = self.end(t)
1191 1191
1192 1192 ifh = self.opener(self.indexfile, "a+")
1193 1193 isize = r * self._io.size
1194 1194 if self._inline:
1195 1195 transaction.add(self.indexfile, end + isize, r)
1196 1196 dfh = None
1197 1197 else:
1198 1198 transaction.add(self.indexfile, isize, r)
1199 1199 transaction.add(self.datafile, end)
1200 1200 dfh = self.opener(self.datafile, "a")
1201 1201
1202 1202 try:
1203 1203 # loop through our set of deltas
1204 1204 chain = None
1205 1205 for chunk in revs:
1206 1206 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1207 1207 link = linkmapper(cs)
1208 1208 if node in self.nodemap:
1209 1209 # this can happen if two branches make the same change
1210 1210 chain = node
1211 1211 continue
1212 1212 delta = buffer(chunk, 80)
1213 1213 del chunk
1214 1214
1215 1215 for p in (p1, p2):
1216 1216 if not p in self.nodemap:
1217 1217 raise LookupError(p, self.indexfile, _('unknown parent'))
1218 1218
1219 1219 if not chain:
1220 1220 # retrieve the parent revision of the delta chain
1221 1221 chain = p1
1222 1222 if not chain in self.nodemap:
1223 1223 raise LookupError(chain, self.indexfile, _('unknown base'))
1224 1224
1225 1225 # full versions are inserted when the needed deltas become
1226 1226 # comparable to the uncompressed text or when the previous
1227 1227 # version is not the one we have a delta against. We use
1228 1228 # the size of the previous full rev as a proxy for the
1229 1229 # current size.
1230 1230
1231 1231 if chain == prev:
1232 1232 cdelta = compress(delta)
1233 1233 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1234 1234 textlen = mdiff.patchedsize(textlen, delta)
1235 1235
1236 1236 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1237 1237 # flush our writes here so we can read it in revision
1238 1238 if dfh:
1239 1239 dfh.flush()
1240 1240 ifh.flush()
1241 1241 text = self.revision(chain)
1242 1242 if len(text) == 0:
1243 1243 # skip over trivial delta header
1244 1244 text = buffer(delta, 12)
1245 1245 else:
1246 1246 text = mdiff.patches(text, [delta])
1247 1247 del delta
1248 1248 chk = self._addrevision(text, transaction, link, p1, p2, None,
1249 1249 ifh, dfh)
1250 1250 if not dfh and not self._inline:
1251 1251 # addrevision switched from inline to conventional
1252 1252 # reopen the index
1253 1253 dfh = self.opener(self.datafile, "a")
1254 1254 ifh = self.opener(self.indexfile, "a")
1255 1255 if chk != node:
1256 1256 raise RevlogError(_("consistency error adding group"))
1257 1257 textlen = len(text)
1258 1258 else:
1259 1259 e = (offset_type(end, 0), cdeltalen, textlen, base,
1260 1260 link, self.rev(p1), self.rev(p2), node)
1261 1261 self.index.insert(-1, e)
1262 1262 self.nodemap[node] = r
1263 1263 entry = self._io.packentry(e, self.node, self.version, r)
1264 1264 if self._inline:
1265 1265 ifh.write(entry)
1266 1266 ifh.write(cdelta[0])
1267 1267 ifh.write(cdelta[1])
1268 1268 self.checkinlinesize(transaction, ifh)
1269 1269 if not self._inline:
1270 1270 dfh = self.opener(self.datafile, "a")
1271 1271 ifh = self.opener(self.indexfile, "a")
1272 1272 else:
1273 1273 dfh.write(cdelta[0])
1274 1274 dfh.write(cdelta[1])
1275 1275 ifh.write(entry)
1276 1276
1277 1277 t, r, chain, prev = r, r + 1, node, node
1278 1278 base = self.base(t)
1279 1279 start = self.start(base)
1280 1280 end = self.end(t)
1281 1281 finally:
1282 1282 if dfh:
1283 1283 dfh.close()
1284 1284 ifh.close()
1285 1285
1286 1286 return node
1287 1287
1288 def strip(self, minlink):
1288 def strip(self, minlink, transaction):
1289 1289 """truncate the revlog on the first revision with a linkrev >= minlink
1290 1290
1291 1291 This function is called when we're stripping revision minlink and
1292 1292 its descendants from the repository.
1293 1293
1294 1294 We have to remove all revisions with linkrev >= minlink, because
1295 1295 the equivalent changelog revisions will be renumbered after the
1296 1296 strip.
1297 1297
1298 1298 So we truncate the revlog on the first of these revisions, and
1299 1299 trust that the caller has saved the revisions that shouldn't be
1300 1300 removed and that it'll readd them after this truncation.
1301 1301 """
1302 1302 if len(self) == 0:
1303 1303 return
1304 1304
1305 1305 if isinstance(self.index, lazyindex):
1306 1306 self._loadindexmap()
1307 1307
1308 1308 for rev in self:
1309 1309 if self.index[rev][4] >= minlink:
1310 1310 break
1311 1311 else:
1312 1312 return
1313 1313
1314 1314 # first truncate the files on disk
1315 1315 end = self.start(rev)
1316 1316 if not self._inline:
1317 df = self.opener(self.datafile, "a")
1318 df.truncate(end)
1317 transaction.add(self.datafile, end)
1319 1318 end = rev * self._io.size
1320 1319 else:
1321 1320 end += rev * self._io.size
1322 1321
1323 indexf = self.opener(self.indexfile, "a")
1324 indexf.truncate(end)
1322 transaction.add(self.indexfile, end)
1325 1323
1326 1324 # then reset internal state in memory to forget those revisions
1327 1325 self._cache = None
1328 1326 self._chunkcache = None
1329 1327 for x in xrange(rev, len(self)):
1330 1328 del self.nodemap[self.node(x)]
1331 1329
1332 1330 del self.index[rev:-1]
1333 1331
1334 1332 def checksize(self):
1335 1333 expected = 0
1336 1334 if len(self):
1337 1335 expected = max(0, self.end(len(self) - 1))
1338 1336
1339 1337 try:
1340 1338 f = self.opener(self.datafile)
1341 1339 f.seek(0, 2)
1342 1340 actual = f.tell()
1343 1341 dd = actual - expected
1344 1342 except IOError, inst:
1345 1343 if inst.errno != errno.ENOENT:
1346 1344 raise
1347 1345 dd = 0
1348 1346
1349 1347 try:
1350 1348 f = self.opener(self.indexfile)
1351 1349 f.seek(0, 2)
1352 1350 actual = f.tell()
1353 1351 s = self._io.size
1354 1352 i = max(0, actual / s)
1355 1353 di = actual - (i * s)
1356 1354 if self._inline:
1357 1355 databytes = 0
1358 1356 for r in self:
1359 1357 databytes += max(0, self.length(r))
1360 1358 dd = 0
1361 1359 di = actual - len(self) * s - databytes
1362 1360 except IOError, inst:
1363 1361 if inst.errno != errno.ENOENT:
1364 1362 raise
1365 1363 di = 0
1366 1364
1367 1365 return (dd, di)
1368 1366
1369 1367 def files(self):
1370 1368 res = [ self.indexfile ]
1371 1369 if not self._inline:
1372 1370 res.append(self.datafile)
1373 1371 return res
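
For reference, the truncation points that the updated revlog.strip registers with the transaction are computed as in the hunk above: find the first revision whose linkrev is >= minlink, then truncate the data file at that revision's data offset and the index at rev * entrysize (or, for an inline revlog, truncate the single index file at data offset + rev * entrysize). A small sketch of that computation over a simplified index; each entry here is just (datastart, linkrev), whereas a real revlogng entry has more fields and is 64 bytes wide.

    def strippoints(index, entrysize, minlink, inline):
        """return (dataoffset, indexoffset) where truncation should happen,
        or None if no revision has linkrev >= minlink"""
        for rev, (datastart, linkrev) in enumerate(index):
            if linkrev >= minlink:
                break
        else:
            return None
        if inline:
            # index entries and data chunks share one file; no separate data offset
            return None, datastart + rev * entrysize
        return datastart, rev * entrysize

    # example: three revisions, 64-byte index entries, strip linkrev >= 1
    print(strippoints([(0, 0), (120, 1), (260, 2)], 64, 1, False))
    # -> (120, 64): cut the data file at byte 120, the index after one entry
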