##// END OF EJS Templates
bundle: get rid of chunkiter
Matt Mackall -
r12335:e21fe9c5 default
parent child Browse files
Show More
@@ -1,288 +1,288 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 22 import os, tempfile, errno
23 23 from mercurial import revlog, transaction, node, util
24 24 from mercurial import changegroup
25 25 from mercurial.i18n import _
26 26
27 27
def postorder(start, edges):
    """Return the nodes reachable from ``start`` in depth-first postorder.

    start -- iterable of nodes to begin from; they seed an explicit
             stack, so the last entry is explored first
    edges -- mapping from every node to an iterable of its successors

    Each node is emitted after all of its successors and exactly once,
    even if an entry of ``start`` is also reachable from another entry
    (or is listed twice).  The walk is iterative rather than recursive.
    A cycle in ``edges`` would make the loop run forever.
    """
    result = []
    visit = list(start)
    finished = set()

    while visit:
        cur = visit[-1]
        if cur in finished:
            # already emitted through another path; emitting it again
            # would duplicate the node in the result
            visit.pop()
            continue
        for p in edges[cur]:
            if p not in finished:
                # descend into the first unfinished successor; cur stays
                # on the stack and is re-examined afterwards
                visit.append(p)
                break
        else:
            # every successor is done, so cur can be emitted
            result.append(cur)
            finished.add(cur)
            visit.pop()

    return result
45 45
def toposort_reversepostorder(ui, rl):
    """Order the revisions of ``rl`` by a postorder walk over parent links.

    ui -- used for status and progress output
    rl -- revlog-like object: iterating it yields revision numbers and
          ``rl.parentrevs(rev)`` returns the pair ``(p1, p2)``

    Returns the list produced by ``postorder()`` when started from the
    heads (highest revision number first) and following parent edges.
    """
    # postorder of the reverse directed graph

    nullrev = node.nullrev
    # map rev to tuple of parent revs (p2 first)
    parentmap = {}
    headrevs = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=len(rl))
            p1, p2 = rl.parentrevs(rev)
            if p1 == p2 == nullrev:
                revparents = ()             # root node
            elif p1 == p2 or p2 == nullrev:
                revparents = (p1,)          # normal node
            else:
                revparents = (p2, p1)       # merge node
            parentmap[rev] = revparents
            # a rev is a head until something names it as a parent
            headrevs.add(rev)
            for parent in revparents:
                headrevs.discard(parent)
    finally:
        ui.progress(_('reading'), None)

    startrevs = sorted(headrevs, reverse=True)

    ui.status(_('sorting revs\n'))
    return postorder(startrevs, parentmap)
74 74
def toposort_postorderreverse(ui, rl):
    """Order the revisions of ``rl`` by a reversed postorder over child links.

    ui -- used for status and progress output
    rl -- revlog-like object: iterating it yields revision numbers and
          ``rl.parentrevs(rev)`` returns the pair ``(p1, p2)``

    Builds a rev -> children map, runs ``postorder()`` from the roots
    (lowest revision number first) and returns that result reversed.
    """
    # reverse-postorder of the reverse directed graph

    nullrev = node.nullrev
    childmap = {}
    rootrevs = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=len(rl))
            p1, p2 = rl.parentrevs(rev)
            if p1 == p2 == nullrev:
                rootrevs.add(rev)
            childmap[rev] = []
            # register rev with each real parent, p1 before p2
            for parent in (p1, p2):
                if parent != nullrev:
                    childmap[parent].append(rev)
    finally:
        ui.progress(_('reading'), None)

    startrevs = sorted(rootrevs)

    ui.status(_('sorting revs\n'))
    ordered = postorder(startrevs, childmap)
    return ordered[::-1]
102 102
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given order.

    ui    -- used for status and progress output
    r1    -- source revlog
    r2    -- destination revlog
    order -- revision numbers of ``r1`` in the order they should be written
    tr    -- transaction handed to ``r2.addgroup``
    """
    ui.status(_('writing revs\n'))

    # one-element list so the nested function can update the counter
    written = [0]
    def progress(*args):
        ui.progress(_('writing'), written[0], total=len(order))
        written[0] += 1

    # r1.group() is given nodes rather than revision numbers
    order = [r1.node(rev) for rev in order]

    # this is a bit ugly, but it works: the link revision is smuggled
    # through the group as a 20-character decimal string and parsed back
    def lookup(x):
        return "%020d" % r1.linkrev(r1.rev(x))
    def unlookup(x):
        return int(x, 10)

    try:
        group = util.chunkbuffer(r1.group(order, lookup, progress))
        r2.addgroup(group, unlookup, tr)
    finally:
        # always clear the progress display
        ui.progress(_('writing'), None)
123 123
124 124 def report(ui, r1, r2):
125 125 def getsize(r):
126 126 s = 0
127 127 for fn in (r.indexfile, r.datafile):
128 128 try:
129 129 s += os.stat(fn).st_size
130 130 except OSError, inst:
131 131 if inst.errno != errno.ENOENT:
132 132 raise
133 133 return s
134 134
135 135 oldsize = float(getsize(r1))
136 136 newsize = float(getsize(r2))
137 137
138 138 # argh: have to pass an int to %d, because a float >= 2^32
139 139 # blows up under Python 2.5 or earlier
140 140 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 141 % (int(oldsize), oldsize / 1024 / 1024))
142 142 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 143 % (int(newsize), newsize / 1024 / 1024))
144 144
145 145 shrink_percent = (oldsize - newsize) / oldsize * 100
146 146 shrink_factor = oldsize / newsize
147 147 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 148 % (shrink_percent, shrink_factor))
149 149
150 150 def shrink(ui, repo, **opts):
151 151 """shrink a revlog by reordering revisions
152 152
153 153 Rewrites all the entries in some revlog of the current repository
154 154 (by default, the manifest log) to save space.
155 155
156 156 Different sort algorithms have different performance
157 157 characteristics. Use ``--sort`` to select a sort algorithm so you
158 158 can determine which works best for your data.
159 159 """
160 160
161 161 if not repo.local():
162 162 raise util.Abort(_('not a local repository: %s') % repo.root)
163 163
164 164 fn = opts.get('revlog')
165 165 if not fn:
166 166 indexfn = repo.sjoin('00manifest.i')
167 167 else:
168 168 if not fn.endswith('.i'):
169 169 raise util.Abort(_('--revlog option must specify the revlog index '
170 170 'file (*.i), not %s') % opts.get('revlog'))
171 171
172 172 indexfn = os.path.realpath(fn)
173 173 store = repo.sjoin('')
174 174 if not indexfn.startswith(store):
175 175 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 176 'not %s') % (store, indexfn))
177 177
178 178 sortname = opts['sort']
179 179 try:
180 180 toposort = globals()['toposort_' + sortname]
181 181 except KeyError:
182 182 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 183
184 184 if not os.path.exists(indexfn):
185 185 raise util.Abort(_('no such file: %s') % indexfn)
186 186 if '00changelog' in indexfn:
187 187 raise util.Abort(_('shrinking the changelog '
188 188 'will corrupt your repository'))
189 189
190 190 ui.write(_('shrinking %s\n') % indexfn)
191 191 prefix = os.path.basename(indexfn)[:-1]
192 192 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193 193
194 194 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 195 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196 196
197 197 datafn, tmpdatafn = r1.datafile, r2.datafile
198 198
199 199 oldindexfn = indexfn + '.old'
200 200 olddatafn = datafn + '.old'
201 201 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 202 raise util.Abort(_('one or both of\n'
203 203 ' %s\n'
204 204 ' %s\n'
205 205 'exists from a previous run; please clean up '
206 206 'before running again') % (oldindexfn, olddatafn))
207 207
208 208 # Don't use repo.transaction(), because then things get hairy with
209 209 # paths: some need to be relative to .hg, and some need to be
210 210 # absolute. Doing it this way keeps things simple: everything is an
211 211 # absolute path.
212 212 lock = repo.lock(wait=False)
213 213 tr = transaction.transaction(ui.warn,
214 214 open,
215 215 repo.sjoin('journal'))
216 216
217 217 def ignoremissing(func):
218 218 def f(*args, **kw):
219 219 try:
220 220 return func(*args, **kw)
221 221 except OSError, inst:
222 222 if inst.errno != errno.ENOENT:
223 223 raise
224 224 return f
225 225
226 226 try:
227 227 try:
228 228 order = toposort(ui, r1)
229 229
230 230 suboptimal = 0
231 231 for i in xrange(1, len(order)):
232 232 parents = [p for p in r1.parentrevs(order[i])
233 233 if p != node.nullrev]
234 234 if parents and order[i - 1] not in parents:
235 235 suboptimal += 1
236 236 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237 237
238 238 writerevs(ui, r1, r2, order, tr)
239 239 report(ui, r1, r2)
240 240 tr.close()
241 241 except:
242 242 # Abort transaction first, so we truncate the files before
243 243 # deleting them.
244 244 tr.abort()
245 245 for fn in (tmpindexfn, tmpdatafn):
246 246 ignoremissing(os.unlink)(fn)
247 247 raise
248 248 if not opts.get('dry_run'):
249 249 # racy, both files cannot be renamed atomically
250 250 # copy files
251 251 util.os_link(indexfn, oldindexfn)
252 252 ignoremissing(util.os_link)(datafn, olddatafn)
253 253
254 254 # rename
255 255 util.rename(tmpindexfn, indexfn)
256 256 try:
257 257 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 258 util.rename(tmpdatafn, datafn)
259 259 except OSError, inst:
260 260 if inst.errno != errno.ENOENT:
261 261 raise
262 262 ignoremissing(os.unlink)(datafn)
263 263 else:
264 264 for fn in (tmpindexfn, tmpdatafn):
265 265 ignoremissing(os.unlink)(fn)
266 266 finally:
267 267 lock.release()
268 268
269 269 if not opts.get('dry_run'):
270 270 ui.write(_('note: old revlog saved in:\n'
271 271 ' %s\n'
272 272 ' %s\n'
273 273 '(You can delete those files when you are satisfied that your\n'
274 274 'repository is still sane. '
275 275 'Running \'hg verify\' is strongly recommended.)\n')
276 276 % (oldindexfn, olddatafn))
277 277
# Command table: command name -> (function, option list, synopsis).
# Each option looks like (short flag, long flag, default, help text) --
# NOTE(review): tuple layout inferred from the values; confirm against the
# Mercurial extension documentation.  All help strings go through _() so
# they can be translated.
cmdtable = {
    'shrink': (shrink,
               [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
                ('n', 'dry-run', None, _('do not shrink, simulate only')),
                ('', 'sort', 'reversepostorder',
                 _('name of sort algorithm to use')),
                ],
               _('hg shrink [--revlog PATH]'))
}

if __name__ == "__main__":
    # running the file directly only prints a pointer to the extension docs
    print("shrink-revlog.py is now an extension (see hg help extensions)")
@@ -1,282 +1,287 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from node import nullid
15 15 from i18n import _
16 16 import os, struct, tempfile, shutil
17 17 import changegroup, util, mdiff
18 18 import localrepo, changelog, manifest, filelog, revlog, error
19 19
class bundlerevlog(revlog.revlog):
    """Revlog extended in memory with the revisions found in a bundle.

    Write operations (addrevision, addgroup, strip, checksize) raise
    NotImplementedError.
    """

    def __init__(self, opener, indexfile, bundle,
                 linkmapper=None):
        """Read chunks from ``bundle`` and index the revisions they carry.

        opener, indexfile -- passed to ``revlog.revlog.__init__``
        bundle            -- object providing ``chunk()`` and ``tell()``
                             (plus ``seek()``/``read()`` for ``_chunk``)
        linkmapper        -- optional callable mapping the fourth 20-byte
                             header field of a chunk to a link revision;
                             without it the new revision number is used
        """
        # How it works:
        # to retrieve a revision, we need to know the offset of
        # the revision in the bundle (an unbundle object).
        #
        # We store this offset in the index (start). Revisions that come
        # from the bundle are recorded in self.basemap, which maps the
        # revision number to the node its delta applies to; the index
        # entry built for them also carries the node as an extra field.
        #
        revlog.revlog.__init__(self, opener, indexfile)
        self.bundle = bundle
        self.basemap = {}

        n = len(self)
        prev = None
        while 1:
            chunk = bundle.chunk()
            if not chunk:
                # an empty chunk ends this group
                break
            # offset at which the chunk payload starts in the bundle
            start = bundle.tell() - len(chunk)
            size = len(chunk)
            if size < 80:
                raise util.Abort(_("invalid changegroup"))
            # skip the 80-byte header: four 20-byte fields
            start += 80
            size -= 80
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            if node in self.nodemap:
                # already known; it still becomes the delta base of the
                # next chunk
                prev = node
                continue
            for p in (p1, p2):
                if p not in self.nodemap:
                    raise error.LookupError(p, self.indexfile,
                                            _("unknown parent"))
            if linkmapper is None:
                link = n
            else:
                link = linkmapper(cs)

            if not prev:
                prev = p1
            # start, size, full unc. size, base (unused), link, p1, p2, node
            e = (revlog.offset_type(start, 0), size, -1, -1, link,
                 self.rev(p1), self.rev(p2), node)
            self.basemap[n] = prev
            self.index.insert(-1, e)
            self.nodemap[node] = n
            prev = node
            n += 1

    def inbundle(self, rev):
        """is rev from the bundle"""
        return rev >= 0 and rev in self.basemap

    def bundlebase(self, rev):
        """return the node that the bundle delta of rev applies to"""
        return self.basemap[rev]

    def _chunk(self, rev):
        """return the stored chunk for rev, read from the bundle if needed"""
        # Warning: in case of bundle, the diff is against bundlebase,
        # not against rev - 1
        # XXX: could use some caching
        if self.inbundle(rev):
            self.bundle.seek(self.start(rev))
            return self.bundle.read(self.length(rev))
        return revlog.revlog._chunk(self, rev)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        frombundle1 = self.inbundle(rev1)
        frombundle2 = self.inbundle(rev2)
        if frombundle1 and frombundle2:
            # hot path for bundle: the stored delta is exactly what is
            # asked for
            if self.rev(self.bundlebase(rev2)) == rev1:
                return self._chunk(rev2)
        elif not frombundle1 and not frombundle2:
            return revlog.revlog.revdiff(self, rev1, rev2)

        # otherwise diff the two full texts
        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""

        text = None
        chain = []
        iter_node = node
        rev = self.rev(iter_node)
        # reconstruct the revision if it is from a changegroup: walk the
        # delta bases until a cached text or a non-bundle revision is hit
        while self.inbundle(rev):
            if self._cache and self._cache[0] == iter_node:
                text = self._cache[2]
                break
            chain.append(rev)
            iter_node = self.bundlebase(rev)
            rev = self.rev(iter_node)
        if text is None:
            text = revlog.revlog.revision(self, iter_node)

        # apply the collected deltas, oldest first
        for bundlerev in reversed(chain):
            delta = self._chunk(bundlerev)
            text = mdiff.patches(text, [delta])

        p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise error.RevlogError(_("integrity check failed on %s:%d")
                                    % (self.datafile, self.rev(node)))

        self._cache = (node, self.rev(node), text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        raise NotImplementedError

    def addgroup(self, revs, linkmapper, transaction):
        raise NotImplementedError

    def strip(self, rev, minlink):
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError
140 143
class bundlechangelog(bundlerevlog, changelog.changelog):
    """Changelog that also exposes the changesets stored in a bundle."""

    def __init__(self, opener, bundle):
        # the changelog initialiser runs first; self.indexfile is read
        # right after it for the bundle overlay
        changelog.changelog.__init__(self, opener)
        indexfile = self.indexfile
        bundlerevlog.__init__(self, opener, indexfile, bundle)
145 148
class bundlemanifest(bundlerevlog, manifest.manifest):
    """Manifest that also exposes the manifest revisions stored in a bundle."""

    def __init__(self, opener, bundle, linkmapper):
        # the manifest initialiser runs first; self.indexfile is read
        # right after it for the bundle overlay
        manifest.manifest.__init__(self, opener)
        indexfile = self.indexfile
        bundlerevlog.__init__(self, opener, indexfile, bundle, linkmapper)
151 154
class bundlefilelog(bundlerevlog, filelog.filelog):
    """Filelog that also exposes the file revisions stored in a bundle."""

    def __init__(self, opener, path, bundle, linkmapper):
        # the filelog initialiser runs first; self.indexfile is read
        # right after it for the bundle overlay
        filelog.filelog.__init__(self, opener, path)
        indexfile = self.indexfile
        bundlerevlog.__init__(self, opener, indexfile, bundle, linkmapper)
157 160
class bundlerepository(localrepo.localrepository):
    """Local repository overlaid with the contents of a bundle file.

    The changelog, manifest and file logs are bundle-aware revlogs that
    read the extra revisions straight from the (uncompressed) bundle.
    """

    def __init__(self, ui, path, bundlename):
        """Open the repository at ``path`` with ``bundlename`` on top.

        ui         -- passed to the localrepository initialiser
        path       -- parent repository path; if opening it raises
                      error.RepoError a fresh repository in a temporary
                      directory is used instead
        bundlename -- path of the bundle file
        """
        self._tempparent = None
        try:
            localrepo.localrepository.__init__(self, ui, path)
        except error.RepoError:
            self._tempparent = tempfile.mkdtemp()
            localrepo.instance(ui, self._tempparent, 1)
            localrepo.localrepository.__init__(self, ui, self._tempparent)

        if path:
            self._url = 'bundle:' + util.expandpath(path) + '+' + bundlename
        else:
            self._url = 'bundle:' + bundlename

        self.tempfile = None
        f = open(bundlename, "rb")
        self.bundle = changegroup.readbundle(f, bundlename)
        if self.bundle.compressed():
            # we need a seekable, decompressed bundle
            fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
                                            suffix=".hg10un", dir=self.path)
            self.tempfile = temp
            fptemp = os.fdopen(fdtemp, 'wb')

            try:
                fptemp.write("HG10UN")
                while 1:
                    chunk = self.bundle.read(2**18)
                    if not chunk:
                        break
                    fptemp.write(chunk)
            finally:
                fptemp.close()

            f = open(self.tempfile, "rb")
            self.bundle = changegroup.readbundle(f, bundlename)

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}

    @util.propertycache
    def changelog(self):
        c = bundlechangelog(self.sopener, self.bundle)
        # the manifest group starts where the changelog group ended
        self.manstart = self.bundle.tell()
        return c

    @util.propertycache
    def manifest(self):
        self.bundle.seek(self.manstart)
        m = bundlemanifest(self.sopener, self.bundle, self.changelog.rev)
        # the file groups start where the manifest group ended
        self.filestart = self.bundle.tell()
        return m

    @util.propertycache
    def manstart(self):
        # loading the changelog assigns self.manstart as a side effect
        self.changelog
        return self.manstart

    @util.propertycache
    def filestart(self):
        # loading the manifest assigns self.filestart as a side effect
        self.manifest
        return self.filestart

    def url(self):
        return self._url

    def file(self, f):
        """Return the filelog for ``f``, bundle-aware if the bundle has it."""
        if not self.bundlefilespos:
            # first call: scan the file groups once and remember where
            # the revisions of each file start
            self.bundle.seek(self.filestart)
            while 1:
                chunk = self.bundle.chunk()
                if not chunk:
                    break
                # this chunk is used as the file name key
                self.bundlefilespos[chunk] = self.bundle.tell()
                # skip over the chunks of this file's group
                while 1:
                    c = self.bundle.chunk()
                    if not c:
                        break

        if f[0] == '/':
            f = f[1:]
        if f in self.bundlefilespos:
            self.bundle.seek(self.bundlefilespos[f])
            return bundlefilelog(self.sopener, f, self.bundle,
                                 self.changelog.rev)
        else:
            return filelog.filelog(self.sopener, f)

    def __del__(self):
        # __init__ may have failed before every attribute was assigned,
        # so nothing here may assume an attribute exists.
        try:
            # drop the bundle reference before removing its backing file
            del self.bundle
        except AttributeError:
            pass
        # self.tempfile is the path of the decompressed copy (or None);
        # the bare name "tempfile" would be the imported module
        tempname = getattr(self, 'tempfile', None)
        if tempname is not None:
            os.unlink(tempname)
        if getattr(self, '_tempparent', None):
            shutil.rmtree(self._tempparent, True)

    def cancopy(self):
        return False

    def getcwd(self):
        return os.getcwd() # always outside the repo
257 262
def instance(ui, path, create):
    """Build a bundlerepository from a path or ``bundle:`` URL.

    ui     -- configuration source; ``bundle.mainreporoot`` names the
              parent repository when the URL does not
    path   -- ``bundle:REPO+BUNDLE``, ``bundle:BUNDLE`` or a plain
              bundle path (a ``file`` scheme is dropped first)
    create -- must be false; bundle repositories cannot be created

    Raises util.Abort when ``create`` is true.
    """
    if create:
        raise util.Abort(_('cannot create new bundle repository'))

    parentpath = ui.config("bundle", "mainreporoot", "")
    if parentpath:
        # Try to make the full path relative so we get a nice, short URL.
        # In particular, we don't want temp dir names in test outputs.
        cwd = os.getcwd()
        if parentpath == cwd:
            parentpath = ''
        else:
            cwdprefix = os.path.join(cwd,'')
            if parentpath.startswith(cwdprefix):
                parentpath = parentpath[len(cwdprefix):]

    path = util.drop_scheme('file', path)
    if not path.startswith('bundle:'):
        repopath, bundlename = parentpath, path
    else:
        path = util.drop_scheme('bundle', path)
        parts = path.split("+", 1)
        if len(parts) == 1:
            # no explicit parent repository in the URL
            repopath, bundlename = parentpath, parts[0]
        else:
            repopath, bundlename = parts
    return bundlerepository(ui, repopath, bundlename)
@@ -1,208 +1,193 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import util
10 10 import struct, os, bz2, zlib, tempfile
11 11
def getchunk(source):
    """Read one chunk from changegroup ``source`` and return its payload.

    A chunk is a 4-byte big-endian signed length followed by the payload;
    the length includes the 4 header bytes.  Returns "" at end of input
    or when the length is 4 or less.  Raises util.Abort when fewer
    payload bytes than announced can be read.
    """
    header = source.read(4)
    if not header:
        return ""
    (total,) = struct.unpack(">l", header)
    if total <= 4:
        return ""
    wanted = total - 4
    payload = source.read(wanted)
    if len(payload) < wanted:
        raise util.Abort(_("premature EOF reading chunk"
                           " (got %d bytes, expected %d)")
                          % (len(payload), wanted))
    return payload
26 26
def chunkiter(source, progress=None):
    """Yield the chunks of ``source`` until an empty chunk is read.

    source   -- object accepted by getchunk()
    progress -- optional callable invoked with no arguments before each
                chunk is yielded
    """
    while True:
        piece = getchunk(source)
        if not piece:
            return
        if progress is not None:
            progress()
        yield piece
37
def chunkheader(length):
    """Return the 4-byte header for a chunk whose payload is ``length`` bytes.

    The stored value is big-endian and counts the header itself.
    """
    total = length + 4
    return struct.pack(">l", total)
41 30
def closechunk():
    """Return the header of a zero-length chunk: a packed length of 0."""
    terminator = struct.pack(">l", 0)
    return terminator
45 34
class nocompress(object):
    """Pass-through object with a compressor-style compress()/flush() API."""

    def compress(self, x):
        # data goes out unchanged
        return x

    def flush(self):
        # nothing is buffered, so there is nothing left to emit
        return ""
51 40
# Bundle type name -> (header written to the file, compressor factory).
# The factory is called without arguments and must return an object with
# compress() and flush().
bundletypes = {
    "": ("", nocompress),
    "HG10UN": ("HG10UN", nocompress),
    # the header written for bzip2 bundles is only "HG10"
    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
}
58 47
def collector(cl, mmfs, files):
    """Return a callback that records what a changeset node touches.

    cl    -- object whose ``read(node)`` returns a sequence; item 0 is
             used as a key into ``mmfs`` and item 3 is an iterable
             merged into ``files``
    mmfs  -- dict updated in place: first node seen for each item 0
    files -- set updated in place with every item 3 entry
    """
    # Gather information about changeset nodes going out in a bundle.
    # We want to gather manifests needed and filelogs affected.
    def collect(node):
        entry = cl.read(node)
        files.update(entry[3])
        # setdefault keeps the first node that referenced this key
        mmfs.setdefault(entry[0], node)
    return collect
67 56
# hgweb uses this list to communicate its preferred type
bundlepriority = [
    'HG10GZ',
    'HG10BZ',
    'HG10UN',
]
70 59
def writebundle(cg, filename, bundletype):
    """Write changegroup ``cg`` to a bundle file and return its filename.

    cg         -- changegroup source read with getchunk()
    filename   -- destination path, opened with mode "wb"; if empty a
                  temporary file is created with tempfile.mkstemp
    bundletype -- key of ``bundletypes`` selecting header and compressor

    If anything fails before the bundle is complete, the file is deleted.
    """
    out = None
    cleanup = None
    try:
        if not filename:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            out = os.fdopen(fd, "wb")
        else:
            out = open(filename, "wb")
        # until the write completes, the file is removed on the way out
        cleanup = filename

        header, compressor = bundletypes[bundletype]
        out.write(header)
        z = compressor()

        # parse the changegroup data, otherwise we will block
        # in case of sshrepo because we don't know the end of the stream

        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, an empty chunkgroup is the end of the changegroup
        empty = False
        count = 0
        while not empty or count <= 2:
            empty = True
            count += 1
            while True:
                chunk = getchunk(cg)
                if not chunk:
                    break
                empty = False
                out.write(z.compress(chunkheader(len(chunk))))
                # feed the payload to the compressor in 1 MiB slices
                pos = 0
                while pos < len(chunk):
                    end = pos + 2**20
                    out.write(z.compress(chunk[pos:end]))
                    pos = end
            out.write(z.compress(closechunk()))
        out.write(z.flush())
        # success: keep the file
        cleanup = None
        return filename
    finally:
        if out is not None:
            out.close()
        if cleanup is not None:
            os.unlink(cleanup)
122 114
def decompressor(fh, alg):
    """Return an object to read the decompressed content of ``fh`` from.

    fh  -- file-like object positioned after the bundle header
    alg -- 'UN' (``fh`` is returned unchanged), 'GZ' or 'BZ' (a
           util.chunkbuffer over a decompressing generator)

    Raises util.Abort for any other ``alg``.
    """
    if alg == 'UN':
        return fh

    def gzchunks(f):
        zd = zlib.decompressobj()
        for chunk in f:
            yield zd.decompress(chunk)

    def bzchunks(f):
        zd = bz2.BZ2Decompressor()
        # prime the decompressor with "BZ" before the file data
        zd.decompress("BZ")
        for chunk in util.filechunkiter(f, 4096):
            yield zd.decompress(chunk)

    if alg == 'GZ':
        generator = gzchunks
    elif alg == 'BZ':
        generator = bzchunks
    else:
        raise util.Abort("unknown bundle compression '%s'" % alg)
    return util.chunkbuffer(generator(fh))
140 132
class unbundle10(object):
    """Reader for the chunk stream of a bundle.

    Wraps the stream returned by decompressor() and adds chunk-level
    reading.  ``callback``, when set, is called with no arguments each
    time a chunk header announcing a non-empty payload is read.
    """

    def __init__(self, fh, alg):
        self._stream = decompressor(fh, alg)
        self._type = alg
        self.callback = None

    def compressed(self):
        """return True unless the bundle type is 'UN'"""
        return self._type != 'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def chunklength(self):
        """read a chunk header and return the payload length (0 at the end)"""
        header = self.read(4)
        if not header:
            return 0
        # the stored length counts the 4 header bytes; never go negative
        (total,) = struct.unpack(">l", header)
        length = max(0, total - 4)
        if length and self.callback:
            self.callback()
        return length

    def chunk(self):
        """return the next chunk payload as a string, empty at a group end"""
        wanted = self.chunklength()
        data = self.read(wanted)
        if len(data) < wanted:
            raise util.Abort(_("premature EOF reading chunk"
                               " (got %d bytes, expected %d)")
                              % (len(data), wanted))
        return data

    def chunks(self):
        """yield chunk payloads until an empty one is read"""
        while True:
            piece = self.chunk()
            if not piece:
                return
            yield piece
180
class headerlessfixup(object):
    """File-like reader that replays already-consumed bytes before ``fh``.

    ``h`` holds bytes that were read from ``fh`` earlier; read() hands
    them out first and then continues with ``fh``.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            # replay buffer exhausted: read straight from the file
            return self._fh.read(n)
        data = self._h[:n]
        self._h = self._h[n:]
        missing = n - len(data)
        if missing > 0:
            # buffer ran short, top up from the underlying file
            data += self._fh.read(missing)
        return data
192 177
def readbundle(fh, fname):
    """Read the 6-byte bundle header from ``fh`` and return an unbundle10.

    fh    -- file-like object positioned at the start of the bundle
    fname -- name used in error messages; when empty, "stream" is used
             and a header starting with a NUL byte is treated as a
             headerless uncompressed bundle

    Raises util.Abort if the magic is not 'HG' or the version is not '10'.
    """
    header = fh.read(6)

    if not fname:
        fname = "stream"
        if header.startswith('\0') and not header.startswith('HG'):
            # the bytes already read belong to the data: replay them
            fh = headerlessfixup(fh, header)
            header = "HG10UN"

    magic = header[0:2]
    version = header[2:4]
    alg = header[4:6]

    if magic != 'HG':
        raise util.Abort(_('%s: not a Mercurial bundle') % fname)
    if version != '10':
        raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
    return unbundle10(fh, alg)
@@ -1,1893 +1,1893 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid, nullrev, short
9 9 from i18n import _
10 10 import repo, changegroup, subrepo, discovery, pushkey
11 11 import changelog, dirstate, filelog, manifest, context
12 12 import lock, transaction, store, encoding
13 13 import util, extensions, hook, error
14 14 import match as matchmod
15 15 import merge as mergemod
16 16 import tags as tagsmod
17 17 import url as urlmod
18 18 from lock import release
19 19 import weakref, errno, os, time, inspect
20 20 propertycache = util.propertycache
21 21
22 22 class localrepository(repo.repository):
# names of capabilities advertised by this repository class
# NOTE(review): how callers consume this set is not visible here - confirm
capabilities = set(('lookup', 'changegroupsubset', 'branchmap', 'pushkey'))
# repository format requirements this class can handle
supportedformats = set(('revlogv1', 'parentdelta'))
# every entry of .hg/requires must be in this set, otherwise __init__
# raises RepoError("requirement '%s' not supported")
supported = supportedformats | set(('store', 'fncache', 'shared'))
26 26
def __init__(self, baseui, path=None, create=0):
    """Open the repository rooted at ``path``, or create it if ``create``.

    baseui: ui object; a copy is stored as self.ui and loaded with the
        repository's own hgrc.
    path: working directory root (expanded and resolved to a real path).
    create: when true, build a fresh .hg directory; when false, read the
        existing requirements from .hg/requires.

    Raises error.RepoError when the repository is missing (and create is
    false), already exists (and create is true), has an unsupported
    requirement, or has a sharedpath pointing at a nonexistent directory.
    """
    repo.repository.__init__(self)
    self.root = os.path.realpath(util.expandpath(path))
    self.path = os.path.join(self.root, ".hg")
    self.origroot = path
    self.auditor = util.path_auditor(self.root, self._checknested)
    self.opener = util.opener(self.path)
    self.wopener = util.opener(self.root)
    self.baseui = baseui
    self.ui = baseui.copy()

    # a missing or unreadable hgrc is not an error
    try:
        self.ui.readconfig(self.join("hgrc"), self.root)
        extensions.loadall(self.ui)
    except IOError:
        pass

    if not os.path.isdir(self.path):
        if create:
            if not os.path.exists(path):
                util.makedirs(path)
            os.mkdir(self.path)
            requirements = ["revlogv1"]
            if self.ui.configbool('format', 'usestore', True):
                os.mkdir(os.path.join(self.path, "store"))
                requirements.append("store")
                if self.ui.configbool('format', 'usefncache', True):
                    requirements.append("fncache")
                # create an invalid changelog
                self.opener("00changelog.i", "a").write(
                    '\0\0\0\2' # represents revlogv2
                    ' dummy changelog to prevent using the old repo layout'
                )
            if self.ui.configbool('format', 'parentdelta', False):
                requirements.append("parentdelta")
        else:
            raise error.RepoError(_("repository %s not found") % path)
    elif create:
        raise error.RepoError(_("repository %s already exists") % path)
    else:
        # find requirements
        requirements = set()
        try:
            requirements = set(self.opener("requires").read().splitlines())
        except IOError, inst:
            # a missing requires file simply means "no requirements"
            if inst.errno != errno.ENOENT:
                raise
        # the loop body raises on the first unsupported requirement
        for r in requirements - self.supported:
            raise error.RepoError(_("requirement '%s' not supported") % r)

    # .hg/sharedpath, when present, redirects the store to another location
    self.sharedpath = self.path
    try:
        s = os.path.realpath(self.opener("sharedpath").read())
        if not os.path.exists(s):
            raise error.RepoError(
                _('.hg/sharedpath points to nonexistent directory %s') % s)
        self.sharedpath = s
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            raise

    self.store = store.store(requirements, self.sharedpath, util.opener)
    self.spath = self.store.path
    self.sopener = self.store.opener
    self.sjoin = self.store.join
    self.opener.createmode = self.store.createmode
    self._applyrequirements(requirements)
    if create:
        self._writerequirements()

    # These two define the set of tags for this repository. _tags
    # maps tag name to node; _tagtypes maps tag name to 'global' or
    # 'local'. (Global tags are defined by .hgtags across all
    # heads, and local tags are defined in .hg/localtags.) They
    # constitute the in-memory cache of tags.
    self._tags = None
    self._tagtypes = None

    self._branchcache = None # in UTF-8
    self._branchcachetip = None
    self.nodetagscache = None
    self.filterpats = {}
    self._datafilters = {}
    # weak references to the running transaction and the held locks
    self._transref = self._lockref = self._wlockref = None
111 111
def _applyrequirements(self, requirements):
    """Store ``requirements`` and rebuild the store opener's options.

    The options dict is reset on every call; 'parentdelta' is switched on
    only when it is among the requirements.
    """
    options = {}
    if 'parentdelta' in requirements:
        options['parentdelta'] = 1
    self.requirements = requirements
    self.sopener.options = options
117 117
def _writerequirements(self):
    """Write self.requirements to the 'requires' file, one entry per line.

    The file handle is closed even when a write fails, so a failed write
    does not leak an open descriptor.
    """
    reqfile = self.opener("requires", "w")
    try:
        for r in self.requirements:
            reqfile.write("%s\n" % r)
    finally:
        reqfile.close()
123 123
def _checknested(self, path):
    """Determine if path is a legal nested repository."""
    if not path.startswith(self.root):
        return False
    # strip the root and the separator that follows it
    subpath = path[len(self.root) + 1:]

    # XXX: Checking against the current working copy is wrong in
    # the sense that it can reject things like
    #
    #   $ hg cat -r 10 sub/x.txt
    #
    # if sub/ is no longer a subrepository in the working copy
    # parent revision.
    #
    # However, it can of course also allow things that would have
    # been rejected before, such as the above cat command if sub/
    # is a subrepository now, but was a normal directory before.
    # The old path auditor would have rejected by mistake since it
    # panics when it sees sub/.hg/.
    #
    # All in all, checking against the working copy seems sensible
    # since we want to prevent access to nested repositories on
    # the filesystem *now*.
    wctx = self[None]
    components = util.splitpath(subpath)
    # try the longest candidate prefix first, dropping one trailing
    # component per round
    while components:
        candidate = os.sep.join(components)
        if candidate not in wctx.substate:
            components.pop()
            continue
        if candidate == subpath:
            return True
        # the path lies inside a known subrepo: let it decide
        nested = wctx.sub(candidate)
        return nested.checknested(subpath[len(candidate) + 1:])
    return False
160 160
161 161
@propertycache
def changelog(self):
    """Changelog of this repository (cached property).

    When HG_PENDING is set to a path starting with this repository's root,
    pending changelog data is read from '00changelog.i.a' as well.  The
    changelog's version is recorded as the store opener's 'defversion'.
    """
    cl = changelog.changelog(self.sopener)
    pending = os.environ.get('HG_PENDING')
    if pending is not None and pending.startswith(self.root):
        cl.readpending('00changelog.i.a')
    self.sopener.options['defversion'] = cl.version
    return cl
171 171
@propertycache
def manifest(self):
    """Manifest revlog opened through the store opener (cached property)."""
    mf = manifest.manifest(self.sopener)
    return mf
175 175
@propertycache
def dirstate(self):
    """Dirstate object for this working directory (cached property)."""
    ds = dirstate.dirstate(self.opener, self.ui, self.root)
    return ds
179 179
def __getitem__(self, changeid):
    """Return a context: the working context for None, else a changectx."""
    if changeid is not None:
        return context.changectx(self, changeid)
    return context.workingctx(self)
184 184
def __contains__(self, changeid):
    """Return True when ``changeid`` resolves to a truthy node via lookup()."""
    try:
        found = self.lookup(changeid)
    except error.RepoLookupError:
        return False
    return bool(found)
190 190
def __nonzero__(self):
    """A repository object is always truthy, even when len(self) is 0."""
    truthy = True
    return truthy
193 193
def __len__(self):
    """Return the number of revisions in the changelog."""
    cl = self.changelog
    return len(cl)
196 196
def __iter__(self):
    """Yield every revision number from 0 up to len(self) - 1."""
    count = len(self)
    for rev in xrange(count):
        yield rev
200 200
def url(self):
    """Return the repository location as 'file:' followed by the root."""
    scheme = 'file:'
    return scheme + self.root
203 203
def hook(self, name, throw=False, **args):
    """Run the hook ``name`` for this repository via the hook module.

    ``throw`` and the extra keyword arguments are passed through unchanged;
    the hook module's return value is returned.
    """
    ui = self.ui
    return hook.hook(ui, self, name, throw, **args)
206 206
# characters that may not appear in a tag name; _tag() aborts on any of them
tag_disallowed = ':\r\n'
208 208
def _tag(self, names, node, message, local, user, date, extra={}):
    """Record tag(s) ``names`` for ``node`` and fire the tag hooks.

    names: one tag name (str) or a sequence of names.
    local: when true, append to .hg/localtags and return None; otherwise
        append to .hgtags in the working directory, commit that file and
        return the node of the new changeset.
    message, user, date, extra: passed to commit() for non-local tags.

    Raises util.Abort when a name contains a character listed in
    tag_disallowed.  The 'pretag' hook runs (throw=True) before anything
    is written; the 'tag' hook runs afterwards for each name.
    """
    if isinstance(names, str):
        allchars = names
        names = (names,)
    else:
        allchars = ''.join(names)
    for c in self.tag_disallowed:
        if c in allchars:
            raise util.Abort(_('%r cannot be used in a tag name') % c)

    branches = self.branchmap()
    for name in names:
        self.hook('pretag', throw=True, node=hex(node), tag=name,
                  local=local)
        if name in branches:
            self.ui.warn(_("warning: tag %s conflicts with existing"
                           " branch name\n") % name)

    def writetags(fp, names, munge, prevtags):
        # append at end of file, making sure the previous content ends
        # with a newline first
        fp.seek(0, 2)
        if prevtags and prevtags[-1] != '\n':
            fp.write('\n')
        for name in names:
            m = munge and munge(name) or name
            # for a tag that already has a type, write its previous node
            # (nullid if unknown) before the new entry
            if self._tagtypes and name in self._tagtypes:
                old = self._tags.get(name, nullid)
                fp.write('%s %s\n' % (hex(old), m))
            fp.write('%s %s\n' % (hex(node), m))
        fp.close()

    prevtags = ''
    if local:
        # open for update when the file exists, otherwise create it
        try:
            fp = self.opener('localtags', 'r+')
        except IOError:
            fp = self.opener('localtags', 'a')
        else:
            prevtags = fp.read()

        # local tags are stored in the current charset
        writetags(fp, names, None, prevtags)
        for name in names:
            self.hook('tag', node=hex(node), tag=name, local=local)
        return

    try:
        fp = self.wfile('.hgtags', 'rb+')
    except IOError:
        fp = self.wfile('.hgtags', 'ab')
    else:
        prevtags = fp.read()

    # committed tags are stored in UTF-8
    writetags(fp, names, encoding.fromlocal, prevtags)

    if '.hgtags' not in self.dirstate:
        self[None].add(['.hgtags'])

    # commit only .hgtags
    m = matchmod.exact(self.root, '', ['.hgtags'])
    tagnode = self.commit(message, user, date, extra=extra, match=m)

    for name in names:
        self.hook('tag', node=hex(node), tag=name, local=local)

    return tagnode
274 274
def tag(self, names, node, message, local, user, date):
    '''tag a revision with one or more symbolic names.

    names is a list of strings or, when adding a single tag, names may be a
    string.

    if local is True, the tags are stored in a per-repository file.
    otherwise, they are stored in the .hgtags file, and a new
    changeset is committed with the change.

    arguments:

    local: whether to store tags in non-version-controlled file

    message: commit message to use if committing

    user: name of user to use if committing

    date: date tuple to use if committing

    aborts when .hgtags shows up in any of the first five status lists.'''

    for filelist in self.status()[:5]:
        if '.hgtags' in filelist:
            raise util.Abort(_('working copy of .hgtags is changed '
                               '(please commit .hgtags manually)'))

    # instantiate the cache
    self.tags()
    self._tag(names, node, message, local, user, date)
303 303
def tags(self):
    '''return a mapping of tag to node'''
    if self._tags is not None:
        return self._tags
    # first use: compute both caches in one go
    self._tags, self._tagtypes = self._findtags()
    return self._tags
310 310
def _findtags(self):
    '''Do the hard work of finding tags.  Return a pair of dicts
    (tags, tagtypes) where tags maps tag name to node, and tagtypes
    maps tag name to a string like \'global\' or \'local\'.
    Subclasses or extensions are free to add their own tags, but
    should be aware that the returned dicts will be retained for the
    duration of the localrepo object.'''

    # XXX what tagtype should subclasses/extensions use?  Currently
    # mq and bookmarks add tags, but do not set the tagtype at all.
    # Should each extension invent its own tag type?  Should there
    # be one tagtype for all such "virtual" tags?  Or is the status
    # quo fine?

    found = {} # map tag name to (node, hist)
    types_utf8 = {}

    tagsmod.findglobaltags(self.ui, self, found, types_utf8)
    tagsmod.readlocaltags(self.ui, self, found, types_utf8)

    # Build the return dicts.  Have to re-encode tag names because
    # the tags module always uses UTF-8 (in order not to lose info
    # writing to the cache), but the rest of Mercurial wants them in
    # local encoding.
    tolocal = encoding.tolocal
    tags = {}
    for name, entry in found.iteritems():
        node, hist = entry
        if node == nullid:
            # entries pointing at nullid are left out
            continue
        tags[tolocal(name)] = node
    tags['tip'] = self.changelog.tip()

    tagtypes = {}
    for name, value in types_utf8.iteritems():
        tagtypes[tolocal(name)] = value
    return (tags, tagtypes)
343 343
def tagtype(self, tagname):
    '''
    return the type of the given tag. result can be:

    'local'  : a local tag
    'global' : a global tag
    None     : tag does not exist
    '''
    # tags() fills self._tagtypes as a side effect
    self.tags()
    kinds = self._tagtypes
    return kinds.get(tagname)
356 356
def tagslist(self):
    '''return a list of (tag, node) pairs ordered by revision

    tags whose node cannot be resolved by the changelog sort first.'''
    l = []
    for t, n in self.tags().iteritems():
        try:
            r = self.changelog.rev(n)
        except KeyboardInterrupt:
            # a lookup failure is expected here, a user interrupt is not:
            # never swallow it
            raise
        except Exception:
            r = -2 # sort to the beginning of the list if unknown
        l.append((r, t, n))
    return [(t, n) for r, t, n in sorted(l)]
367 367
def nodetags(self, node):
    '''return the tags associated with a node'''
    cache = self.nodetagscache
    if not cache:
        # (re)build the node -> sorted tag names map
        cache = self.nodetagscache = {}
        for tagname, tagnode in self.tags().iteritems():
            cache.setdefault(tagnode, []).append(tagname)
        for names in cache.itervalues():
            names.sort()
    return cache.get(node, [])
377 377
def _branchtags(self, partial, lrev):
    """Bring the branch head map ``partial`` up to date and return it.

    ``lrev`` is the last revision already covered by ``partial``.  When it
    is not the tip, the newer revisions are folded in and the result is
    written back to the on-disk cache.
    """
    # TODO: rename this function?
    tiprev = len(self) - 1
    if lrev == tiprev:
        return partial

    missing = (self[r] for r in xrange(lrev + 1, tiprev + 1))
    self._updatebranchcache(partial, missing)
    self._writebranchcache(partial, self.changelog.tip(), tiprev)
    return partial
387 387
def updatebranchcache(self):
    """Refresh self._branchcache if the changelog tip has changed.

    Returns the cached map when it is already current; returns None after
    a refresh.
    """
    tip = self.changelog.tip()
    if self._branchcache is not None and self._branchcachetip == tip:
        return self._branchcache

    oldtip = self._branchcachetip
    self._branchcachetip = tip
    if oldtip is not None and oldtip in self.changelog.nodemap:
        # the in-memory cache is still valid up to oldtip: extend it
        lrev = self.changelog.rev(oldtip)
        partial = self._branchcache
    else:
        partial, last, lrev = self._readbranchcache()

    self._branchtags(partial, lrev)
    # this private cache holds all heads (not just tips)
    self._branchcache = partial
404 404
def branchmap(self):
    '''returns a dictionary {branch: [branchheads]}'''
    # refresh first; the return value of the refresh is ignored on purpose
    self.updatebranchcache()
    heads_by_branch = self._branchcache
    return heads_by_branch
409 409
def branchtags(self):
    '''return a dict where branch names map to the tipmost head of
    the branch, open heads come before closed'''
    result = {}
    for branch, heads in self.branchmap().iteritems():
        # default to the last head; prefer the last one not marked 'close'
        chosen = heads[-1]
        for head in reversed(heads):
            extra = self.changelog.read(head)[5]
            if 'close' not in extra:
                chosen = head
                break
        result[branch] = chosen
    return result
422 422
423 423
424 424 def _readbranchcache(self):
425 425 partial = {}
426 426 try:
427 427 f = self.opener("branchheads.cache")
428 428 lines = f.read().split('\n')
429 429 f.close()
430 430 except (IOError, OSError):
431 431 return {}, nullid, nullrev
432 432
433 433 try:
434 434 last, lrev = lines.pop(0).split(" ", 1)
435 435 last, lrev = bin(last), int(lrev)
436 436 if lrev >= len(self) or self[lrev].node() != last:
437 437 # invalidate the cache
438 438 raise ValueError('invalidating branch cache (tip differs)')
439 439 for l in lines:
440 440 if not l:
441 441 continue
442 442 node, label = l.split(" ", 1)
443 443 partial.setdefault(label.strip(), []).append(bin(node))
444 444 except KeyboardInterrupt:
445 445 raise
446 446 except Exception, inst:
447 447 if self.ui.debugflag:
448 448 self.ui.warn(str(inst), '\n')
449 449 partial, last, lrev = {}, nullid, nullrev
450 450 return partial, last, lrev
451 451
def _writebranchcache(self, branches, tip, tiprev):
    """Write the branch head cache file; I/O errors are ignored.

    The first line is "<tip hex> <tiprev>", followed by one
    "<node hex> <label>" line per branch head.
    """
    try:
        out = self.opener("branchheads.cache", "w", atomictemp=True)
        out.write("%s %s\n" % (hex(tip), tiprev))
        for label, nodes in branches.iteritems():
            for node in nodes:
                out.write("%s %s\n" % (hex(node), label))
        out.rename()
    except (IOError, OSError):
        # the cache is only an optimisation: failing to write it is fine
        pass
462 462
def _updatebranchcache(self, partial, ctxgen):
    """Fold the changesets from ``ctxgen`` into the head map ``partial``.

    partial: {branch: [head nodes]}, updated in place.
    ctxgen: iterable of contexts providing branch() and node().
    """
    # collect new branch entries
    newbranches = {}
    for c in ctxgen:
        newbranches.setdefault(c.branch(), []).append(c.node())
    # if older branchheads are reachable from new ones, they aren't
    # really branchheads. Note checking parents is insufficient:
    # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
    for branch, newnodes in newbranches.iteritems():
        bheads = partial.setdefault(branch, [])
        bheads.extend(newnodes)
        if len(bheads) <= 1:
            continue
        # starting from tip means fewer passes over reachable
        while newnodes:
            latest = newnodes.pop()
            # already pruned by an earlier pass: nothing to do
            if latest not in bheads:
                continue
            # node of the lowest-revision candidate head, passed to
            # reachable() as its second argument
            minbhrev = self[min([self[bh].rev() for bh in bheads])].node()
            reachable = self.changelog.reachable(latest, minbhrev)
            # latest itself must survive the filtering below
            reachable.remove(latest)
            bheads = [b for b in bheads if b not in reachable]
        # bheads was rebound to new lists above, so store it back
        partial[branch] = bheads
486 486
def lookup(self, key):
    """Resolve ``key`` to a changeset node.

    Tried in order: integer revision, '.', 'null', 'tip', exact changelog
    match, tag name, branch name, partial changelog match.  Raises
    error.Abort when the key is an unknown working directory parent and
    error.RepoLookupError when nothing matches.
    """
    if isinstance(key, int):
        return self.changelog.node(key)
    if key == '.':
        return self.dirstate.parents()[0]
    if key == 'null':
        return nullid
    if key == 'tip':
        return self.changelog.tip()

    exact = self.changelog._match(key)
    if exact:
        return exact
    if key in self.tags():
        return self.tags()[key]
    if key in self.branchtags():
        return self.branchtags()[key]
    partial = self.changelog._partialmatch(key)
    if partial:
        return partial

    # can't find key, check if it might have come from damaged dirstate
    if key in self.dirstate.parents():
        raise error.Abort(_("working directory has unknown parent '%s'!")
                          % short(key))
    # a 20-character key is shown in hex form in the error message
    try:
        if len(key) == 20:
            key = hex(key)
    except:
        pass
    raise error.RepoLookupError(_("unknown revision '%s'") % key)
517 517
def lookupbranch(self, key, remote=None):
    """Return the branch name that ``key`` refers to.

    If ``key`` is a branch name in the branch map of ``remote`` (or of
    this repository when no remote is given) it is returned unchanged.
    Otherwise it is resolved as a changeset in ``remote`` when that is a
    local repository, else in this one, and that changeset's branch is
    returned.
    """
    source = remote or self
    if key in source.branchmap():
        return key

    if remote and remote.local():
        source = remote
    else:
        source = self
    return source[key].branch()
525 525
def local(self):
    """Return True: this class always reports itself as local."""
    is_local = True
    return is_local
528 528
def join(self, f):
    """Return ``f`` joined onto the .hg directory path (self.path)."""
    base = self.path
    return os.path.join(base, f)
531 531
def wjoin(self, f):
    """Return ``f`` joined onto the working directory root (self.root)."""
    base = self.root
    return os.path.join(base, f)
534 534
def file(self, f):
    """Return the filelog for ``f``; one leading '/' is dropped first."""
    name = f
    if name[0] == '/':
        name = name[1:]
    return filelog.filelog(self.sopener, name)
539 539
def changectx(self, changeid):
    """Return the context for ``changeid``; same as ``self[changeid]``."""
    ctx = self[changeid]
    return ctx
542 542
def parents(self, changeid=None):
    '''get list of changectxs for parents of changeid'''
    ctx = self[changeid]
    return ctx.parents()
546 546
def filectx(self, path, changeid=None, fileid=None):
    """Return a file context for ``path``.

    changeid can be a changeset revision, node, or tag.
    fileid can be a file revision or node.
    """
    fctx = context.filectx(self, path, changeid, fileid)
    return fctx
551 551
def getcwd(self):
    """Return the current directory as reported by the dirstate."""
    ds = self.dirstate
    return ds.getcwd()
554 554
def pathto(self, f, cwd=None):
    """Forward to dirstate.pathto(f, cwd) and return its result."""
    ds = self.dirstate
    return ds.pathto(f, cwd)
557 557
def wfile(self, f, mode='r'):
    """Open working directory file ``f`` through the working opener."""
    opener = self.wopener
    return opener(f, mode)
560 560
def _link(self, f):
    """Tell whether working directory file ``f`` is a symbolic link."""
    fullpath = self.wjoin(f)
    return os.path.islink(fullpath)
563 563
def _loadfilter(self, filter):
    """Build and cache the filter list for config section ``filter``.

    Each configured (pattern, command) pair becomes a (matcher, function,
    params) tuple stored in self.filterpats[filter].  A command of '!'
    disables the pattern.  If the command starts with the name of a
    registered data filter, that filter is used and the rest of the
    command becomes its params; otherwise the command is run through
    util.filter.  Nothing is done when the section is already cached.
    """
    if filter in self.filterpats:
        return

    def wrapold(oldfn):
        # Bind oldfn per wrapper.  A lambda created directly in the loop
        # below would see only the *last* value assigned to a shared
        # loop variable, so earlier patterns would call the wrong filter.
        return lambda s, c, **kwargs: oldfn(s, c)

    l = []
    for pat, cmd in self.ui.configitems(filter):
        if cmd == '!':
            continue
        mf = matchmod.match(self.root, '', [pat])
        fn = None
        params = cmd
        for name, filterfn in self._datafilters.iteritems():
            if cmd.startswith(name):
                fn = filterfn
                params = cmd[len(name):].lstrip()
                break
        if not fn:
            fn = lambda s, c, **kwargs: util.filter(s, c)
        # Wrap old filters not supporting keyword arguments
        if not inspect.getargspec(fn)[2]:
            fn = wrapold(fn)
        l.append((mf, fn, params))
    self.filterpats[filter] = l
586 586
def _filter(self, filter, filename, data):
    """Run ``data`` through the first filter whose pattern matches.

    ``filter`` names the filter section (loaded on demand).  Only the
    first matching entry is applied; with no match ``data`` is returned
    unchanged.
    """
    self._loadfilter(filter)

    for matches, fn, cmd in self.filterpats[filter]:
        if not matches(filename):
            continue
        self.ui.debug("filtering %s through %s\n" % (filename, cmd))
        return fn(data, cmd, ui=self.ui, repo=self, filename=filename)

    return data
597 597
def adddatafilter(self, name, filter):
    """Register data filter function ``filter`` under ``name``."""
    registry = self._datafilters
    registry[name] = filter
600 600
def wread(self, filename):
    """Read a working directory file and run it through the encode filters.

    For a symbolic link the link target is used as the data.
    """
    if not self._link(filename):
        raw = self.wopener(filename, 'r').read()
    else:
        raw = os.readlink(self.wjoin(filename))
    return self._filter("encode", filename, raw)
607 607
def wwrite(self, filename, data, flags):
    """Write ``data`` to a working directory file after decode filtering.

    Any existing file is removed first.  With 'l' in ``flags`` a symlink
    pointing at the filtered data is created instead of a regular file;
    otherwise 'x' in ``flags`` additionally sets the flags on the written
    file via util.set_flags.
    """
    data = self._filter("decode", filename, data)
    try:
        os.unlink(self.wjoin(filename))
    except OSError:
        # nothing to remove (or not removable): carry on and write
        pass

    if 'l' in flags:
        self.wopener.symlink(data, filename)
        return

    self.wopener(filename, 'w').write(data)
    if 'x' in flags:
        util.set_flags(self.wjoin(filename), False, True)
620 620
def wwritedata(self, filename, data):
    """Return ``data`` run through the decode filters for ``filename``."""
    decoded = self._filter("decode", filename, data)
    return decoded
623 623
def transaction(self, desc):
    """Open a transaction described by ``desc``, nesting if one is running.

    Before a new transaction starts, the dirstate content, the current
    branch, and the repository length plus ``desc`` are saved to journal.*
    files; the renames list passed to aftertrans maps each journal file to
    its undo.* counterpart.  Raises error.RepoError when a journal is
    already present.
    """
    current = None
    if self._transref:
        current = self._transref() or None
    if current and current.running():
        return current.nest()

    # abort here if the journal already exists
    if os.path.exists(self.sjoin("journal")):
        raise error.RepoError(
            _("abandoned transaction found - run hg recover"))

    # save dirstate for rollback
    try:
        saved = self.opener("dirstate").read()
    except IOError:
        saved = ""
    self.opener("journal.dirstate", "w").write(saved)
    self.opener("journal.branch", "w").write(self.dirstate.branch())
    self.opener("journal.desc", "w").write("%d\n%s\n" % (len(self), desc))

    renames = [
        (self.sjoin("journal"), self.sjoin("undo")),
        (self.join("journal.dirstate"), self.join("undo.dirstate")),
        (self.join("journal.branch"), self.join("undo.branch")),
        (self.join("journal.desc"), self.join("undo.desc")),
    ]
    tr = transaction.transaction(self.ui.warn, self.sopener,
                                 self.sjoin("journal"),
                                 aftertrans(renames),
                                 self.store.createmode)
    # only a weak reference is kept
    self._transref = weakref.ref(tr)
    return tr
653 653
def recover(self):
    """Roll back an interrupted transaction, if there is one.

    Returns True when a journal was found and rolled back, False (after a
    warning) otherwise.  The store lock is held for the duration.
    """
    lock = self.lock()
    try:
        if not os.path.exists(self.sjoin("journal")):
            self.ui.warn(_("no interrupted transaction available\n"))
            return False

        self.ui.status(_("rolling back interrupted transaction\n"))
        transaction.rollback(self.sopener, self.sjoin("journal"),
                             self.ui.warn)
        self.invalidate()
        return True
    finally:
        lock.release()
668 668
def rollback(self, dryrun=False):
    """Undo the last transaction using the undo.* files.

    With ``dryrun`` only the status message is printed.  Returns 1 (after
    a warning) when no undo information exists, None otherwise.  Both the
    working directory lock and the store lock are held while running.
    """
    wlock = lock = None
    try:
        wlock = self.wlock()
        lock = self.lock()
        if os.path.exists(self.sjoin("undo")):
            # undo.desc holds "<old length>\n<desc>\n[<detail>]".  Leave
            # desc as None when the file is missing or has fewer than two
            # lines, so the generic message below is used instead of
            # hitting an unbound local.
            desc = None
            try:
                args = self.opener("undo.desc", "r").read().splitlines()
                if len(args) >= 3 and self.ui.verbose:
                    desc = _("rolling back to revision %s"
                             " (undo %s: %s)\n") % (
                             int(args[0]) - 1, args[1], args[2])
                elif len(args) >= 2:
                    desc = _("rolling back to revision %s (undo %s)\n") % (
                             int(args[0]) - 1, args[1])
            except IOError:
                pass
            if desc is None:
                desc = _("rolling back unknown transaction\n")
            self.ui.status(desc)
            if dryrun:
                return
            transaction.rollback(self.sopener, self.sjoin("undo"),
                                 self.ui.warn)
            util.rename(self.join("undo.dirstate"), self.join("dirstate"))
            try:
                branch = self.opener("undo.branch").read()
                self.dirstate.setbranch(branch)
            except IOError:
                self.ui.warn(_("Named branch could not be reset, "
                               "current branch still is: %s\n")
                             % encoding.tolocal(self.dirstate.branch()))
            self.invalidate()
            self.dirstate.invalidate()
            self.destroyed()
        else:
            self.ui.warn(_("no rollback information available\n"))
            return 1
    finally:
        release(lock, wlock)
707 707
def invalidatecaches(self):
    """Drop the in-memory tag and branch caches."""
    # tag caches
    self._tags = self._tagtypes = None
    self.nodetagscache = None
    # branch caches
    self._branchcache = None # in UTF-8
    self._branchcachetip = None
714 714
def invalidate(self):
    """Forget the cached changelog and manifest, then drop the other caches.

    Only attributes actually present in the instance dict are deleted.
    """
    for attr in ("changelog", "manifest"):
        if attr in self.__dict__:
            delattr(self, attr)
    self.invalidatecaches()
720 720
721 721 def _lock(self, lockname, wait, releasefn, acquirefn, desc):
722 722 try:
723 723 l = lock.lock(lockname, 0, releasefn, desc=desc)
724 724 except error.LockHeld, inst:
725 725 if not wait:
726 726 raise
727 727 self.ui.warn(_("waiting for lock on %s held by %r\n") %
728 728 (desc, inst.locker))
729 729 # default to 600 seconds timeout
730 730 l = lock.lock(lockname, int(self.ui.config("ui", "timeout", "600")),
731 731 releasefn, desc=desc)
732 732 if acquirefn:
733 733 acquirefn()
734 734 return l
735 735
def lock(self, wait=True):
    '''Lock the repository store (.hg/store) and return the lock
    object; the repository itself only keeps a weak reference to it.
    Use this before modifying the store (e.g. committing or
    stripping). If you are opening a transaction, get a lock as well.'''
    existing = self._lockref and self._lockref()
    if existing is not None and existing.held:
        # already held: take it once more and hand out the same object
        existing.lock()
        return existing

    description = _('repository %s') % self.origroot
    fresh = self._lock(self.sjoin("lock"), wait, None, self.invalidate,
                       description)
    self._lockref = weakref.ref(fresh)
    return fresh
749 749
def wlock(self, wait=True):
    '''Lock the non-store parts of the repository (everything under
    .hg except .hg/store) and return the lock object; the repository
    itself only keeps a weak reference to it.
    Use this before modifying files in .hg.'''
    existing = self._wlockref and self._wlockref()
    if existing is not None and existing.held:
        # already held: take it once more and hand out the same object
        existing.lock()
        return existing

    description = _('working directory of %s') % self.origroot
    fresh = self._lock(self.join("wlock"), wait, self.dirstate.write,
                       self.dirstate.invalidate, description)
    self._wlockref = weakref.ref(fresh)
    return fresh
764 764
def _filecommit(self, fctx, manifest1, manifest2, linkrev, tr, changelist):
    """
    commit an individual file as part of a larger transaction

    fctx: file context giving path(), data(), renamed() and flags().
    manifest1, manifest2: manifests of the first and second parent.
    linkrev, tr: passed through to the filelog's add().
    changelist: list that the file name is appended to when the file
        counts as changed.

    Returns the node from the filelog's add() when a new file revision is
    written, otherwise the first file parent.
    """

    fname = fctx.path()
    text = fctx.data()
    flog = self.file(fname)
    fparent1 = manifest1.get(fname, nullid)
    # fparent2o keeps the original second parent for the flags check below
    fparent2 = fparent2o = manifest2.get(fname, nullid)

    meta = {}
    copy = fctx.renamed()
    if copy and copy[0] != fname:
        # Mark the new revision of this file as a copy of another
        # file.  This copy data will effectively act as a parent
        # of this new revision.  If this is a merge, the first
        # parent will be the nullid (meaning "look up the copy data")
        # and the second one will be the other parent.  For example:
        #
        # 0 --- 1 --- 3   rev1 changes file foo
        #   \       /     rev2 renames foo to bar and changes it
        #    \- 2 -/      rev3 should have bar with all changes and
        #                      should record that bar descends from
        #                      bar in rev2 and foo in rev1
        #
        # this allows this merge to succeed:
        #
        # 0 --- 1 --- 3   rev4 reverts the content change from rev2
        #   \       /     merging rev3 and rev4 should use bar@rev2
        #    \- 2 --- 4        as the merge base
        #

        cfname = copy[0]
        crev = manifest1.get(cfname)
        newfparent = fparent2

        if manifest2: # branch merge
            if fparent2 == nullid or crev is None: # copied on remote side
                if cfname in manifest2:
                    crev = manifest2[cfname]
                    newfparent = fparent1

        # find source in nearest ancestor if we've lost track
        if not crev:
            self.ui.debug(" %s: searching for copy revision for %s\n" %
                          (fname, cfname))
            for ancestor in self['.'].ancestors():
                if cfname in ancestor:
                    crev = ancestor[cfname].filenode()
                    break

        self.ui.debug(" %s: copy %s:%s\n" % (fname, cfname, hex(crev)))
        meta["copy"] = cfname
        meta["copyrev"] = hex(crev)
        fparent1, fparent2 = nullid, newfparent
    elif fparent2 != nullid:
        # is one parent an ancestor of the other?
        fparentancestor = flog.ancestor(fparent1, fparent2)
        if fparentancestor == fparent1:
            fparent1, fparent2 = fparent2, nullid
        elif fparentancestor == fparent2:
            fparent2 = nullid

    # is the file changed?
    if fparent2 != nullid or flog.cmp(fparent1, text) or meta:
        changelist.append(fname)
        return flog.add(text, meta, tr, linkrev, fparent1, fparent2)

    # are just the flags changed during merge?
    if fparent1 != fparent2o and manifest1.flags(fname) != fctx.flags():
        changelist.append(fname)

    return fparent1
839 839
def commit(self, text="", user=None, date=None, match=None, force=False,
           editor=False, extra={}):
    """Add a new revision to current repository.

    Revision information is gathered from the working directory,
    match can be used to filter the committed files. If editor is
    supplied, it is called to get a commit message.

    Returns the node of the new changeset, or None when there is nothing
    to commit.  Raises util.Abort for a partial commit of a merge, for
    explicitly named files that are missing or untracked, for subrepo
    changes without .hgsub, and for unresolved merge conflicts.  The
    working directory lock is held while the commit is made; the
    'precommit' hook runs before and the 'commit' hook after.
    """

    def fail(f, msg):
        raise util.Abort('%s: %s' % (f, msg))

    if not match:
        match = matchmod.always(self.root, '')

    if not force:
        # record the directories the matcher visits and abort on bad files
        vdirs = []
        match.dir = vdirs.append
        match.bad = fail

    wlock = self.wlock()
    try:
        wctx = self[None]
        merge = len(wctx.parents()) > 1

        if (not force and merge and match and
            (match.files() or match.anypats())):
            raise util.Abort(_('cannot partially commit a merge '
                               '(do not specify files or patterns)'))

        changes = self.status(match=match, clean=force)
        if force:
            changes[0].extend(changes[6]) # mq may commit unchanged files

        # check subrepos
        subs = []
        removedsubs = set()
        for p in wctx.parents():
            removedsubs.update(s for s in p.substate if match(s))
        for s in wctx.substate:
            removedsubs.discard(s)
            if match(s) and wctx.sub(s).dirty():
                subs.append(s)
        if (subs or removedsubs):
            if (not match('.hgsub') and
                '.hgsub' in (wctx.modified() + wctx.added())):
                raise util.Abort(_("can't commit subrepos without .hgsub"))
            if '.hgsubstate' not in changes[0]:
                changes[0].insert(0, '.hgsubstate')

        # make sure all explicit patterns are matched
        if not force and match.files():
            matched = set(changes[0] + changes[1] + changes[2])

            for f in match.files():
                if f == '.' or f in matched or f in wctx.substate:
                    continue
                if f in changes[3]: # missing
                    fail(f, _('file not found!'))
                if f in vdirs: # visited directory
                    d = f + '/'
                    for mf in matched:
                        if mf.startswith(d):
                            break
                    else:
                        fail(f, _("no match under directory!"))
                elif f not in self.dirstate:
                    fail(f, _("file not tracked!"))

        # nothing changed, no branch change, not closing a head: no commit
        if (not force and not extra.get("close") and not merge
            and not (changes[0] or changes[1] or changes[2])
            and wctx.branch() == wctx.p1().branch()):
            return None

        ms = mergemod.mergestate(self)
        for f in changes[0]:
            if f in ms and ms[f] == 'u':
                raise util.Abort(_("unresolved merge conflicts "
                                   "(see hg resolve)"))

        cctx = context.workingctx(self, text, user, date, extra, changes)
        if editor:
            cctx._text = editor(self, cctx, subs)
        # remember whether the editor changed the message, for the
        # "commit message saved" note below
        edited = (text != cctx._text)

        # commit subs
        if subs or removedsubs:
            state = wctx.substate.copy()
            for s in sorted(subs):
                sub = wctx.sub(s)
                self.ui.status(_('committing subrepository %s\n') %
                               subrepo.relpath(sub))
                sr = sub.commit(cctx._text, user, date)
                state[s] = (state[s][0], sr)
            subrepo.writestate(self, state)

        # Save commit message in case this transaction gets rolled back
        # (e.g. by a pretxncommit hook). Leave the content alone on
        # the assumption that the user will use the same editor again.
        msgfile = self.opener('last-message.txt', 'wb')
        msgfile.write(cctx._text)
        msgfile.close()

        p1, p2 = self.dirstate.parents()
        hookp1, hookp2 = hex(p1), (p2 != nullid and hex(p2) or '')
        try:
            self.hook("precommit", throw=True, parent1=hookp1, parent2=hookp2)
            ret = self.commitctx(cctx, True)
        except:
            # deliberately bare: the exception is always re-raised
            if edited:
                msgfn = self.pathto(msgfile.name[len(self.root)+1:])
                self.ui.write(
                    _('note: commit message saved in %s\n') % msgfn)
            raise

        # update dirstate and mergestate
        for f in changes[0] + changes[1]:
            self.dirstate.normal(f)
        for f in changes[2]:
            self.dirstate.forget(f)
        self.dirstate.setparents(ret)
        ms.reset()
    finally:
        wlock.release()

    self.hook("commit", node=hex(ret), parent1=hookp1, parent2=hookp2)
    return ret
967 967
def commitctx(self, ctx, error=False):
    """Add a new revision to current repository.
    Revision information is passed via the context argument.

    ctx supplies the parents (p1/p2), the modified, added and removed
    file lists, and the user, date, description and extra data that are
    written to the changelog.

    error controls how an IOError raised while committing a file is
    handled: when true it is always reported and re-raised; when false
    an IOError whose errno is ENOENT (or falsy/absent) only moves the
    file to the removed list.  An OSError is always reported and
    re-raised.

    Returns the value produced by self.changelog.add() for the new entry.
    """

    tr = lock = None
    removed = ctx.removed()
    p1, p2 = ctx.p1(), ctx.p2()
    # m1 is copied because it is mutated below to become the new manifest
    m1 = p1.manifest().copy()
    m2 = p2.manifest()
    user = ctx.user()

    lock = self.lock()
    try:
        tr = self.transaction("commit")
        # the storage calls below receive a weak proxy of the transaction
        # NOTE(review): presumably to avoid reference cycles - confirm
        trp = weakref.proxy(tr)

        # check in files
        new = {}
        changed = []
        # the new changeset's revision number is the current repo length
        linkrev = len(self)
        for f in sorted(ctx.modified() + ctx.added()):
            self.ui.note(f + "\n")
            try:
                fctx = ctx[f]
                new[f] = self._filecommit(fctx, m1, m2, linkrev, trp,
                                          changed)
                m1.set(f, fctx.flags())
            except OSError, inst:
                self.ui.warn(_("trouble committing %s!\n") % f)
                raise
            except IOError, inst:
                # an IOError without an errno is treated like ENOENT
                errcode = getattr(inst, 'errno', errno.ENOENT)
                if error or errcode and errcode != errno.ENOENT:
                    self.ui.warn(_("trouble committing %s!\n") % f)
                    raise
                else:
                    # the file could not be read: record it as removed
                    removed.append(f)

        # update manifest
        m1.update(new)
        # only files known to at least one parent manifest count as removed
        removed = [f for f in sorted(removed) if f in m1 or f in m2]
        drop = [f for f in removed if f in m1]
        for f in drop:
            del m1[f]
        mn = self.manifest.add(m1, trp, linkrev, p1.manifestnode(),
                               p2.manifestnode(), (new, drop))

        # update changelog
        self.changelog.delayupdate()
        n = self.changelog.add(mn, changed + removed, ctx.description(),
                               trp, p1.node(), p2.node(),
                               user, ctx.date(), ctx.extra().copy())
        # callable handed to the hook as "pending": yields self.root when
        # writepending() returns a true value, otherwise ""
        p = lambda: self.changelog.writepending() and self.root or ""
        xp1, xp2 = p1.hex(), p2 and p2.hex() or ''
        # throw=True: a failing pretxncommit hook raises before tr.close()
        self.hook('pretxncommit', throw=True, node=hex(n), parent1=xp1,
                  parent2=xp2, pending=p)
        self.changelog.finalize(trp)
        tr.close()

        if self._branchcache:
            self.updatebranchcache()
        return n
    finally:
        # tr is still None if self.transaction() itself raised
        if tr:
            tr.release()
        lock.release()
1035 1035
def destroyed(self):
    '''Notify the repository that some of its nodes were destroyed.

    Meant to be called by strip and rollback, giving them a single
    shared place for whatever has to happen once history is destroyed.'''
    # XXX receiving the list of destroyed nodes would be nice, but
    # there is no easy way for rollback() to provide one

    # Make sure the persistent tag cache gets updated.  Doing so here
    # means the tag cache only ever has to cope with destroyed heads
    # right after a strip/rollback, which in turn guarantees that
    # "cachetip == currenttip" (comparing both rev and node) always
    # means that no nodes were added or destroyed.

    # XXX this is wasteful for qrefresh: the current head is stripped,
    # the tag cache refreshed, and a new head added straight away.
    # It nevertheless seems required for the "instant tag cache
    # retrieval" case to work.
    self.invalidatecaches()
1054 1054
def walk(self, match, node=None):
    '''
    find every file accepted by the match function, walking either
    the directory tree or the given changeset; the work is delegated
    to the walk() method of the context looked up for node
    '''
    ctx = self[node]
    return ctx.walk(match)
1062 1062
def status(self, node1='.', node2=None, match=None,
           ignored=False, clean=False, unknown=False,
           listsubrepos=False):
    """return status of files between two nodes or node and working directory

    If node1 is None, use the first dirstate parent instead.
    If node2 is None, compare node1 with working directory.

    node1 and node2 may also be context.changectx instances, which are
    then used directly instead of being looked up.  match restricts the
    files considered; when omitted a matcher built by matchmod.always()
    is used.  ignored, clean and unknown ask for those categories to be
    listed.  With listsubrepos the status of each subrepository is
    merged in, every file name being prefixed with the subrepo path.

    Returns the tuple (modified, added, removed, deleted, unknown,
    ignored, clean); each member is sorted before returning.
    """

    def mfmatches(ctx):
        # copy of ctx's manifest reduced to the files accepted by match
        mf = ctx.manifest().copy()
        for fn in mf.keys():
            if not match(fn):
                del mf[fn]
        return mf

    if isinstance(node1, context.changectx):
        ctx1 = node1
    else:
        ctx1 = self[node1]
    if isinstance(node2, context.changectx):
        ctx2 = node2
    else:
        ctx2 = self[node2]

    # working: ctx2 has no revision number
    # NOTE(review): i.e. presumably the working directory - confirm
    working = ctx2.rev() is None
    # parentworking: working directory compared against its own parent
    parentworking = working and ctx1 == self['.']
    match = match or matchmod.always(self.root, self.getcwd())
    # keep the request flags: the names ignored/clean/unknown are reused
    # below for the result lists
    listignored, listclean, listunknown = ignored, clean, unknown

    # load earliest manifest first for caching reasons
    if not working and ctx2.rev() < ctx1.rev():
        ctx2.manifest()

    if not parentworking:
        # only warn about bad files that are not part of ctx1
        def bad(f, msg):
            if f not in ctx1:
                self.ui.warn('%s: %s\n' % (self.dirstate.pathto(f), msg))
        match.bad = bad

    if working: # we need to scan the working dir
        subrepos = []
        if '.hgsub' in self.dirstate:
            subrepos = ctx1.substate.keys()
        s = self.dirstate.status(match, subrepos, listignored,
                                 listclean, listunknown)
        # cmp holds the files that need a content comparison to classify
        cmp, modified, added, removed, deleted, unknown, ignored, clean = s

        # check for any possibly clean files
        if parentworking and cmp:
            fixup = []
            # do a full compare of any files that might have changed
            for f in sorted(cmp):
                if (f not in ctx1 or ctx2.flags(f) != ctx1.flags(f)
                    or ctx1[f].cmp(ctx2[f])):
                    modified.append(f)
                else:
                    fixup.append(f)

            # update dirstate for files that are actually clean
            if fixup:
                if listclean:
                    clean += fixup

                try:
                    # updating the dirstate is optional
                    # so we don't wait on the lock
                    wlock = self.wlock(False)
                    try:
                        for f in fixup:
                            self.dirstate.normal(f)
                    finally:
                        wlock.release()
                except error.LockError:
                    # best effort only: skip the update if the lock is busy
                    pass

    if not parentworking:
        mf1 = mfmatches(ctx1)
        if working:
            # we are comparing working dir against non-parent
            # generate a pseudo-manifest for the working dir
            mf2 = mfmatches(self['.'])
            for f in cmp + modified + added:
                # None marks an entry whose content must be compared below
                mf2[f] = None
                mf2.set(f, ctx2.flags(f))
            for f in removed:
                if f in mf2:
                    del mf2[f]
        else:
            # we are comparing two revisions
            deleted, unknown, ignored = [], [], []
            mf2 = mfmatches(ctx2)

        modified, added, clean = [], [], []
        for fn in mf2:
            if fn in mf1:
                # modified when the flags differ, or when the entries
                # differ and either mf2 has a real entry or the file
                # contents compare as different
                if (mf1.flags(fn) != mf2.flags(fn) or
                    (mf1[fn] != mf2[fn] and
                     (mf2[fn] or ctx1[fn].cmp(ctx2[fn])))):
                    modified.append(fn)
                elif listclean:
                    clean.append(fn)
                del mf1[fn]
            else:
                added.append(fn)
        # whatever is left in mf1 has no counterpart in mf2
        removed = mf1.keys()

    r = modified, added, removed, deleted, unknown, ignored, clean

    if listsubrepos:
        for subpath, sub in subrepo.itersubrepos(ctx1, ctx2):
            if working:
                rev2 = None
            else:
                rev2 = ctx2.substate[subpath][1]
            try:
                submatch = matchmod.narrowmatcher(subpath, match)
                s = sub.status(rev2, match=submatch, ignored=listignored,
                               clean=listclean, unknown=listunknown,
                               listsubrepos=True)
                # merge each subrepo list into the matching list of r
                for rfiles, sfiles in zip(r, s):
                    rfiles.extend("%s/%s" % (subpath, f) for f in sfiles)
            except error.LookupError:
                self.ui.status(_("skipping missing subrepository: %s\n")
                               % subpath)

    # sort every result list in place
    [l.sort() for l in r]
    return r
1191 1191
def heads(self, start=None):
    """Return the changelog heads ordered by descending revision.

    start is passed through unchanged to changelog.heads().
    """
    cl = self.changelog
    # negating the rev turns the ascending sort into a descending one
    return sorted(cl.heads(start), key=lambda h: (-cl.rev(h), h))
1197 1197
def branchheads(self, branch=None, start=None, closed=False):
    '''list the heads of a branch, optionally filtered

    The heads come back in topological order, newest first.
    branch defaults to the dirstate branch when None.
    A start other than None limits the result to heads reachable
    from start.
    With closed set to True, heads marked as closed are kept too.
    '''
    if branch is None:
        branch = self[None].branch()
    bmap = self.branchmap()
    if branch not in bmap:
        return []
    # the cache lists heads lowest to highest, so flip the order
    found = list(bmap[branch])
    found.reverse()
    if start is not None:
        # drop the heads that cannot be reached from start
        reachable = set(self.changelog.nodesbetween([start], found)[2])
        found = [h for h in found if h in reachable]
    if closed:
        return found
    return [h for h in found
            if 'close' not in self.changelog.read(h)[5]]
1221 1221
def branches(self, nodes):
    """Follow first parents from each node to the end of its linear run.

    For every starting node (the changelog tip when nodes is empty) the
    first-parent chain is followed until a node is reached that has a
    second parent or no first parent.  One (start, end, p1, p2) tuple
    is returned per starting node, p1/p2 being the parents of end.
    """
    if not nodes:
        nodes = [self.changelog.tip()]
    found = []
    for start in nodes:
        cur = start
        parents = self.changelog.parents(cur)
        # keep walking while there is exactly one (first) parent
        while parents[1] == nullid and parents[0] != nullid:
            cur = parents[0]
            parents = self.changelog.parents(cur)
        found.append((start, cur, parents[0], parents[1]))
    return found
1235 1235
def between(self, pairs):
    """Sample the first-parent chain between each (top, bottom) pair.

    Starting at top and following first parents until bottom or the
    null node is reached, the nodes found at distances 1, 2, 4, 8, ...
    from top are collected.  One list is returned per input pair.
    """
    result = []

    for top, bottom in pairs:
        cur = top
        sampled = []
        distance = 0
        nextsample = 1

        while cur != bottom and cur != nullid:
            parent = self.changelog.parents(cur)[0]
            if distance == nextsample:
                sampled.append(cur)
                nextsample *= 2
            cur = parent
            distance += 1

        result.append(sampled)

    return result
1254 1254
def pull(self, remote, heads=None, force=False):
    """Fetch the changesets missing locally from remote and add them.

    heads optionally limits the pull to the given remote heads, which
    requires the remote to support 'changegroupsubset'.  force is
    passed on to the incoming discovery.

    Returns 0 when there is nothing to fetch, otherwise the result of
    addchangegroup().  Raises util.Abort when heads are requested but
    the remote cannot serve a changegroup subset.
    """
    lock = self.lock()
    try:
        common, fetch, rheads = discovery.findcommonincoming(
            self, remote, heads=heads, force=force)
        if not fetch:
            self.ui.status(_("no changes found\n"))
            return 0

        if fetch == [nullid]:
            self.ui.status(_("requesting all changes\n"))
        elif heads is None and remote.capable('changegroupsubset'):
            # issue1320, avoid a race if remote changed after discovery
            heads = rheads

        if heads is not None:
            if not remote.capable('changegroupsubset'):
                raise util.Abort(_("partial pull cannot be done because "
                                   "other repository doesn't support "
                                   "changegroupsubset."))
            cg = remote.changegroupsubset(fetch, heads, 'pull')
        else:
            cg = remote.changegroup(fetch, 'pull')
        # the lock is handed over so addchangegroup() can release it
        return self.addchangegroup(cg, 'pull', remote.url(), lock=lock)
    finally:
        lock.release()
1282 1282
def push(self, remote, force=False, revs=None, newbranch=False):
    '''Send outgoing changesets (restricted by revs) from this
    repository to remote.  The integer returned means:
    - 0: HTTP error *or* nothing to push
    - 1: pushed and the remote head count did not change *or*
      there were outgoing changesets but the push was refused
    - anything else: see addchangegroup()
    '''
    # A remote repo can be pushed to in two ways:
    #
    # addchangegroup, which assumes the local user can lock the
    # remote repo (local filesystem, old ssh servers).
    #
    # unbundle, which assumes the local user cannot lock the remote
    # repo (new ssh servers, http servers).

    unbundle = remote.capable('unbundle')
    lock = None
    if not unbundle:
        lock = remote.lock()
    try:
        ret = discovery.prepush(self, remote, force, revs, newbranch)
        if ret[0] is None:
            # ret[1] is 0 for "nothing to push" or 1 for
            # "something to push but I refuse"
            return ret[1]

        cg, remote_heads = ret
        if not unbundle:
            # the integer returned tells how the remote head count changed
            return remote.addchangegroup(cg, 'push', self.url(), lock=lock)

        # The local repo looks up the server's heads and works out which
        # revs to push.  If, once they are transferred, the server sees
        # different heads (someone else won the commit/push race), it
        # aborts.
        if force:
            remote_heads = ['force']
        # ssh: return remote's addchangegroup()
        # http: return remote's addchangegroup() or 0 for error
        return remote.unbundle(cg, remote_heads, 'push')
    finally:
        if lock is not None:
            lock.release()
1327 1327
def changegroupinfo(self, nodes, source):
    """Report the changesets that are about to be bundled.

    The number of nodes is shown in verbose mode or when source is
    'bundle'; in debug mode every node is also listed in hex form.
    """
    ui = self.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if not ui.debugflag:
        return
    ui.debug("list of changesets:\n")
    for n in nodes:
        ui.debug("%s\n" % hex(n))
1335 1335
def changegroupsubset(self, bases, heads, source, extranodes=None):
    """Compute a changegroup consisting of all the nodes that are
    descendants of any of the bases and ancestors of any of the heads.
    Return a chunkbuffer object whose read() method will return
    successive changegroup chunks.

    It is fairly complex as determining which filenodes and which
    manifest nodes need to be included for the changeset to be complete
    is non-trivial.

    Another wrinkle is doing the reverse, figuring out which changeset in
    the changegroup a particular filenode or manifestnode belongs to.

    The caller can specify some nodes that must be included in the
    changegroup using the extranodes argument.  It should be a dict
    where the keys are the filenames (or 1 for the manifest), and the
    values are lists of (node, linknode) tuples, where node is a wanted
    node and linknode is the changelog node that should be transmitted as
    the linkrev.

    An empty bases list is treated as [nullid].  source is passed to the
    'preoutgoing' and 'outgoing' hooks and to changegroupinfo().  When
    extranodes is None and the computed heads equal all repository
    heads, the work is delegated to _changegroup().
    """

    # Set up some initial variables
    # Make it easy to refer to self.changelog
    cl = self.changelog
    # Compute the list of changesets in this changegroup.
    # Some bases may turn out to be superfluous, and some heads may be
    # too.  nodesbetween will return the minimal set of bases and heads
    # necessary to re-create the changegroup.
    if not bases:
        bases = [nullid]
    msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)

    if extranodes is None:
        # can we go through the fast path ?
        heads.sort()
        allheads = self.heads()
        allheads.sort()
        if heads == allheads:
            return self._changegroup(msng_cl_lst, source)

    # slow path
    self.hook('preoutgoing', throw=True, source=source)

    self.changegroupinfo(msng_cl_lst, source)

    # We assume that all ancestors of bases are known
    commonrevs = set(cl.ancestors(*[cl.rev(n) for n in bases]))

    # Make it easy to refer to self.manifest
    mnfst = self.manifest
    # We don't know which manifests are missing yet
    # (dict filled by the collector: manifest node -> changelog node)
    msng_mnfst_set = {}
    # Nor do we know which filenodes are missing.
    # (dict: filename -> {filenode: changelog node})
    msng_filenode_set = {}

    junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
    junk = None

    # A changeset always belongs to itself, so the changenode lookup
    # function for a changenode is identity.
    def identity(x):
        return x

    # A function generating function that sets up the initial environment
    # for the inner function.
    def filenode_collector(changedfiles):
        # This gathers information from each manifestnode included in the
        # changegroup about which filenodes the manifest node references
        # so we can include those in the changegroup too.
        #
        # It also remembers which changenode each filenode belongs to.  It
        # does this by assuming that a filenode belongs to the changenode
        # the first manifest that references it belongs to.
        def collect_msng_filenodes(mnfstnode):
            r = mnfst.rev(mnfstnode)
            if r - 1 in mnfst.parentrevs(r):
                # If the previous rev is one of the parents,
                # we only need to see a diff.
                deltamf = mnfst.readdelta(mnfstnode)
                # For each line in the delta
                for f, fnode in deltamf.iteritems():
                    # And if the file is in the list of files we care
                    # about.
                    if f in changedfiles:
                        # Get the changenode this manifest belongs to
                        clnode = msng_mnfst_set[mnfstnode]
                        # Create the set of filenodes for the file if
                        # there isn't one already.
                        ndset = msng_filenode_set.setdefault(f, {})
                        # And set the filenode's changelog node to the
                        # manifest's if it hasn't been set already.
                        ndset.setdefault(fnode, clnode)
            else:
                # Otherwise we need a full manifest.
                m = mnfst.read(mnfstnode)
                # For every file we care about.
                for f in changedfiles:
                    fnode = m.get(f, None)
                    # If it's in the manifest
                    if fnode is not None:
                        # See comments above.
                        clnode = msng_mnfst_set[mnfstnode]
                        ndset = msng_filenode_set.setdefault(f, {})
                        ndset.setdefault(fnode, clnode)
        return collect_msng_filenodes

    # If we determine that a particular file or manifest node must be a
    # node that the recipient of the changegroup will already have, we can
    # also assume the recipient will have all the parents.  This function
    # prunes them from the set of missing nodes.
    def prune(revlog, missingnodes):
        hasset = set()
        # If a 'missing' filenode thinks it belongs to a changenode we
        # assume the recipient must have, then the recipient must have
        # that filenode.
        for n in missingnodes:
            clrev = revlog.linkrev(revlog.rev(n))
            if clrev in commonrevs:
                hasset.add(n)
        # removal happens in a second pass so that missingnodes is not
        # modified while it is being iterated
        for n in hasset:
            missingnodes.pop(n, None)
        for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
            missingnodes.pop(revlog.node(r), None)

    # Add the nodes that were explicitly requested.
    def add_extra_nodes(name, nodes):
        if not extranodes or name not in extranodes:
            return

        for node, linknode in extranodes[name]:
            # never override a linknode that was already collected
            if node not in nodes:
                nodes[node] = linknode

    # Now that we have all these utility functions to help out and
    # logically divide up the task, generate the group.
    def gengroup():
        # The set of changed files starts empty.
        changedfiles = set()
        collect = changegroup.collector(cl, msng_mnfst_set, changedfiles)

        # Create a changenode group generator that will call our functions
        # back to lookup the owning changenode and collect information.
        group = cl.group(msng_cl_lst, identity, collect)
        for cnt, chnk in enumerate(group):
            yield chnk
            self.ui.progress(_('bundling changes'), cnt, unit=_('chunks'))
        self.ui.progress(_('bundling changes'), None)

        prune(mnfst, msng_mnfst_set)
        # key 1 in extranodes stands for the manifest
        add_extra_nodes(1, msng_mnfst_set)
        msng_mnfst_lst = msng_mnfst_set.keys()
        # Sort the manifestnodes by revision number.
        msng_mnfst_lst.sort(key=mnfst.rev)
        # Create a generator for the manifestnodes that calls our lookup
        # and data collection functions back.
        group = mnfst.group(msng_mnfst_lst,
                            lambda mnode: msng_mnfst_set[mnode],
                            filenode_collector(changedfiles))
        for cnt, chnk in enumerate(group):
            yield chnk
            self.ui.progress(_('bundling manifests'), cnt, unit=_('chunks'))
        self.ui.progress(_('bundling manifests'), None)

        # These are no longer needed, dereference and toss the memory for
        # them.
        msng_mnfst_lst = None
        msng_mnfst_set.clear()

        if extranodes:
            for fname in extranodes:
                # integer keys (the manifest) are not file names
                if isinstance(fname, int):
                    continue
                msng_filenode_set.setdefault(fname, {})
                changedfiles.add(fname)
        # Go through all our files in order sorted by name.
        cnt = 0
        for fname in sorted(changedfiles):
            filerevlog = self.file(fname)
            if not len(filerevlog):
                raise util.Abort(_("empty or missing revlog for %s") % fname)
            # Toss out the filenodes that the recipient isn't really
            # missing.
            missingfnodes = msng_filenode_set.pop(fname, {})
            prune(filerevlog, missingfnodes)
            add_extra_nodes(fname, missingfnodes)
            # If any filenodes are left, generate the group for them,
            # otherwise don't bother.
            if missingfnodes:
                yield changegroup.chunkheader(len(fname))
                yield fname
                # Sort the filenodes by their revision # (topological order)
                nodeiter = list(missingfnodes)
                nodeiter.sort(key=filerevlog.rev)
                # Create a group generator and only pass in a changenode
                # lookup function as we need to collect no information
                # from filenodes.
                group = filerevlog.group(nodeiter,
                                         lambda fnode: missingfnodes[fnode])
                for chnk in group:
                    self.ui.progress(
                        _('bundling files'), cnt, item=fname, unit=_('chunks'))
                    cnt += 1
                    yield chnk
        # Signal that no more groups are left.
        yield changegroup.closechunk()
        self.ui.progress(_('bundling files'), None)

        if msng_cl_lst:
            self.hook('outgoing', node=hex(msng_cl_lst[0]), source=source)

    return util.chunkbuffer(gengroup())
1547 1547
def changegroup(self, basenodes, source):
    """Return the changegroup from basenodes up to all current heads.

    The work is done by changegroupsubset(), called with every head of
    the repository, which avoids a race (issue1320).
    """
    allheads = self.heads()
    return self.changegroupsubset(basenodes, allheads, source)
1551 1551
def _changegroup(self, nodes, source):
    """Compute the changegroup of all nodes that we have that a recipient
    doesn't.  Return a chunkbuffer object whose read() method will return
    successive changegroup chunks.

    This is much easier than the previous function as we can assume that
    the recipient has any changenode we aren't sending them.

    nodes is the set of nodes to send

    source is passed to the 'preoutgoing' and 'outgoing' hooks and to
    changegroupinfo()."""

    self.hook('preoutgoing', throw=True, source=source)

    cl = self.changelog
    # changelog revision numbers of the changesets being sent
    revset = set([cl.rev(n) for n in nodes])
    self.changegroupinfo(nodes, source)

    # a changenode is its own link node
    def identity(x):
        return x

    # yield the nodes of log whose linkrev is one of the sent changesets
    def gennodelst(log):
        for r in log:
            if log.linkrev(r) in revset:
                yield log.node(r)

    # build a lookup function mapping a node of revlog to the changelog
    # node its linkrev points to
    def lookuplinkrev_func(revlog):
        def lookuplinkrev(n):
            return cl.node(revlog.linkrev(revlog.rev(n)))
        return lookuplinkrev

    def gengroup():
        '''yield a sequence of changegroup chunks (strings)'''
        # construct a list of all changed files
        changedfiles = set()
        mmfs = {}
        collect = changegroup.collector(cl, mmfs, changedfiles)

        # changelog chunks first
        for cnt, chnk in enumerate(cl.group(nodes, identity, collect)):
            self.ui.progress(_('bundling changes'), cnt, unit=_('chunks'))
            yield chnk
        self.ui.progress(_('bundling changes'), None)

        # then the manifest chunks
        mnfst = self.manifest
        nodeiter = gennodelst(mnfst)
        for cnt, chnk in enumerate(mnfst.group(nodeiter,
                                               lookuplinkrev_func(mnfst))):
            self.ui.progress(_('bundling manifests'), cnt, unit=_('chunks'))
            yield chnk
        self.ui.progress(_('bundling manifests'), None)

        # finally one group per changed file, in file name order
        cnt = 0
        for fname in sorted(changedfiles):
            filerevlog = self.file(fname)
            if not len(filerevlog):
                raise util.Abort(_("empty or missing revlog for %s") % fname)
            nodeiter = gennodelst(filerevlog)
            # materialized so that an empty result can be detected
            nodeiter = list(nodeiter)
            if nodeiter:
                yield changegroup.chunkheader(len(fname))
                yield fname
                lookup = lookuplinkrev_func(filerevlog)
                for chnk in filerevlog.group(nodeiter, lookup):
                    self.ui.progress(
                        _('bundling files'), cnt, item=fname, unit=_('chunks'))
                    cnt += 1
                    yield chnk
        self.ui.progress(_('bundling files'), None)

        # closing chunk: no more groups follow
        yield changegroup.closechunk()

        if nodes:
            self.hook('outgoing', node=hex(nodes[0]), source=source)

    return util.chunkbuffer(gengroup())
1625 1625
def addchangegroup(self, source, srctype, url, emptyok=False, lock=None):
    """Add the changegroup returned by source.read() to this repo.
    srctype is a string like 'push', 'pull', or 'unbundle'.  url is
    the URL of the repo where this changegroup is coming from.

    A source without a 'chunk' attribute is wrapped with
    changegroup.unbundle10().  emptyok allows the changelog group to
    be empty without aborting.  lock, when given, is released together
    with the transaction, before the 'changegroup' and 'incoming'
    hooks run.

    Raises util.Abort when the changelog group is empty (unless
    emptyok), when a file group is empty, or when the
    'server.validate' check finds missing file data.

    Return an integer summarizing the change to this repo:
    - nothing changed or no source: 0
    - more heads than before: 1+added heads (2..n)
    - fewer heads than before: -1-removed heads (-2..-n)
    - number of heads stays the same: 1
    """
    # link revision for an incoming changeset: the next changelog rev
    def csmap(x):
        self.ui.debug("add changeset %s\n" % short(x))
        return len(cl)

    # link revision for manifests and files: rev of the changelog node
    def revmap(x):
        return cl.rev(x)

    if not source:
        return 0

    if not hasattr(source, 'chunk'):
        source = changegroup.unbundle10(source, 'UN')

    self.hook('prechangegroup', throw=True, source=srctype, url=url)

    changesets = files = revisions = 0
    efiles = set()

    # write changelog data to temp files so concurrent readers will not see
    # inconsistent view
    cl = self.changelog
    cl.delayupdate()
    oldheads = len(cl.heads())

    tr = self.transaction("\n".join([srctype, urlmod.hidepassword(url)]))
    try:
        trp = weakref.proxy(tr)
        # pull off the changeset group
        self.ui.status(_("adding changesets\n"))
        clstart = len(cl)
        # progress reporter installed as source.callback; step is the
        # progress topic of the current phase
        class prog(object):
            step = _('changesets')
            count = 1
            ui = self.ui
            total = None
            def __call__(self):
                self.ui.progress(self.step, self.count, unit=_('chunks'),
                                 total=self.total)
                self.count += 1
        pr = prog()
        source.callback = pr

        if (cl.addgroup(source, csmap, trp) is None
            and not emptyok):
            raise util.Abort(_("received changelog group is empty"))
        clend = len(cl)
        changesets = clend - clstart
        for c in xrange(clstart, clend):
            efiles.update(self[c].files())
        # from here on efiles is the number of files touched
        efiles = len(efiles)
        self.ui.progress(_('changesets'), None)

        # pull off the manifest group
        self.ui.status(_("adding manifests\n"))
        pr.step = _('manifests')
        pr.count = 1
        pr.total = changesets # manifests <= changesets
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifest.addgroup(source, revmap, trp)
        self.ui.progress(_('manifests'), None)

        needfiles = {}
        if self.ui.configbool('server', 'validate', default=False):
            # validate incoming csets have their manifests
            for cset in xrange(clstart, clend):
                mfest = self.changelog.read(self.changelog.node(cset))[0]
                mfest = self.manifest.readdelta(mfest)
                # store file nodes we must see
                for f, n in mfest.iteritems():
                    needfiles.setdefault(f, set()).add(n)

        # process the files
        self.ui.status(_("adding file changes\n"))
        # use the translated topic, like the phases above, so that it
        # matches the ui.progress(_('files'), None) call that closes it
        pr.step = _('files')
        pr.count = 1
        pr.total = efiles
        # progress is reported once per file below, not once per chunk
        source.callback = None

        while 1:
            # each file group starts with a chunk holding the file name;
            # an empty chunk ends the list of groups
            f = source.chunk()
            if not f:
                break
            self.ui.debug("adding %s revisions\n" % f)
            pr()
            fl = self.file(f)
            o = len(fl)
            if fl.addgroup(source, revmap, trp) is None:
                raise util.Abort(_("received file revlog group is empty"))
            revisions += len(fl) - o
            files += 1
            if f in needfiles:
                # tick off the expected file nodes that just arrived
                needs = needfiles[f]
                for new in xrange(o, len(fl)):
                    n = fl.node(new)
                    if n in needs:
                        needs.remove(n)
                if not needs:
                    del needfiles[f]
        self.ui.progress(_('files'), None)

        # nodes still listed were not part of the changegroup; they
        # must already be present in the local file revlogs
        for f, needs in needfiles.iteritems():
            fl = self.file(f)
            for n in needs:
                try:
                    fl.rev(n)
                except error.LookupError:
                    raise util.Abort(
                        _('missing file data for %s:%s - run hg verify') %
                        (f, hex(n)))

        newheads = len(cl.heads())
        heads = ""
        if oldheads and newheads != oldheads:
            heads = _(" (%+d heads)") % (newheads - oldheads)

        self.ui.status(_("added %d changesets"
                         " with %d changes to %d files%s\n")
                         % (changesets, revisions, files, heads))

        if changesets > 0:
            # callable handed to the hook as "pending": yields self.root
            # when writepending() returns a true value, otherwise ""
            p = lambda: cl.writepending() and self.root or ""
            self.hook('pretxnchangegroup', throw=True,
                      node=hex(cl.node(clstart)), source=srctype,
                      url=url, pending=p)

        # make changelog see real files again
        cl.finalize(trp)

        tr.close()
    finally:
        tr.release()
        if lock:
            lock.release()

    if changesets > 0:
        # forcefully update the on-disk branch cache
        self.ui.debug("updating the branch cache\n")
        self.updatebranchcache()
        self.hook("changegroup", node=hex(cl.node(clstart)),
                  source=srctype, url=url)

        for i in xrange(clstart, clend):
            self.hook("incoming", node=hex(cl.node(i)),
                      source=srctype, url=url)

    # never return 0 here:
    if newheads < oldheads:
        return newheads - oldheads - 1
    else:
        return newheads - oldheads + 1
1790 1790
1791 1791
def stream_in(self, remote, requirements):
    '''receive a streaming clone from remote into this repository's store

    Reads the status line and the "<files> <bytes>" header produced by
    remote.stream_out(), then copies each announced file into the store.
    requirements is updated in place with this repository's non-format
    requirements before being applied and written out.

    Raises error.ResponseError when a header line cannot be parsed and
    util.Abort when the server reports a non-zero status code.

    Returns len(self.heads()) + 1.'''
    fp = remote.stream_out()
    l = fp.readline()
    try:
        resp = int(l)
    except ValueError:
        raise error.ResponseError(
            _('Unexpected response from remote server:'), l)
    if resp == 1:
        raise util.Abort(_('operation forbidden by server'))
    elif resp == 2:
        raise util.Abort(_('locking the remote repository failed'))
    elif resp != 0:
        raise util.Abort(_('the server sent an unknown error code'))
    self.ui.status(_('streaming all changes\n'))
    l = fp.readline()
    try:
        total_files, total_bytes = map(int, l.split(' ', 1))
    except (ValueError, TypeError):
        raise error.ResponseError(
            _('Unexpected response from remote server:'), l)
    self.ui.status(_('%d files to transfer, %s of data\n') %
                   (total_files, util.bytecount(total_bytes)))
    start = time.time()
    for i in xrange(total_files):
        # XXX doesn't support '\n' or '\r' in filenames
        l = fp.readline()
        try:
            name, size = l.split('\0', 1)
            size = int(size)
        except (ValueError, TypeError):
            raise error.ResponseError(
                _('Unexpected response from remote server:'), l)
        self.ui.debug('adding %s (%s)\n' % (name, util.bytecount(size)))
        # for backwards compat, name was partially encoded
        ofp = self.sopener(store.decodedir(name), 'w')
        try:
            for chunk in util.filechunkiter(fp, limit=size):
                ofp.write(chunk)
        finally:
            # release the handle even when the transfer is interrupted
            ofp.close()
    elapsed = time.time() - start
    if elapsed <= 0:
        # avoid dividing by zero when computing the transfer rate
        elapsed = 0.001
    self.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') %
                   (util.bytecount(total_bytes), elapsed,
                    util.bytecount(total_bytes / elapsed)))

    # new requirements = old non-format requirements + new format-related
    # requirements from the streamed-in repository
    requirements.update(set(self.requirements) - self.supportedformats)
    self._applyrequirements(requirements)
    self._writerequirements()

    self.invalidate()
    return len(self.heads()) + 1
1846 1846
def clone(self, remote, heads=[], stream=False):
    '''clone remote repository.

    keyword arguments:
    heads: list of revs to clone (forces use of pull)
    stream: use streaming clone if possible

    Falls back to self.pull(remote, heads) whenever a streaming clone
    is not requested or cannot be used.'''

    # now, all clients that can request uncompressed clones can
    # read repo formats supported by all servers that can serve
    # them.

    # if revlog format changes, client will have to check version
    # and format flags on "stream" capability, and use
    # uncompressed only if compatible.

    wantstream = stream and not heads
    if wantstream:
        # 'stream' means remote revlog format is revlogv1 only
        if remote.capable('stream'):
            return self.stream_in(remote, set(('revlogv1',)))
        # otherwise, 'streamreqs' contains the remote revlog format
        remotereqs = remote.capable('streamreqs')
        if remotereqs:
            remotereqs = set(remotereqs.split(','))
            unsupported = remotereqs - self.supportedformats
            # if we support it, stream in and adjust our requirements
            if not unsupported:
                return self.stream_in(remote, remotereqs)
    return self.pull(remote, heads)
1874 1874
def pushkey(self, namespace, key, old, new):
    '''hand the request to pushkey.push for this repository and return
    whatever it returns'''
    outcome = pushkey.push(self, namespace, key, old, new)
    return outcome
1877 1877
def listkeys(self, namespace):
    '''return the result of pushkey.list for namespace on this repository'''
    listing = pushkey.list(self, namespace)
    return listing
1880 1880
# used to avoid circular references so destructors work
def aftertrans(files):
    '''return a callable that renames every (src, dest) pair in files

    The pairs are copied into tuples when aftertrans is called, so later
    changes to files do not affect the returned callable.'''
    pending = []
    for entry in files:
        pending.append(tuple(entry))

    def dorename():
        for source, target in pending:
            util.rename(source, target)

    return dorename
1888 1888
def instance(ui, path, create):
    '''build a localrepository for path after util.drop_scheme has
    processed it with the 'file' scheme'''
    localpath = util.drop_scheme('file', path)
    return localrepository(ui, localpath, create)
1891 1891
def islocal(path):
    '''always report True; path is not inspected'''
    return True
@@ -1,1481 +1,1484 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # amount of data read unconditionally, should be >= 4
42 42 # when not inline: threshold for using lazy index
43 43 _prereadsize = 1048576
44 44 # max size of revlog with inline data
45 45 _maxinline = 131072
46 46
47 47 RevlogError = error.RevlogError
48 48 LookupError = error.LookupError
49 49
def getoffset(q):
    """return q with its low 16 bits shifted away, as an int"""
    shifted = q >> 16
    return int(shifted)
52 52
def gettype(q):
    """return the low 16 bits of q as an int"""
    lowbits = q & 0xFFFF
    return int(lowbits)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
59 59 nullhash = _sha(nullid)
60 60
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    The digest covers both parents and the text, so two revisions with
    the same content but different parents get different hashes.
    """
    if p2 != nullid:
        # none of the parent nodes are nullid: feed them in sorted order
        lower, upper = sorted([p1, p2])
        digester = _sha(lower)
        digester.update(upper)
    else:
        # As of now, if one of the parent node is null, p2 is null.
        # Copying the precomputed null hash is faster than creating one.
        digester = nullhash.copy()
        digester.update(p1)
    digester.update(text)
    return digester.digest()
81 81
def compress(text):
    """ generate a possibly-compressed representation of text

    Returns a (header, data) pair: ("", compressed) when compression
    did not grow the text, ('u', text) for text stored uncompressed,
    and ("", text) when the text is empty or starts with a NUL byte.
    """
    if not text:
        return ("", text)
    size = len(text)
    packed = None
    if size > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        compressor = zlib.compressobj()
        pieces = [compressor.compress(text[begin:begin + 2**20])
                  for begin in xrange(0, size, 2**20)]
        pieces.append(compressor.flush())
        if sum(map(len, pieces)) < size:
            packed = "".join(pieces)
    elif size >= 44:
        packed = _compress(text)
    # texts shorter than 44 bytes are never compressed
    if packed is not None and len(packed) <= size:
        return ("", packed)
    if text[0] == '\0':
        return ("", text)
    return ('u', text)
110 110
def decompress(bin):
    """ decompress the given input

    The first byte selects the handling: 'x' is passed to zlib, 'u' is
    stripped off, NUL (or empty input) is returned as is. Any other
    first byte raises RevlogError.
    """
    if not bin:
        return bin
    marker = bin[0]
    if marker == 'x':
        return _decompress(bin)
    if marker == '\0':
        return bin
    if marker == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % marker)
123 123
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices

    Raw index entries are kept as packed strings in self.index (None
    until loaded) and node -> rev lookups live in self.map.
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        """remember dataf and size the (still empty) index from its length"""
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size // self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        """load the entries stored in blocksize bytes from byte blockstart

        When data is None the bytes are read from the index file.
        Entries that are already loaded are left untouched.
        """
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) // self.s
        i = blockstart // self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node

        Returns node once its rev has been recorded in self.map, or
        None when it was not found.
        """
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] is not None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards: take the last occurrence
                # first, and if it is not the node field of its entry,
                # retry with the occurrences that start before it
                off = data.rfind(node, 0, findend)
                if off < 0:
                    break
                i = off // self.s
                entrystart = i * self.s
                n = data[entrystart + ngshaoffset:
                         entrystart + ngshaoffset + 20]
                if n == node:
                    self.map[n] = i + start // self.s
                    return node
                # exclude the match at off but keep every match that
                # starts before it
                findend = off + len(node) - 1
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        """load index entries from the file

        With no arguments everything is loaded; with only i, the
        1024-entry block containing i; with i and end, entries i up to
        (not including) end.
        """
        if self.all:
            return
        loadall = False
        if i is None:
            blockstart = 0
            blocksize = (65536 // self.s) * self.s
            end = self.datasize
            loadall = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if loadall:
            self.all = True
268 268
class lazyindex(object):
    """a lazy version of the index array

    Entries are stored packed in the parser's index list and unpacked
    on access; missing entries are loaded from the parser on demand.
    """
    def __init__(self, parser):
        self.p = parser

    def __len__(self):
        entries = self.p.index
        return len(entries)

    def load(self, pos):
        """ask the parser to load the entry at pos and return it packed"""
        if pos < 0:
            pos = pos + len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __getitem__(self, pos):
        raw = self.p.index[pos]
        if not raw:
            raw = self.load(pos)
        return _unpack(indexformatng, raw)

    def __setitem__(self, pos, item):
        packed = _pack(indexformatng, *item)
        self.p.index[pos] = packed

    def __delitem__(self, pos):
        del self.p.index[pos]

    def insert(self, pos, e):
        packed = _pack(indexformatng, *e)
        self.p.index.insert(pos, packed)

    def append(self, e):
        packed = _pack(indexformatng, *e)
        self.p.index.append(packed)
290 290
class lazymap(object):
    """a lazy version of the node map

    Lookups are answered from the parser's map when possible and fall
    back to the parser's findnode/loadmap otherwise.
    """
    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        """make the parser look key up; raise KeyError if it cannot"""
        found = self.p.findnode(key)
        if found is None:
            raise KeyError(key)

    def __contains__(self, key):
        if key not in self.p.map:
            # not seen yet: load the whole node map and look again
            self.p.loadmap()
            return key in self.p.map
        return True

    def __iter__(self):
        yield nullid
        for pos, entry in enumerate(self.p.index):
            if not entry:
                self.p.loadindex(pos)
                entry = self.p.index[pos]
            if isinstance(entry, str):
                entry = _unpack(indexformatng, entry)
            yield entry[7]

    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            pass
        try:
            self.load(key)
            return self.p.map[key]
        except KeyError:
            raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]
326 326
327 327 indexformatv0 = ">4l20s20s20s"
328 328 v0shaoffset = 56
329 329
class revlogoldio(object):
    """reader/writer for index entries in the v0 format (indexformatv0)"""

    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        """parse v0 index data into (index, nodemap, None)

        Each entry is converted to the revlogv1 tuple layout, with
        parent nodes translated to revs through the nodemap built so far.
        """
        entrysize = self.size
        index = []
        nodemap = {nullid: nullrev}
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        # only complete entries are parsed
        lastoff = len(data) - entrysize
        for rev, off in enumerate(xrange(0, lastoff + 1, entrysize)):
            raw = data[off:off + entrysize]
            offset, length, base, link, p1, p2, nodeid = \
                _unpack(indexformatv0, raw)
            # transform to revlogv1 format
            index.append((offset_type(offset, 0), length, -1, base, link,
                          nodemap.get(p1, nullrev),
                          nodemap.get(p2, nullrev), nodeid))
            nodemap[nodeid] = rev

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """pack a revlogv1-style entry tuple into v0 on-disk form

        node is called to turn the parent revs back into node ids.
        Raises RevlogError when the entry carries index flags.
        """
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        p1node = node(entry[5])
        p2node = node(entry[6])
        fields = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                  p1node, p2node, entry[7])
        return _pack(indexformatv0, *fields)
361 361
362 362 # index ng:
363 363 # 6 bytes: offset
364 364 # 2 bytes: flags
365 365 # 4 bytes: compressed length
366 366 # 4 bytes: uncompressed length
367 367 # 4 bytes: base rev
368 368 # 4 bytes: link rev
369 369 # 4 bytes: parent 1 rev
370 370 # 4 bytes: parent 2 rev
371 371 # 32 bytes: nodeid
372 372 indexformatng = ">Qiiiiii20s12x"
373 373 ngshaoffset = 32
374 374 versionformat = ">I"
375 375
class revlogio(object):
    """reader/writer for index entries in the ng format (indexformatng)"""

    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        """return (index, nodemap, cache) for the given index data

        When exactly _prereadsize bytes were pre-read, the index is not
        inline and util.openhardlinks() allows it, a lazily parsed index
        is returned; otherwise the rest of fp is read and handed to
        parsers.parse_index.
        """
        if len(data) == _prereadsize:
            if not util.openhardlinks() or inline:
                data += fp.read()
            else:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                lazyidx = lazyindex(parser)
                lazynodes = lazymap(parser)
                # the first entry's offset field is rewritten to
                # offset 0, keeping only its type bits
                first = list(lazyidx[0])
                first[0] = offset_type(0, gettype(first[0]))
                lazyidx[0] = first
                return lazyidx, lazynodes, None

        # call the C implementation to parse the index data
        parsedindex, parsedmap, cache = parsers.parse_index(data, inline)
        return parsedindex, parsedmap, cache

    def packentry(self, entry, node, version, rev):
        """pack entry for writing; rev 0 gets the version in its first
        four bytes"""
        packed = _pack(indexformatng, *entry)
        if rev != 0:
            return packed
        return _pack(versionformat, version) + packed[4:]
404 404
405 405 class revlog(object):
406 406 """
407 407 the underlying revision storage object
408 408
409 409 A revlog consists of two parts, an index and the revision data.
410 410
411 411 The index is a file with a fixed record size containing
412 412 information on each revision, including its nodeid (hash), the
413 413 nodeids of its parents, the position and offset of its data within
414 414 the data file, and the revision it's based on. Finally, each entry
415 415 contains a linkrev entry that can serve as a pointer to external
416 416 data.
417 417
418 418 The revision data itself is a linear collection of data chunks.
419 419 Each chunk represents a revision and is usually represented as a
420 420 delta against the previous chunk. To bound lookup time, runs of
421 421 deltas are limited to about 2 times the length of the original
422 422 version data. This makes retrieval of a version proportional to
423 423 its size, or O(1) relative to the number of revisions.
424 424
425 425 Both pieces of the revlog are written to in an append-only
426 426 fashion, which means we never need to rewrite a file to insert or
427 427 remove data, and can use some simple techniques to avoid the need
428 428 for locking while reading.
429 429 """
def __init__(self, opener, indexfile, shallowroot=None):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    indexfile is the name of the index file; the data file name is
    derived from it by replacing its last two characters with ".d".
    A truthy shallowroot sets the REVLOGSHALLOW flag on the default
    version.

    Raises RevlogError for unknown format/flags or a corrupted index.
    A missing index file (ENOENT) is not an error.
    """
    self.indexfile = indexfile
    self.datafile = indexfile[:-2] + ".d"
    self.opener = opener
    self._cache = None
    self._chunkcache = (0, '')
    self.nodemap = {nullid: nullrev}
    self.index = []
    self._shallowroot = shallowroot
    self._parentdelta = 0

    # version used when no index exists yet; opener options may override
    v = REVLOG_DEFAULT_VERSION
    if hasattr(opener, 'options') and 'defversion' in opener.options:
        v = opener.options['defversion']
        if v & REVLOGNG:
            v |= REVLOGNGINLINEDATA
        if v & REVLOGNG and 'parentdelta' in opener.options:
            self._parentdelta = 1

    if shallowroot:
        v |= REVLOGSHALLOW

    i = ''
    try:
        f = self.opener(self.indexfile)
        if "nonlazy" in getattr(self.opener, 'options', {}):
            i = f.read()
        else:
            i = f.read(_prereadsize)
        if len(i) > 0:
            # an existing index carries its version in its first 4 bytes
            v = struct.unpack(versionformat, i[:4])[0]
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            raise

    self.version = v
    self._inline = v & REVLOGNGINLINEDATA
    self._shallow = v & REVLOGSHALLOW
    # high 16 bits are feature flags, low 16 bits the format number
    flags = v & ~0xFFFF
    fmt = v & 0xFFFF
    if fmt == REVLOGV0 and flags:
        raise RevlogError(_("index %s unknown flags %#04x for format v0")
                          % (self.indexfile, flags >> 16))
    elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
        raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                          % (self.indexfile, flags >> 16))
    elif fmt > REVLOGNG:
        raise RevlogError(_("index %s unknown format %d")
                          % (self.indexfile, fmt))

    self._io = revlogio()
    if self.version == REVLOGV0:
        self._io = revlogoldio()
    if i:
        try:
            d = self._io.parseindex(f, i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, self.nodemap, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()

    # add the magic null revision at -1 (if it hasn't been done already)
    if (self.index == [] or isinstance(self.index, lazyindex) or
        self.index[-1][7] != nullid) :
        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 502
def _loadindex(self, start, end):
    """load a block of indexes all at once from the lazy parser

    Does nothing when the index is not a lazyindex."""
    index = self.index
    if not isinstance(index, lazyindex):
        return
    index.p.loadindex(start, end)
507 507
def _loadindexmap(self):
    """loads both the map and the index from the lazy parser

    Afterwards self.nodemap is the parser's plain map. Does nothing
    when the index is not a lazyindex."""
    if not isinstance(self.index, lazyindex):
        return
    parser = self.index.p
    parser.loadindex()
    self.nodemap = parser.map
514 514
def _loadmap(self):
    """loads the map from the lazy parser

    Replaces a lazymap nodemap with the parser's plain map."""
    lazy = self.nodemap
    if isinstance(lazy, lazymap):
        parser = lazy.p
        parser.loadmap()
        self.nodemap = parser.map
520 520
def tip(self):
    """return the node at index position len(self.index) - 2"""
    lastrev = len(self.index) - 2
    return self.node(lastrev)
523 523 def __len__(self):
524 524 return len(self.index) - 1
525 525 def __iter__(self):
526 526 for i in xrange(len(self)):
527 527 yield i
def rev(self, node):
    """return the revision number recorded for node in the nodemap

    Raises LookupError when the nodemap has no such node."""
    try:
        found = self.nodemap[node]
    except KeyError:
        raise LookupError(node, self.indexfile, _('no node'))
    return found
def node(self, rev):
    """return field 7 (the node id) of the index entry for rev"""
    entry = self.index[rev]
    return entry[7]
def linkrev(self, rev):
    """return field 4 (the link rev) of the index entry for rev"""
    entry = self.index[rev]
    return entry[4]
def parents(self, node):
    """return the node ids of both parents of node as a pair"""
    index = self.index
    entry = index[self.rev(node)]
    # map the parent revisions (fields 5 and 6) to nodes inline
    p1node = index[entry[5]][7]
    p2node = index[entry[6]][7]
    return p1node, p2node
def parentrevs(self, rev):
    """return fields 5 and 6 (the parent revs) of the entry for rev"""
    entry = self.index[rev]
    return entry[5:7]
def start(self, rev):
    """return field 0 of the entry for rev shifted right by 16 bits"""
    packed = self.index[rev][0]
    return int(packed >> 16)
def end(self, rev):
    """return start(rev) plus length(rev)"""
    begin = self.start(rev)
    return begin + self.length(rev)
def length(self, rev):
    """return field 1 of the index entry for rev"""
    entry = self.index[rev]
    return entry[1]
def base(self, rev):
    """return field 3 (the base rev) of the index entry for rev"""
    entry = self.index[rev]
    return entry[3]
def flags(self, rev):
    """return the low 16 bits of field 0 of the index entry for rev"""
    packed = self.index[rev][0]
    return packed & 0xFFFF
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision

    A negative length in the index means it is not recorded; the
    revision text is then fetched and measured."""
    recorded = self.index[rev][2]
    if recorded < 0:
        text = self.revision(self.node(rev))
        return len(text)
    return recorded
561 561 size = rawsize
562 562
def reachable(self, node, stop=None):
    """return the set of all nodes ancestral to a given node, including
    the node itself, stopping when stop is matched

    When stop is given, parents whose revision number is lower than
    stop's are not followed either."""
    # local import: a deque pops from the front in O(1), where
    # list.pop(0) shifts the whole queue on every step
    import collections
    reachable = set((node,))
    visit = collections.deque([node])
    if stop:
        stopn = self.rev(stop)
    else:
        stopn = 0
    while visit:
        n = visit.popleft()
        if n == stop:
            continue
        if n == nullid:
            continue
        for p in self.parents(n):
            if self.rev(p) < stopn:
                continue
            if p not in reachable:
                reachable.add(p)
                visit.append(p)
    return reachable
585 585
def ancestors(self, *revs):
    """Generate the ancestors of 'revs' in breadth-first order.

    Yield a sequence of revision numbers starting with the parents
    of each revision in revs, i.e., each revision is *not* considered
    an ancestor of itself. Results are in breadth-first order:
    parents of each rev in revs, then parents of those, etc. Result
    does not include the null revision."""
    # local import: a deque pops from the front in O(1), where
    # list.pop(0) shifts the whole queue on every step
    import collections
    visit = collections.deque(revs)
    seen = set([nullrev])
    while visit:
        for parent in self.parentrevs(visit.popleft()):
            if parent not in seen:
                visit.append(parent)
                seen.add(parent)
                yield parent
602 602
def descendants(self, *revs):
    """Generate the descendants of 'revs' in revision order.

    Yield a sequence of revision numbers starting with a child of
    some rev in revs, i.e., each revision is *not* considered a
    descendant of itself. Results are ordered by revision number (a
    topological sort)."""
    reached = set(revs)
    first = min(revs) + 1
    for candidate in xrange(first, len(self)):
        for parent in self.parentrevs(candidate):
            if parent == nullrev or parent not in reached:
                continue
            # one reached parent is enough to make this a descendant
            reached.add(candidate)
            yield candidate
            break
617 617
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    # local import: a deque pops from the front in O(1), where
    # list.pop(0) shifts the whole queue on every step
    import collections
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    has = set(self.ancestors(*common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque([r for r in heads if r not in has])
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)
    missing = list(missing)
    missing.sort()
    return [self.node(r) for r in missing]
661 661
662 662 def nodesbetween(self, roots=None, heads=None):
663 663 """Return a topological path from 'roots' to 'heads'.
664 664
665 665 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
666 666 topologically sorted list of all nodes N that satisfy both of
667 667 these constraints:
668 668
669 669 1. N is a descendant of some node in 'roots'
670 670 2. N is an ancestor of some node in 'heads'
671 671
672 672 Every node is considered to be both a descendant and an ancestor
673 673 of itself, so every reachable node in 'roots' and 'heads' will be
674 674 included in 'nodes'.
675 675
676 676 'outroots' is the list of reachable nodes in 'roots', i.e., the
677 677 subset of 'roots' that is returned in 'nodes'. Likewise,
678 678 'outheads' is the subset of 'heads' that is also in 'nodes'.
679 679
680 680 'roots' and 'heads' are both lists of node IDs. If 'roots' is
681 681 unspecified, uses nullid as the only root. If 'heads' is
682 682 unspecified, uses list of all of the revlog's heads."""
683 683 nonodes = ([], [], [])
684 684 if roots is not None:
685 685 roots = list(roots)
686 686 if not roots:
687 687 return nonodes
688 688 lowestrev = min([self.rev(n) for n in roots])
689 689 else:
690 690 roots = [nullid] # Everybody's a descendent of nullid
691 691 lowestrev = nullrev
692 692 if (lowestrev == nullrev) and (heads is None):
693 693 # We want _all_ the nodes!
694 694 return ([self.node(r) for r in self], [nullid], list(self.heads()))
695 695 if heads is None:
696 696 # All nodes are ancestors, so the latest ancestor is the last
697 697 # node.
698 698 highestrev = len(self) - 1
699 699 # Set ancestors to None to signal that every node is an ancestor.
700 700 ancestors = None
701 701 # Set heads to an empty dictionary for later discovery of heads
702 702 heads = {}
703 703 else:
704 704 heads = list(heads)
705 705 if not heads:
706 706 return nonodes
707 707 ancestors = set()
708 708 # Turn heads into a dictionary so we can remove 'fake' heads.
709 709 # Also, later we will be using it to filter out the heads we can't
710 710 # find from roots.
711 711 heads = dict.fromkeys(heads, 0)
712 712 # Start at the top and keep marking parents until we're done.
713 713 nodestotag = set(heads)
714 714 # Remember where the top was so we can use it as a limit later.
715 715 highestrev = max([self.rev(n) for n in nodestotag])
716 716 while nodestotag:
717 717 # grab a node to tag
718 718 n = nodestotag.pop()
719 719 # Never tag nullid
720 720 if n == nullid:
721 721 continue
722 722 # A node's revision number represents its place in a
723 723 # topologically sorted list of nodes.
724 724 r = self.rev(n)
725 725 if r >= lowestrev:
726 726 if n not in ancestors:
727 727 # If we are possibly a descendent of one of the roots
728 728 # and we haven't already been marked as an ancestor
729 729 ancestors.add(n) # Mark as ancestor
730 730 # Add non-nullid parents to list of nodes to tag.
731 731 nodestotag.update([p for p in self.parents(n) if
732 732 p != nullid])
733 733 elif n in heads: # We've seen it before, is it a fake head?
734 734 # So it is, real heads should not be the ancestors of
735 735 # any other heads.
736 736 heads.pop(n)
737 737 if not ancestors:
738 738 return nonodes
739 739 # Now that we have our set of ancestors, we want to remove any
740 740 # roots that are not ancestors.
741 741
742 742 # If one of the roots was nullid, everything is included anyway.
743 743 if lowestrev > nullrev:
744 744 # But, since we weren't, let's recompute the lowest rev to not
745 745 # include roots that aren't ancestors.
746 746
747 747 # Filter out roots that aren't ancestors of heads
748 748 roots = [n for n in roots if n in ancestors]
749 749 # Recompute the lowest revision
750 750 if roots:
751 751 lowestrev = min([self.rev(n) for n in roots])
752 752 else:
753 753 # No more roots? Return empty list
754 754 return nonodes
755 755 else:
756 756 # We are descending from nullid, and don't need to care about
757 757 # any other roots.
758 758 lowestrev = nullrev
759 759 roots = [nullid]
760 760 # Transform our roots list into a set.
761 761 descendents = set(roots)
762 762 # Also, keep the original roots so we can filter out roots that aren't
763 763 # 'real' roots (i.e. are descended from other roots).
764 764 roots = descendents.copy()
765 765 # Our topologically sorted list of output nodes.
766 766 orderedout = []
767 767 # Don't start at nullid since we don't want nullid in our output list,
768 768 # and if nullid shows up in descedents, empty parents will look like
769 769 # they're descendents.
770 770 for r in xrange(max(lowestrev, 0), highestrev + 1):
771 771 n = self.node(r)
772 772 isdescendent = False
773 773 if lowestrev == nullrev: # Everybody is a descendent of nullid
774 774 isdescendent = True
775 775 elif n in descendents:
776 776 # n is already a descendent
777 777 isdescendent = True
778 778 # This check only needs to be done here because all the roots
779 779 # will start being marked is descendents before the loop.
780 780 if n in roots:
781 781 # If n was a root, check if it's a 'real' root.
782 782 p = tuple(self.parents(n))
783 783 # If any of its parents are descendents, it's not a root.
784 784 if (p[0] in descendents) or (p[1] in descendents):
785 785 roots.remove(n)
786 786 else:
787 787 p = tuple(self.parents(n))
788 788 # A node is a descendent if either of its parents are
789 789 # descendents. (We seeded the dependents list with the roots
790 790 # up there, remember?)
791 791 if (p[0] in descendents) or (p[1] in descendents):
792 792 descendents.add(n)
793 793 isdescendent = True
794 794 if isdescendent and ((ancestors is None) or (n in ancestors)):
795 795 # Only include nodes that are both descendents and ancestors.
796 796 orderedout.append(n)
797 797 if (ancestors is not None) and (n in heads):
798 798 # We're trying to figure out which heads are reachable
799 799 # from roots.
800 800 # Mark this head as having been reached
801 801 heads[n] = 1
802 802 elif ancestors is None:
803 803 # Otherwise, we're trying to discover the heads.
804 804 # Assume this is a head because if it isn't, the next step
805 805 # will eventually remove it.
806 806 heads[n] = 1
807 807 # But, obviously its parents aren't.
808 808 for p in self.parents(n):
809 809 heads.pop(p, None)
810 810 heads = [n for n in heads.iterkeys() if heads[n] != 0]
811 811 roots = list(roots)
812 812 assert orderedout
813 813 assert roots
814 814 assert heads
815 815 return (orderedout, roots, heads)
816 816
def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    start -- when given, only heads that descend from this node are
             returned; nullid is substituted when only stop is given
    stop  -- optional iterable of nodes; their revisions are treated as
             if they had no children
    """
    if start is None and stop is None:
        # Fast path: assume every revision is a head, then clear the flag
        # of anything that some index entry names as a parent.
        total = len(self)
        if not total:
            return [nullid]
        # One spare slot at the end; presumably it absorbs writes for
        # nullrev (-1) parents so they need no special case -- TODO confirm.
        ishead = [1] * (total + 1)
        index = self.index
        for rev in xrange(total):
            entry = index[rev]
            # entry[5] / entry[6] hold the two parent revisions
            ishead[entry[5]] = ishead[entry[6]] = 0
        return [self.node(rev) for rev in xrange(total) if ishead[rev]]

    if start is None:
        start = nullid
    if stop is None:
        stop = []
    stoprevs = set([self.rev(n) for n in stop])
    startrev = self.rev(start)
    reachable = set((startrev,))
    found = set((startrev,))

    parentrevs = self.parentrevs
    for rev in xrange(startrev + 1, len(self)):
        for parent in parentrevs(rev):
            if parent in reachable:
                # a stop revision is reported but never walked through
                if rev not in stoprevs:
                    reachable.add(rev)
                found.add(rev)
            if parent in found and parent not in stoprevs:
                # parent has a child, so it is no longer a head
                found.remove(parent)

    return [self.node(rev) for rev in found]
856 856
def children(self, node):
    """Find the children of a given node.

    Scans every revision after node's own and returns, as a list of
    nodes, those that name it as a parent.  When node resolves to
    nullrev, revisions without any non-null parent are returned.
    """
    target = self.rev(node)
    found = []
    for rev in range(target + 1, len(self)):
        realparents = [pr for pr in self.parentrevs(rev) if pr != nullrev]
        if not realparents:
            # parentless revision: only a child of the null revision
            if target == nullrev:
                found.append(self.node(rev))
            continue
        for pr in realparents:
            if pr == target:
                found.append(self.node(rev))
    return found
870 870
def descendant(self, start, end):
    """Return True if end is produced by self.descendants(start).

    The scan gives up at the first value greater than end, so this
    presumably relies on descendants() yielding revisions in increasing
    order -- TODO confirm.
    """
    for rev in self.descendants(start):
        if rev >= end:
            # either an exact hit, or we have already walked past end
            return rev == end
    return False
878 878
def ancestor(self, a, b):
    """Calculate the least common ancestor of nodes a and b.

    Returns a node; nullid is returned when the ancestor search
    reports no common ancestor (None).
    """
    reva = self.rev(a)
    revb = self.rev(b)

    # fast path: if the higher revision descends from the lower one,
    # the lower one is the answer
    low, high = sorted((reva, revb))
    if self.descendant(low, high):
        return self.node(low)

    def realparents(rev):
        # parent revisions with the null revision filtered out
        return [p for p in self.parentrevs(rev) if p != nullrev]

    # NOTE(review): 'ancestor' below is expected to resolve to the
    # module-level ancestor helper, not to this method -- confirm.
    common = ancestor.ancestor(reva, revb, realparents)
    if common is None:
        return nullid
    return self.node(common)
896 896
def _match(self, id):
    """Resolve an exact identifier to a node, or return None.

    Tried in order: an integer revision number, a 20-byte value that is
    already a known node, the decimal string form of a revision number
    (negative values count back from the end), and a 40-character hex
    node id.  None is returned when nothing matches.
    """
    if isinstance(id, (long, int)):
        # already a revision number
        return self.node(id)
    if len(id) == 20:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            self.rev(id) # quick search the index
            return id
        except LookupError:
            pass # may be partial hex id
    try:
        # str(rev)
        rev = int(id)
        if str(rev) != id:
            # reject forms such as "007" or " 7" that int() accepts
            raise ValueError
        if rev < 0:
            rev += len(self)
        if not 0 <= rev < len(self):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass
    if len(id) == 40:
        try:
            # a full hex nodeid?
            binnode = bin(id)
            self.rev(binnode)
            return binnode
        except (TypeError, LookupError):
            pass
    return None
930 930
def _partialmatch(self, id):
    """Resolve a hex prefix shorter than 40 characters to a node.

    Returns the single matching node, or None when nothing matches, when
    id is 40 characters or longer, or when the prefix cannot be converted
    (TypeError).  Raises LookupError when several nodes share the prefix.
    """
    if len(id) >= 40:
        return None
    try:
        # hex(node)[:...]
        nbytes = len(id) // 2  # grab an even number of digits
        binprefix = bin(id[:nbytes * 2])
        # cheap binary pre-filter first, then the exact hex comparison
        # (which also accounts for a trailing odd digit)
        candidates = [n for n in self.nodemap if n[:nbytes] == binprefix]
        candidates = [n for n in candidates if hex(n).startswith(id)]
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        raise LookupError(id, self.indexfile,
                          _('ambiguous identifier'))
    except TypeError:
        pass
947 947
def lookup(self, id):
    """Locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid

    An exact match (self._match) is tried before a prefix match
    (self._partialmatch).  Raises LookupError when neither succeeds.
    """
    n = self._match(id)
    if n is None:
        n = self._partialmatch(id)
        if not n:
            raise LookupError(id, self.indexfile, _('no match found'))
    return n
961 961
def cmp(self, node, text):
    """Compare text with a given file revision.

    Returns True if text is different than what is stored.  The check
    is done by hashing text together with node's parents and comparing
    the result against node; the stored text itself is not read here.
    """
    parent1, parent2 = self.parents(node)
    expected = hash(text, parent1, parent2)
    return expected != node
969 969
def _addchunk(self, offset, data):
    """Record data, read from the given file offset, in the chunk cache.

    The data is appended to the cached buffer when it starts exactly
    where that buffer ends and the combined length stays below
    _prereadsize; otherwise it replaces the cache.
    """
    cachedoffset, cacheddata = self._chunkcache
    contiguous = cachedoffset + len(cacheddata) == offset
    # try to add to existing cache
    if contiguous and len(cacheddata) + len(data) < _prereadsize:
        self._chunkcache = cachedoffset, cacheddata + data
    else:
        self._chunkcache = offset, data
977 977
def _loadchunk(self, offset, length):
    """Read length bytes at offset from the revlog's data storage.

    The index file is read when the revlog is inline, the data file
    otherwise.  At least 65536 bytes are requested so that the read-ahead
    can be handed to self._addchunk for later cache hits; only the first
    length bytes are returned to the caller.
    """
    if self._inline:
        df = self.opener(self.indexfile)
    else:
        df = self.opener(self.datafile)

    readahead = max(65536, length)
    try:
        df.seek(offset)
        d = df.read(readahead)
    finally:
        # close explicitly rather than leaving the descriptor open until
        # the file object happens to be garbage collected
        df.close()
    self._addchunk(offset, d)
    if readahead > length:
        return d[:length]
    return d
991 991
def _getchunk(self, offset, length):
    """Return length bytes starting at file offset, using the chunk cache.

    When the requested window lies entirely inside the cached buffer it
    is served from there; otherwise the request goes to self._loadchunk.
    """
    cacheoffset, cached = self._chunkcache
    cachedlen = len(cached)

    # translate the request into positions relative to the cached buffer
    begin = offset - cacheoffset
    finish = begin + length
    if begin < 0 or finish > cachedlen:
        # not (fully) in the cache
        return self._loadchunk(offset, length)
    if begin == 0 and finish == cachedlen:
        return cached # avoid a copy
    return cached[begin:finish]
1005 1005
def _chunkraw(self, startrev, endrev):
    """Return the raw stored bytes spanning revisions startrev..endrev.

    The span runs from self.start(startrev) to self.end(endrev).  For an
    inline revlog the offset is advanced by (startrev + 1) index-entry
    sizes before the read is delegated to self._getchunk.
    """
    begin = self.start(startrev)
    length = self.end(endrev) - begin
    if not self._inline:
        return self._getchunk(begin, length)
    # skip the index entries stored in the same file as the data
    return self._getchunk(begin + (startrev + 1) * self._io.size, length)
1012 1012
def _chunk(self, rev):
    """Return the decompressed stored chunk of a single revision."""
    raw = self._chunkraw(rev, rev)
    return decompress(raw)
1015 1015
def _chunkclear(self):
    """Reset the chunk cache to an empty buffer at offset 0."""
    emptyoffset, emptydata = 0, ''
    self._chunkcache = (emptyoffset, emptydata)
1018 1018
def deltaparent(self, rev):
    """Return previous revision or parentrev according to flags.

    With REVIDX_PARENTDELTA set on rev the first parent revision is
    returned; otherwise the answer is simply rev - 1.
    """
    if not self.flags(rev) & REVIDX_PARENTDELTA:
        return rev - 1
    return self.parentrevs(rev)[0]
1025 1025
def revdiff(self, rev1, rev2):
    """Return or calculate a delta between two revisions.

    When rev2 is not its own base and its delta parent is rev1, the
    stored chunk of rev2 is returned directly.  Otherwise both texts are
    reconstructed and diffed with mdiff.textdiff.
    """
    storedagainstrev1 = (self.base(rev2) != rev2 and
                         self.deltaparent(rev2) == rev1)
    if storedagainstrev1:
        return self._chunk(rev2)

    oldtext = self.revision(self.node(rev1))
    newtext = self.revision(self.node(rev2))
    return mdiff.textdiff(oldtext, newtext)
1033 1033
def revision(self, node):
    """Return an uncompressed revision of a given node.

    The text is rebuilt by walking the delta chain back from the
    requested revision to its base (or to the currently cached revision,
    whichever comes first) and applying the stored deltas in order.

    Raises RevlogError when the revision carries flags outside
    REVIDX_KNOWN_FLAGS, or when the rebuilt text does not hash to node
    and the revision is not flagged REVIDX_PUNCHED_FLAG.
    """
    cachedrev = None
    if node == nullid:
        return ""
    if self._cache:
        # self._cache is a (node, rev, text) triple
        if self._cache[0] == node:
            return self._cache[2]
        cachedrev = self._cache[1]

    # look up what we need to read
    text = None
    rev = self.rev(node)
    base = self.base(rev)

    # check rev flags
    if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
        raise RevlogError(_('incompatible revision flag %x') %
                          (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

    # build delta chain
    self._loadindex(base, rev + 1)
    chain = []
    index = self.index # for performance
    currev = rev
    entry = index[currev]
    while currev != base and currev != cachedrev:
        chain.append(currev)
        if entry[0] & REVIDX_PARENTDELTA:
            # delta is stored against the first parent (entry[5])
            currev = entry[5]
        else:
            currev -= 1
        entry = index[currev]
    chain.reverse()
    base = currev

    if currev == cachedrev:
        # cache hit
        text = self._cache[2]

    # drop cache to save memory
    self._cache = None

    # result is discarded; presumably this call is only made so the
    # chunk cache covers base..rev before the per-revision reads below
    # -- TODO confirm
    self._chunkraw(base, rev)
    if text is None:
        text = self._chunk(base)

    deltas = [self._chunk(r) for r in chain]
    text = mdiff.patches(text, deltas)
    p1, p2 = self.parents(node)
    if (node != hash(text, p1, p2) and
        not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
        raise RevlogError(_("integrity check failed on %s:%d")
                          % (self.indexfile, rev))

    self._cache = (node, rev, text)
    return text
1091 1091
def checkinlinesize(self, tr, fp=None):
    """Convert an inline revlog to separate index and data files.

    Nothing happens unless the revlog is inline and start(-2) + length(-2)
    has reached _maxinline.

    tr -- transaction; it must already know about self.indexfile,
          otherwise RevlogError is raised
    fp -- optional open handle on the index file; it is flushed and
          closed before the files are rewritten
    """
    if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
        return

    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        raise RevlogError(_("%s not found in the transaction")
                          % self.indexfile)

    trindex = trinfo[2]
    dataoff = self.start(trindex)

    tr.add(self.datafile, dataoff)

    if fp:
        fp.flush()
        fp.close()

    # copy every revision's raw chunk into the new data file
    datafh = self.opener(self.datafile, 'w')
    try:
        for rev in self:
            datafh.write(self._chunkraw(rev, rev))
    finally:
        datafh.close()

    # rewrite the index without the inline flag
    fp = self.opener(self.indexfile, 'w', atomictemp=True)
    self.version &= ~(REVLOGNGINLINEDATA)
    self._inline = False
    for rev in self:
        packed = self._io.packentry(self.index[rev], self.node,
                                    self.version, rev)
        fp.write(packed)

    # if we don't call rename, the temp file will never replace the
    # real index
    fp.rename()

    tr.replace(self.indexfile, trindex * self._io.size)
    self._chunkclear()
1130 1130
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
    """Add a revision to the log.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta

    Returns the node of the revision.  When that node is already present
    and not flagged REVIDX_PUNCHED_FLAG, nothing is written.
    """
    node = hash(text, p1, p2)
    if (node in self.nodemap and
        (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
        return node

    dfh = ifh = None
    try:
        # both opens happen inside the try so that a failure to open the
        # index does not leave the data file handle dangling
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(node, text, transaction, link, p1, p2,
                                 cachedelta, ifh, dfh)
    finally:
        if dfh:
            dfh.close()
        if ifh is not None:
            ifh.close()
1156 1156
def _addrevision(self, node, text, transaction, link, p1, p2,
                 cachedelta, ifh, dfh):
    """Append one revision to the index (and data file) and return node.

    node - precomputed node id of the new revision
    text - full text of the revision
    transaction - transaction object the written offsets are added to
    link - linkrev stored in the index entry
    p1, p2 - parent nodeids
    cachedelta - optional (rev, delta) pair; the delta is reused only
                 when rev equals the revision we would delta against
    ifh, dfh - open index / data file handles (dfh is unused when the
               revlog is inline)
    """
    curr = len(self)
    prev = curr - 1
    base = curr
    offset = self.end(prev)
    flags = 0
    delta = None

    # pick the revision the delta is computed against
    if self._parentdelta:
        deltarev, deltanode = self.rev(p1), p1
        flags = REVIDX_PARENTDELTA
    else:
        deltarev, deltanode = prev, self.node(prev)

    # should we try to build a delta?
    if deltarev != nullrev:
        # can we use the cached delta?
        if cachedelta:
            cacherev, delta = cachedelta
            if cacherev != deltarev:
                delta = None
        if delta is None:
            ptext = self.revision(deltanode)
            delta = mdiff.textdiff(ptext, text)
        data = compress(delta)
        complen = len(data[1]) + len(data[0])
        base = self.base(deltarev)
        dist = complen + offset - self.start(base)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    # or the base revision is punched
    if (delta is None or dist > len(text) * 2 or
        (self.flags(base) & REVIDX_PUNCHED_FLAG)):
        data = compress(text)
        complen = len(data[1]) + len(data[0])
        base = curr

    ientry = (offset_type(offset, flags), complen, len(text),
              base, link, self.rev(p1), self.rev(p2), node)
    # insert before the final index slot, which is kept in place
    self.index.insert(-1, ientry)
    self.nodemap[node] = curr

    packed = self._io.packentry(ientry, self.node, self.version, curr)
    if not self._inline:
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(packed))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        dfh.flush()
        ifh.write(packed)
    else:
        # inline: data follows its index entry inside the index file
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(packed)
        ifh.write(data[0])
        ifh.write(data[1])
        self.checkinlinesize(transaction, ifh)

    if type(text) == str: # only accept immutable objects
        self._cache = (node, curr, text)
    return node
1221 1221
def group(self, nodelist, lookup, infocollect=None, fullrev=False):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. The first delta is
    first parent(nodelist[0]) -> nodelist[0], the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.
    fullrev forces the insertion of the full revision, necessary
    in the case of shallow clones where the first parent might
    not exist at the receiver.

    lookup(node) supplies the trailing metadata field of each chunk;
    infocollect, when given, is called with every node emitted.
    """

    revs = [self.rev(n) for n in nodelist]

    # if we don't have any revisions touched by these changesets, bail
    if not revs:
        yield changegroup.closechunk()
        return

    # add the parent of the first rev
    firstparent = self.parentrevs(revs[0])[0]
    revs.insert(0, firstparent)
    if firstparent == nullrev:
        fullrev = True

    # build deltas between each consecutive pair of revisions
    for pos in xrange(len(revs) - 1):
        prevrev, currev = revs[pos], revs[pos + 1]
        curnode = self.node(currev)

        if infocollect is not None:
            infocollect(curnode)

        parents = self.parents(curnode)
        meta = curnode + parents[0] + parents[1] + lookup(curnode)
        if fullrev:
            # only the first chunk can be a full revision
            payload = self.revision(curnode)
            meta += mdiff.trivialdiffheader(len(payload))
            fullrev = False
        else:
            payload = self.revdiff(prevrev, currev)
        yield changegroup.chunkheader(len(meta) + len(payload))
        yield meta
        yield payload

    yield changegroup.closechunk()
1271 1271
def addgroup(self, bundle, linkmapper, transaction):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    bundle - object whose chunk() method returns the next chunk, or
             an empty value once the group is exhausted
    linkmapper - called with the fourth 20-byte header field of each
                 chunk; its result is stored as the linkrev
    transaction - transaction object the written offsets are added to

    returns the node of the last chunk read (None if there was none)
    """

    #track the base of the current delta log
    r = len(self)
    t = r - 1
    node = None

    base = prev = nullrev
    start = end = textlen = 0
    if r:
        end = self.end(t)

    ifh = self.opener(self.indexfile, "a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self.opener(self.datafile, "a")

    try:
        # loop through our set of deltas
        chain = None
        while True:
            chunk = bundle.chunk()
            if not chunk:
                break
            # 80-byte header: node, both parents, and the field that is
            # handed to linkmapper; the delta follows
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if (node in self.nodemap and
                (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
                # this can happen if two branches make the same change
                chain = node
                continue
            delta = buffer(chunk, 80)
            del chunk

            for p in (p1, p2):
                if p not in self.nodemap:
                    if self._shallow:
                        # add null entries for missing parents
                        if base == nullrev:
                            base = len(self)
                        e = (offset_type(end, REVIDX_PUNCHED_FLAG),
                             0, 0, base, nullrev, nullrev, nullrev, p)
                        self.index.insert(-1, e)
                        self.nodemap[p] = r
                        entry = self._io.packentry(e, self.node,
                                                   self.version, r)
                        ifh.write(entry)
                        t, r = r, r + 1
                    else:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise LookupError(chain, self.indexfile, _('unknown base'))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)
                cdeltalen = len(cdelta[0]) + len(cdelta[1])
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + cdeltalen) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = mdiff.patch(text, delta)
                del delta
                chk = self._addrevision(node, text, transaction, link,
                                        p1, p2, None, ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                # store the received delta as-is
                e = (offset_type(end, 0), cdeltalen, textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta[0])
                    ifh.write(cdelta[1])
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        # checkinlinesize split the revlog; reopen both
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta[0])
                    dfh.write(cdelta[1])
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return node
1397 1400
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll readd them after this truncation.
    """
    if not len(self):
        return

    if isinstance(self.index, lazyindex):
        self._loadindexmap()

    # find the first revision to drop; index field 4 is the linkrev
    for rev in self:
        if self.index[rev][4] >= minlink:
            break
    else:
        # nothing reaches minlink, so there is nothing to strip
        return

    # first truncate the files on disk
    dataend = self.start(rev)
    if self._inline:
        indexend = dataend + rev * self._io.size
    else:
        transaction.add(self.datafile, dataend)
        indexend = rev * self._io.size

    transaction.add(self.indexfile, indexend)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # the final index slot is kept
    del self.index[rev:-1]
1441 1444
1442 1445 def checksize(self):
1443 1446 expected = 0
1444 1447 if len(self):
1445 1448 expected = max(0, self.end(len(self) - 1))
1446 1449
1447 1450 try:
1448 1451 f = self.opener(self.datafile)
1449 1452 f.seek(0, 2)
1450 1453 actual = f.tell()
1451 1454 dd = actual - expected
1452 1455 except IOError, inst:
1453 1456 if inst.errno != errno.ENOENT:
1454 1457 raise
1455 1458 dd = 0
1456 1459
1457 1460 try:
1458 1461 f = self.opener(self.indexfile)
1459 1462 f.seek(0, 2)
1460 1463 actual = f.tell()
1461 1464 s = self._io.size
1462 1465 i = max(0, actual // s)
1463 1466 di = actual - (i * s)
1464 1467 if self._inline:
1465 1468 databytes = 0
1466 1469 for r in self:
1467 1470 databytes += max(0, self.length(r))
1468 1471 dd = 0
1469 1472 di = actual - len(self) * s - databytes
1470 1473 except IOError, inst:
1471 1474 if inst.errno != errno.ENOENT:
1472 1475 raise
1473 1476 di = 0
1474 1477
1475 1478 return (dd, di)
1476 1479
def files(self):
    """Return the list of files backing this revlog.

    The index file always comes first; the data file is included only
    when the revlog is not inline.
    """
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
General Comments 0
You need to be logged in to leave comments. Login now