changegroup: combine infocollect and lookup callbacks
Matt Mackall
r13782:9131724c default
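This commit folds the changegroup "infocollect" (per-node progress/bookkeeping) callback into the "lookup" callback, so only one per-node callback has to be threaded through group generation. The first hunk below (shrink-revlog.py) shows the pattern: the bare lookup lambda becomes a function that reports progress before returning its result. The following is a minimal, self-contained sketch of that callback-combining idea; the names makegroup, process, and report are illustrative only, not Mercurial's changegroup API.

    def makegroup(nodes, lookup):
        """Yield (node, linkvalue) pairs; lookup is invoked once per node."""
        for node in nodes:
            yield node, lookup(node)

    def process(nodes, linkrevs, report):
        """Build a group while reporting progress from inside the lookup
        callback itself, instead of passing a second 'infocollect' callback."""
        seen = [0]                        # mutable counter captured by the closure
        def lookup(node):
            report(seen[0], len(nodes))   # progress side effect (was infocollect)
            seen[0] += 1
            return linkrevs[node]         # the actual lookup result
        return list(makegroup(nodes, lookup))

    if __name__ == "__main__":
        nodes = ["a", "b", "c"]
        linkrevs = {"a": 0, "b": 1, "c": 2}
        out = process(nodes, linkrevs, lambda done, total: None)
        assert out == [("a", 0), ("b", 1), ("c", 2)]

The same shape appears in writerevs() in the hunk below: the new lookup(x) calls progress(x) before returning the padded linkrev string.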
@@ -1,287 +1,290 @@
1 1 """\
2 2 reorder a revlog (the manifest by default) to save space
3 3
4 4 Specifically, this topologically sorts the revisions in the revlog so that
5 5 revisions on the same branch are adjacent as much as possible. This is a
6 6 workaround for the fact that Mercurial computes deltas relative to the
7 7 previous revision rather than relative to a parent revision.
8 8
9 9 This is *not* safe to run on a changelog.
10 10 """
11 11
12 12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 14 # renamed by Greg Ward <greg at gerg.ca>.
15 15
16 16 # XXX would be nice to have a way to verify the repository after shrinking,
17 17 # e.g. by comparing "before" and "after" states of random changesets
18 18 # (maybe: export before, shrink, export after, diff).
19 19
20 20 import os, tempfile, errno
21 21 from mercurial import revlog, transaction, node, util
22 22 from mercurial import changegroup
23 23 from mercurial.i18n import _
24 24
25 25
26 26 def postorder(start, edges):
27 27 result = []
28 28 visit = list(start)
29 29 finished = set()
30 30
31 31 while visit:
32 32 cur = visit[-1]
33 33 for p in edges[cur]:
34 34 if p not in finished:
35 35 visit.append(p)
36 36 break
37 37 else:
38 38 result.append(cur)
39 39 finished.add(cur)
40 40 visit.pop()
41 41
42 42 return result
43 43
44 44 def toposort_reversepostorder(ui, rl):
45 45 # postorder of the reverse directed graph
46 46
47 47 # map rev to list of parent revs (p2 first)
48 48 parents = {}
49 49 heads = set()
50 50 ui.status(_('reading revs\n'))
51 51 try:
52 52 for rev in rl:
53 53 ui.progress(_('reading'), rev, total=len(rl))
54 54 (p1, p2) = rl.parentrevs(rev)
55 55 if p1 == p2 == node.nullrev:
56 56 parents[rev] = () # root node
57 57 elif p1 == p2 or p2 == node.nullrev:
58 58 parents[rev] = (p1,) # normal node
59 59 else:
60 60 parents[rev] = (p2, p1) # merge node
61 61 heads.add(rev)
62 62 for p in parents[rev]:
63 63 heads.discard(p)
64 64 finally:
65 65 ui.progress(_('reading'), None)
66 66
67 67 heads = list(heads)
68 68 heads.sort(reverse=True)
69 69
70 70 ui.status(_('sorting revs\n'))
71 71 return postorder(heads, parents)
72 72
73 73 def toposort_postorderreverse(ui, rl):
74 74 # reverse-postorder of the reverse directed graph
75 75
76 76 children = {}
77 77 roots = set()
78 78 ui.status(_('reading revs\n'))
79 79 try:
80 80 for rev in rl:
81 81 ui.progress(_('reading'), rev, total=len(rl))
82 82 (p1, p2) = rl.parentrevs(rev)
83 83 if p1 == p2 == node.nullrev:
84 84 roots.add(rev)
85 85 children[rev] = []
86 86 if p1 != node.nullrev:
87 87 children[p1].append(rev)
88 88 if p2 != node.nullrev:
89 89 children[p2].append(rev)
90 90 finally:
91 91 ui.progress(_('reading'), None)
92 92
93 93 roots = list(roots)
94 94 roots.sort()
95 95
96 96 ui.status(_('sorting revs\n'))
97 97 result = postorder(roots, children)
98 98 result.reverse()
99 99 return result
100 100
101 101 def writerevs(ui, r1, r2, order, tr):
102 102
103 103 ui.status(_('writing revs\n'))
104 104
105 105 count = [0]
106 106 def progress(*args):
107 107 ui.progress(_('writing'), count[0], total=len(order))
108 108 count[0] += 1
109 109
110 110 order = [r1.node(r) for r in order]
111 111
112 112 # this is a bit ugly, but it works
113 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
113 def lookup(x):
114 progress(x)
115 return "%020d" % r1.linkrev(r1.rev(x))
116
114 117 unlookup = lambda x: int(x, 10)
115 118
116 119 try:
117 120 group = util.chunkbuffer(r1.group(order, lookup, progress))
118 121 group = changegroup.unbundle10(group, "UN")
119 122 r2.addgroup(group, unlookup, tr)
120 123 finally:
121 124 ui.progress(_('writing'), None)
122 125
123 126 def report(ui, r1, r2):
124 127 def getsize(r):
125 128 s = 0
126 129 for fn in (r.indexfile, r.datafile):
127 130 try:
128 131 s += os.stat(fn).st_size
129 132 except OSError, inst:
130 133 if inst.errno != errno.ENOENT:
131 134 raise
132 135 return s
133 136
134 137 oldsize = float(getsize(r1))
135 138 newsize = float(getsize(r2))
136 139
137 140 # argh: have to pass an int to %d, because a float >= 2^32
138 141 # blows up under Python 2.5 or earlier
139 142 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
140 143 % (int(oldsize), oldsize / 1024 / 1024))
141 144 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
142 145 % (int(newsize), newsize / 1024 / 1024))
143 146
144 147 shrink_percent = (oldsize - newsize) / oldsize * 100
145 148 shrink_factor = oldsize / newsize
146 149 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
147 150 % (shrink_percent, shrink_factor))
148 151
149 152 def shrink(ui, repo, **opts):
150 153 """shrink a revlog by reordering revisions
151 154
152 155 Rewrites all the entries in some revlog of the current repository
153 156 (by default, the manifest log) to save space.
154 157
155 158 Different sort algorithms have different performance
156 159 characteristics. Use ``--sort`` to select a sort algorithm so you
157 160 can determine which works best for your data.
158 161 """
159 162
160 163 if not repo.local():
161 164 raise util.Abort(_('not a local repository: %s') % repo.root)
162 165
163 166 fn = opts.get('revlog')
164 167 if not fn:
165 168 indexfn = repo.sjoin('00manifest.i')
166 169 else:
167 170 if not fn.endswith('.i'):
168 171 raise util.Abort(_('--revlog option must specify the revlog index '
169 172 'file (*.i), not %s') % opts.get('revlog'))
170 173
171 174 indexfn = os.path.realpath(fn)
172 175 store = repo.sjoin('')
173 176 if not indexfn.startswith(store):
174 177 raise util.Abort(_('--revlog option must specify a revlog in %s, '
175 178 'not %s') % (store, indexfn))
176 179
177 180 sortname = opts['sort']
178 181 try:
179 182 toposort = globals()['toposort_' + sortname]
180 183 except KeyError:
181 184 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
182 185
183 186 if not os.path.exists(indexfn):
184 187 raise util.Abort(_('no such file: %s') % indexfn)
185 188 if '00changelog' in indexfn:
186 189 raise util.Abort(_('shrinking the changelog '
187 190 'will corrupt your repository'))
188 191
189 192 ui.write(_('shrinking %s\n') % indexfn)
190 193 prefix = os.path.basename(indexfn)[:-1]
191 194 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
192 195
193 196 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
194 197 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
195 198
196 199 datafn, tmpdatafn = r1.datafile, r2.datafile
197 200
198 201 oldindexfn = indexfn + '.old'
199 202 olddatafn = datafn + '.old'
200 203 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
201 204 raise util.Abort(_('one or both of\n'
202 205 ' %s\n'
203 206 ' %s\n'
204 207 'exists from a previous run; please clean up '
205 208 'before running again') % (oldindexfn, olddatafn))
206 209
207 210 # Don't use repo.transaction(), because then things get hairy with
208 211 # paths: some need to be relative to .hg, and some need to be
209 212 # absolute. Doing it this way keeps things simple: everything is an
210 213 # absolute path.
211 214 lock = repo.lock(wait=False)
212 215 tr = transaction.transaction(ui.warn,
213 216 open,
214 217 repo.sjoin('journal'))
215 218
216 219 def ignoremissing(func):
217 220 def f(*args, **kw):
218 221 try:
219 222 return func(*args, **kw)
220 223 except OSError, inst:
221 224 if inst.errno != errno.ENOENT:
222 225 raise
223 226 return f
224 227
225 228 try:
226 229 try:
227 230 order = toposort(ui, r1)
228 231
229 232 suboptimal = 0
230 233 for i in xrange(1, len(order)):
231 234 parents = [p for p in r1.parentrevs(order[i])
232 235 if p != node.nullrev]
233 236 if parents and order[i - 1] not in parents:
234 237 suboptimal += 1
235 238 ui.note(_('%d suboptimal nodes\n') % suboptimal)
236 239
237 240 writerevs(ui, r1, r2, order, tr)
238 241 report(ui, r1, r2)
239 242 tr.close()
240 243 except:
241 244 # Abort transaction first, so we truncate the files before
242 245 # deleting them.
243 246 tr.abort()
244 247 for fn in (tmpindexfn, tmpdatafn):
245 248 ignoremissing(os.unlink)(fn)
246 249 raise
247 250 if not opts.get('dry_run'):
248 251 # racy, both files cannot be renamed atomically
249 252 # copy files
250 253 util.os_link(indexfn, oldindexfn)
251 254 ignoremissing(util.os_link)(datafn, olddatafn)
252 255
253 256 # rename
254 257 util.rename(tmpindexfn, indexfn)
255 258 try:
256 259 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
257 260 util.rename(tmpdatafn, datafn)
258 261 except OSError, inst:
259 262 if inst.errno != errno.ENOENT:
260 263 raise
261 264 ignoremissing(os.unlink)(datafn)
262 265 else:
263 266 for fn in (tmpindexfn, tmpdatafn):
264 267 ignoremissing(os.unlink)(fn)
265 268 finally:
266 269 lock.release()
267 270
268 271 if not opts.get('dry_run'):
269 272 ui.write(_('note: old revlog saved in:\n'
270 273 ' %s\n'
271 274 ' %s\n'
272 275 '(You can delete those files when you are satisfied that your\n'
273 276 'repository is still sane. '
274 277 'Running \'hg verify\' is strongly recommended.)\n')
275 278 % (oldindexfn, olddatafn))
276 279
277 280 cmdtable = {
278 281 'shrink': (shrink,
279 282 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
280 283 ('n', 'dry-run', None, _('do not shrink, simulate only')),
281 284 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
282 285 ],
283 286 _('hg shrink [--revlog PATH]'))
284 287 }
285 288
286 289 if __name__ == "__main__":
287 290 print "shrink-revlog.py is now an extension (see hg help extensions)"
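The next hunk is localrepo.py (identified by its header comment). As an aside on the shrink extension above: postorder() emits a revision only after all of its parents, which is what lets toposort_reversepostorder keep each branch's revisions contiguous. Below is that traversal exercised on a tiny four-revision history; the DAG and names are illustrative only, and, as in the extension, merge parents are listed p2-first.

    def postorder(starts, parents):
        """Return revs ordered so every rev follows all revs in parents[rev]."""
        result, visit, finished = [], list(starts), set()
        while visit:
            cur = visit[-1]
            for p in parents[cur]:
                if p not in finished:     # descend into an unfinished parent first
                    visit.append(p)
                    break
            else:                         # all parents already emitted: emit this rev
                result.append(cur)
                finished.add(cur)
                visit.pop()
        return result

    if __name__ == "__main__":
        # rev 0 is the root, 1 and 2 branch off 0, 3 merges them (p2 listed first)
        parents = {0: (), 1: (0,), 2: (0,), 3: (2, 1)}
        order = postorder([3], parents)   # -> [0, 2, 1, 3]
        assert order.index(0) < order.index(1) < order.index(3)
        assert order.index(0) < order.index(2) < order.index(3)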
@@ -1,1955 +1,1975 @@
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid, nullrev, short
9 9 from i18n import _
10 10 import repo, changegroup, subrepo, discovery, pushkey
11 11 import changelog, dirstate, filelog, manifest, context, bookmarks
12 12 import lock, transaction, store, encoding
13 13 import util, extensions, hook, error
14 14 import match as matchmod
15 15 import merge as mergemod
16 16 import tags as tagsmod
17 17 import url as urlmod
18 18 from lock import release
19 19 import weakref, errno, os, time, inspect
20 20 propertycache = util.propertycache
21 21
22 22 class localrepository(repo.repository):
23 23 capabilities = set(('lookup', 'changegroupsubset', 'branchmap', 'pushkey',
24 24 'known', 'getbundle'))
25 25 supportedformats = set(('revlogv1', 'parentdelta'))
26 26 supported = supportedformats | set(('store', 'fncache', 'shared',
27 27 'dotencode'))
28 28
29 29 def __init__(self, baseui, path=None, create=0):
30 30 repo.repository.__init__(self)
31 31 self.root = os.path.realpath(util.expandpath(path))
32 32 self.path = os.path.join(self.root, ".hg")
33 33 self.origroot = path
34 34 self.auditor = util.path_auditor(self.root, self._checknested)
35 35 self.opener = util.opener(self.path)
36 36 self.wopener = util.opener(self.root)
37 37 self.baseui = baseui
38 38 self.ui = baseui.copy()
39 39
40 40 try:
41 41 self.ui.readconfig(self.join("hgrc"), self.root)
42 42 extensions.loadall(self.ui)
43 43 except IOError:
44 44 pass
45 45
46 46 if not os.path.isdir(self.path):
47 47 if create:
48 48 if not os.path.exists(path):
49 49 util.makedirs(path)
50 50 os.mkdir(self.path)
51 51 requirements = ["revlogv1"]
52 52 if self.ui.configbool('format', 'usestore', True):
53 53 os.mkdir(os.path.join(self.path, "store"))
54 54 requirements.append("store")
55 55 if self.ui.configbool('format', 'usefncache', True):
56 56 requirements.append("fncache")
57 57 if self.ui.configbool('format', 'dotencode', True):
58 58 requirements.append('dotencode')
59 59 # create an invalid changelog
60 60 self.opener("00changelog.i", "a").write(
61 61 '\0\0\0\2' # represents revlogv2
62 62 ' dummy changelog to prevent using the old repo layout'
63 63 )
64 64 if self.ui.configbool('format', 'parentdelta', False):
65 65 requirements.append("parentdelta")
66 66 else:
67 67 raise error.RepoError(_("repository %s not found") % path)
68 68 elif create:
69 69 raise error.RepoError(_("repository %s already exists") % path)
70 70 else:
71 71 # find requirements
72 72 requirements = set()
73 73 try:
74 74 requirements = set(self.opener("requires").read().splitlines())
75 75 except IOError, inst:
76 76 if inst.errno != errno.ENOENT:
77 77 raise
78 78 for r in requirements - self.supported:
79 79 raise error.RequirementError(
80 80 _("requirement '%s' not supported") % r)
81 81
82 82 self.sharedpath = self.path
83 83 try:
84 84 s = os.path.realpath(self.opener("sharedpath").read())
85 85 if not os.path.exists(s):
86 86 raise error.RepoError(
87 87 _('.hg/sharedpath points to nonexistent directory %s') % s)
88 88 self.sharedpath = s
89 89 except IOError, inst:
90 90 if inst.errno != errno.ENOENT:
91 91 raise
92 92
93 93 self.store = store.store(requirements, self.sharedpath, util.opener)
94 94 self.spath = self.store.path
95 95 self.sopener = self.store.opener
96 96 self.sjoin = self.store.join
97 97 self.opener.createmode = self.store.createmode
98 98 self._applyrequirements(requirements)
99 99 if create:
100 100 self._writerequirements()
101 101
102 102 # These two define the set of tags for this repository. _tags
103 103 # maps tag name to node; _tagtypes maps tag name to 'global' or
104 104 # 'local'. (Global tags are defined by .hgtags across all
105 105 # heads, and local tags are defined in .hg/localtags.) They
106 106 # constitute the in-memory cache of tags.
107 107 self._tags = None
108 108 self._tagtypes = None
109 109
110 110 self._branchcache = None
111 111 self._branchcachetip = None
112 112 self.nodetagscache = None
113 113 self.filterpats = {}
114 114 self._datafilters = {}
115 115 self._transref = self._lockref = self._wlockref = None
116 116
117 117 def _applyrequirements(self, requirements):
118 118 self.requirements = requirements
119 119 self.sopener.options = {}
120 120 if 'parentdelta' in requirements:
121 121 self.sopener.options['parentdelta'] = 1
122 122
123 123 def _writerequirements(self):
124 124 reqfile = self.opener("requires", "w")
125 125 for r in self.requirements:
126 126 reqfile.write("%s\n" % r)
127 127 reqfile.close()
128 128
129 129 def _checknested(self, path):
130 130 """Determine if path is a legal nested repository."""
131 131 if not path.startswith(self.root):
132 132 return False
133 133 subpath = path[len(self.root) + 1:]
134 134
135 135 # XXX: Checking against the current working copy is wrong in
136 136 # the sense that it can reject things like
137 137 #
138 138 # $ hg cat -r 10 sub/x.txt
139 139 #
140 140 # if sub/ is no longer a subrepository in the working copy
141 141 # parent revision.
142 142 #
143 143 # However, it can of course also allow things that would have
144 144 # been rejected before, such as the above cat command if sub/
145 145 # is a subrepository now, but was a normal directory before.
146 146 # The old path auditor would have rejected by mistake since it
147 147 # panics when it sees sub/.hg/.
148 148 #
149 149 # All in all, checking against the working copy seems sensible
150 150 # since we want to prevent access to nested repositories on
151 151 # the filesystem *now*.
152 152 ctx = self[None]
153 153 parts = util.splitpath(subpath)
154 154 while parts:
155 155 prefix = os.sep.join(parts)
156 156 if prefix in ctx.substate:
157 157 if prefix == subpath:
158 158 return True
159 159 else:
160 160 sub = ctx.sub(prefix)
161 161 return sub.checknested(subpath[len(prefix) + 1:])
162 162 else:
163 163 parts.pop()
164 164 return False
165 165
166 166 @util.propertycache
167 167 def _bookmarks(self):
168 168 return bookmarks.read(self)
169 169
170 170 @util.propertycache
171 171 def _bookmarkcurrent(self):
172 172 return bookmarks.readcurrent(self)
173 173
174 174 @propertycache
175 175 def changelog(self):
176 176 c = changelog.changelog(self.sopener)
177 177 if 'HG_PENDING' in os.environ:
178 178 p = os.environ['HG_PENDING']
179 179 if p.startswith(self.root):
180 180 c.readpending('00changelog.i.a')
181 181 self.sopener.options['defversion'] = c.version
182 182 return c
183 183
184 184 @propertycache
185 185 def manifest(self):
186 186 return manifest.manifest(self.sopener)
187 187
188 188 @propertycache
189 189 def dirstate(self):
190 190 warned = [0]
191 191 def validate(node):
192 192 try:
193 193 r = self.changelog.rev(node)
194 194 return node
195 195 except error.LookupError:
196 196 if not warned[0]:
197 197 warned[0] = True
198 198 self.ui.warn(_("warning: ignoring unknown"
199 199 " working parent %s!\n") % short(node))
200 200 return nullid
201 201
202 202 return dirstate.dirstate(self.opener, self.ui, self.root, validate)
203 203
204 204 def __getitem__(self, changeid):
205 205 if changeid is None:
206 206 return context.workingctx(self)
207 207 return context.changectx(self, changeid)
208 208
209 209 def __contains__(self, changeid):
210 210 try:
211 211 return bool(self.lookup(changeid))
212 212 except error.RepoLookupError:
213 213 return False
214 214
215 215 def __nonzero__(self):
216 216 return True
217 217
218 218 def __len__(self):
219 219 return len(self.changelog)
220 220
221 221 def __iter__(self):
222 222 for i in xrange(len(self)):
223 223 yield i
224 224
225 225 def url(self):
226 226 return 'file:' + self.root
227 227
228 228 def hook(self, name, throw=False, **args):
229 229 return hook.hook(self.ui, self, name, throw, **args)
230 230
231 231 tag_disallowed = ':\r\n'
232 232
233 233 def _tag(self, names, node, message, local, user, date, extra={}):
234 234 if isinstance(names, str):
235 235 allchars = names
236 236 names = (names,)
237 237 else:
238 238 allchars = ''.join(names)
239 239 for c in self.tag_disallowed:
240 240 if c in allchars:
241 241 raise util.Abort(_('%r cannot be used in a tag name') % c)
242 242
243 243 branches = self.branchmap()
244 244 for name in names:
245 245 self.hook('pretag', throw=True, node=hex(node), tag=name,
246 246 local=local)
247 247 if name in branches:
248 248 self.ui.warn(_("warning: tag %s conflicts with existing"
249 249 " branch name\n") % name)
250 250
251 251 def writetags(fp, names, munge, prevtags):
252 252 fp.seek(0, 2)
253 253 if prevtags and prevtags[-1] != '\n':
254 254 fp.write('\n')
255 255 for name in names:
256 256 m = munge and munge(name) or name
257 257 if self._tagtypes and name in self._tagtypes:
258 258 old = self._tags.get(name, nullid)
259 259 fp.write('%s %s\n' % (hex(old), m))
260 260 fp.write('%s %s\n' % (hex(node), m))
261 261 fp.close()
262 262
263 263 prevtags = ''
264 264 if local:
265 265 try:
266 266 fp = self.opener('localtags', 'r+')
267 267 except IOError:
268 268 fp = self.opener('localtags', 'a')
269 269 else:
270 270 prevtags = fp.read()
271 271
272 272 # local tags are stored in the current charset
273 273 writetags(fp, names, None, prevtags)
274 274 for name in names:
275 275 self.hook('tag', node=hex(node), tag=name, local=local)
276 276 return
277 277
278 278 try:
279 279 fp = self.wfile('.hgtags', 'rb+')
280 280 except IOError:
281 281 fp = self.wfile('.hgtags', 'ab')
282 282 else:
283 283 prevtags = fp.read()
284 284
285 285 # committed tags are stored in UTF-8
286 286 writetags(fp, names, encoding.fromlocal, prevtags)
287 287
288 288 fp.close()
289 289
290 290 if '.hgtags' not in self.dirstate:
291 291 self[None].add(['.hgtags'])
292 292
293 293 m = matchmod.exact(self.root, '', ['.hgtags'])
294 294 tagnode = self.commit(message, user, date, extra=extra, match=m)
295 295
296 296 for name in names:
297 297 self.hook('tag', node=hex(node), tag=name, local=local)
298 298
299 299 return tagnode
300 300
301 301 def tag(self, names, node, message, local, user, date):
302 302 '''tag a revision with one or more symbolic names.
303 303
304 304 names is a list of strings or, when adding a single tag, names may be a
305 305 string.
306 306
307 307 if local is True, the tags are stored in a per-repository file.
308 308 otherwise, they are stored in the .hgtags file, and a new
309 309 changeset is committed with the change.
310 310
311 311 keyword arguments:
312 312
313 313 local: whether to store tags in non-version-controlled file
314 314 (default False)
315 315
316 316 message: commit message to use if committing
317 317
318 318 user: name of user to use if committing
319 319
320 320 date: date tuple to use if committing'''
321 321
322 322 if not local:
323 323 for x in self.status()[:5]:
324 324 if '.hgtags' in x:
325 325 raise util.Abort(_('working copy of .hgtags is changed '
326 326 '(please commit .hgtags manually)'))
327 327
328 328 self.tags() # instantiate the cache
329 329 self._tag(names, node, message, local, user, date)
330 330
331 331 def tags(self):
332 332 '''return a mapping of tag to node'''
333 333 if self._tags is None:
334 334 (self._tags, self._tagtypes) = self._findtags()
335 335
336 336 return self._tags
337 337
338 338 def _findtags(self):
339 339 '''Do the hard work of finding tags. Return a pair of dicts
340 340 (tags, tagtypes) where tags maps tag name to node, and tagtypes
341 341 maps tag name to a string like \'global\' or \'local\'.
342 342 Subclasses or extensions are free to add their own tags, but
343 343 should be aware that the returned dicts will be retained for the
344 344 duration of the localrepo object.'''
345 345
346 346 # XXX what tagtype should subclasses/extensions use? Currently
347 347 # mq and bookmarks add tags, but do not set the tagtype at all.
348 348 # Should each extension invent its own tag type? Should there
349 349 # be one tagtype for all such "virtual" tags? Or is the status
350 350 # quo fine?
351 351
352 352 alltags = {} # map tag name to (node, hist)
353 353 tagtypes = {}
354 354
355 355 tagsmod.findglobaltags(self.ui, self, alltags, tagtypes)
356 356 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
357 357
358 358 # Build the return dicts. Have to re-encode tag names because
359 359 # the tags module always uses UTF-8 (in order not to lose info
360 360 # writing to the cache), but the rest of Mercurial wants them in
361 361 # local encoding.
362 362 tags = {}
363 363 for (name, (node, hist)) in alltags.iteritems():
364 364 if node != nullid:
365 365 tags[encoding.tolocal(name)] = node
366 366 tags['tip'] = self.changelog.tip()
367 367 tagtypes = dict([(encoding.tolocal(name), value)
368 368 for (name, value) in tagtypes.iteritems()])
369 369 return (tags, tagtypes)
370 370
371 371 def tagtype(self, tagname):
372 372 '''
373 373 return the type of the given tag. result can be:
374 374
375 375 'local' : a local tag
376 376 'global' : a global tag
377 377 None : tag does not exist
378 378 '''
379 379
380 380 self.tags()
381 381
382 382 return self._tagtypes.get(tagname)
383 383
384 384 def tagslist(self):
385 385 '''return a list of tags ordered by revision'''
386 386 l = []
387 387 for t, n in self.tags().iteritems():
388 388 try:
389 389 r = self.changelog.rev(n)
390 390 except:
391 391 r = -2 # sort to the beginning of the list if unknown
392 392 l.append((r, t, n))
393 393 return [(t, n) for r, t, n in sorted(l)]
394 394
395 395 def nodetags(self, node):
396 396 '''return the tags associated with a node'''
397 397 if not self.nodetagscache:
398 398 self.nodetagscache = {}
399 399 for t, n in self.tags().iteritems():
400 400 self.nodetagscache.setdefault(n, []).append(t)
401 401 for tags in self.nodetagscache.itervalues():
402 402 tags.sort()
403 403 return self.nodetagscache.get(node, [])
404 404
405 405 def nodebookmarks(self, node):
406 406 marks = []
407 407 for bookmark, n in self._bookmarks.iteritems():
408 408 if n == node:
409 409 marks.append(bookmark)
410 410 return sorted(marks)
411 411
412 412 def _branchtags(self, partial, lrev):
413 413 # TODO: rename this function?
414 414 tiprev = len(self) - 1
415 415 if lrev != tiprev:
416 416 ctxgen = (self[r] for r in xrange(lrev + 1, tiprev + 1))
417 417 self._updatebranchcache(partial, ctxgen)
418 418 self._writebranchcache(partial, self.changelog.tip(), tiprev)
419 419
420 420 return partial
421 421
422 422 def updatebranchcache(self):
423 423 tip = self.changelog.tip()
424 424 if self._branchcache is not None and self._branchcachetip == tip:
425 425 return self._branchcache
426 426
427 427 oldtip = self._branchcachetip
428 428 self._branchcachetip = tip
429 429 if oldtip is None or oldtip not in self.changelog.nodemap:
430 430 partial, last, lrev = self._readbranchcache()
431 431 else:
432 432 lrev = self.changelog.rev(oldtip)
433 433 partial = self._branchcache
434 434
435 435 self._branchtags(partial, lrev)
436 436 # this private cache holds all heads (not just tips)
437 437 self._branchcache = partial
438 438
439 439 def branchmap(self):
440 440 '''returns a dictionary {branch: [branchheads]}'''
441 441 self.updatebranchcache()
442 442 return self._branchcache
443 443
444 444 def branchtags(self):
445 445 '''return a dict where branch names map to the tipmost head of
446 446 the branch, open heads come before closed'''
447 447 bt = {}
448 448 for bn, heads in self.branchmap().iteritems():
449 449 tip = heads[-1]
450 450 for h in reversed(heads):
451 451 if 'close' not in self.changelog.read(h)[5]:
452 452 tip = h
453 453 break
454 454 bt[bn] = tip
455 455 return bt
456 456
457 457 def _readbranchcache(self):
458 458 partial = {}
459 459 try:
460 460 f = self.opener("cache/branchheads")
461 461 lines = f.read().split('\n')
462 462 f.close()
463 463 except (IOError, OSError):
464 464 return {}, nullid, nullrev
465 465
466 466 try:
467 467 last, lrev = lines.pop(0).split(" ", 1)
468 468 last, lrev = bin(last), int(lrev)
469 469 if lrev >= len(self) or self[lrev].node() != last:
470 470 # invalidate the cache
471 471 raise ValueError('invalidating branch cache (tip differs)')
472 472 for l in lines:
473 473 if not l:
474 474 continue
475 475 node, label = l.split(" ", 1)
476 476 label = encoding.tolocal(label.strip())
477 477 partial.setdefault(label, []).append(bin(node))
478 478 except KeyboardInterrupt:
479 479 raise
480 480 except Exception, inst:
481 481 if self.ui.debugflag:
482 482 self.ui.warn(str(inst), '\n')
483 483 partial, last, lrev = {}, nullid, nullrev
484 484 return partial, last, lrev
485 485
486 486 def _writebranchcache(self, branches, tip, tiprev):
487 487 try:
488 488 f = self.opener("cache/branchheads", "w", atomictemp=True)
489 489 f.write("%s %s\n" % (hex(tip), tiprev))
490 490 for label, nodes in branches.iteritems():
491 491 for node in nodes:
492 492 f.write("%s %s\n" % (hex(node), encoding.fromlocal(label)))
493 493 f.rename()
494 494 except (IOError, OSError):
495 495 pass
496 496
497 497 def _updatebranchcache(self, partial, ctxgen):
498 498 # collect new branch entries
499 499 newbranches = {}
500 500 for c in ctxgen:
501 501 newbranches.setdefault(c.branch(), []).append(c.node())
502 502 # if older branchheads are reachable from new ones, they aren't
503 503 # really branchheads. Note checking parents is insufficient:
504 504 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
505 505 for branch, newnodes in newbranches.iteritems():
506 506 bheads = partial.setdefault(branch, [])
507 507 bheads.extend(newnodes)
508 508 if len(bheads) <= 1:
509 509 continue
510 510 # starting from tip means fewer passes over reachable
511 511 while newnodes:
512 512 latest = newnodes.pop()
513 513 if latest not in bheads:
514 514 continue
515 515 minbhrev = self[min([self[bh].rev() for bh in bheads])].node()
516 516 reachable = self.changelog.reachable(latest, minbhrev)
517 517 reachable.remove(latest)
518 518 bheads = [b for b in bheads if b not in reachable]
519 519 partial[branch] = bheads
520 520
521 521 def lookup(self, key):
522 522 if isinstance(key, int):
523 523 return self.changelog.node(key)
524 524 elif key == '.':
525 525 return self.dirstate.parents()[0]
526 526 elif key == 'null':
527 527 return nullid
528 528 elif key == 'tip':
529 529 return self.changelog.tip()
530 530 n = self.changelog._match(key)
531 531 if n:
532 532 return n
533 533 if key in self._bookmarks:
534 534 return self._bookmarks[key]
535 535 if key in self.tags():
536 536 return self.tags()[key]
537 537 if key in self.branchtags():
538 538 return self.branchtags()[key]
539 539 n = self.changelog._partialmatch(key)
540 540 if n:
541 541 return n
542 542
543 543 # can't find key, check if it might have come from damaged dirstate
544 544 if key in self.dirstate.parents():
545 545 raise error.Abort(_("working directory has unknown parent '%s'!")
546 546 % short(key))
547 547 try:
548 548 if len(key) == 20:
549 549 key = hex(key)
550 550 except:
551 551 pass
552 552 raise error.RepoLookupError(_("unknown revision '%s'") % key)
553 553
554 554 def lookupbranch(self, key, remote=None):
555 555 repo = remote or self
556 556 if key in repo.branchmap():
557 557 return key
558 558
559 559 repo = (remote and remote.local()) and remote or self
560 560 return repo[key].branch()
561 561
562 562 def known(self, nodes):
563 563 nm = self.changelog.nodemap
564 564 return [(n in nm) for n in nodes]
565 565
566 566 def local(self):
567 567 return True
568 568
569 569 def join(self, f):
570 570 return os.path.join(self.path, f)
571 571
572 572 def wjoin(self, f):
573 573 return os.path.join(self.root, f)
574 574
575 575 def file(self, f):
576 576 if f[0] == '/':
577 577 f = f[1:]
578 578 return filelog.filelog(self.sopener, f)
579 579
580 580 def changectx(self, changeid):
581 581 return self[changeid]
582 582
583 583 def parents(self, changeid=None):
584 584 '''get list of changectxs for parents of changeid'''
585 585 return self[changeid].parents()
586 586
587 587 def filectx(self, path, changeid=None, fileid=None):
588 588 """changeid can be a changeset revision, node, or tag.
589 589 fileid can be a file revision or node."""
590 590 return context.filectx(self, path, changeid, fileid)
591 591
592 592 def getcwd(self):
593 593 return self.dirstate.getcwd()
594 594
595 595 def pathto(self, f, cwd=None):
596 596 return self.dirstate.pathto(f, cwd)
597 597
598 598 def wfile(self, f, mode='r'):
599 599 return self.wopener(f, mode)
600 600
601 601 def _link(self, f):
602 602 return os.path.islink(self.wjoin(f))
603 603
604 604 def _loadfilter(self, filter):
605 605 if filter not in self.filterpats:
606 606 l = []
607 607 for pat, cmd in self.ui.configitems(filter):
608 608 if cmd == '!':
609 609 continue
610 610 mf = matchmod.match(self.root, '', [pat])
611 611 fn = None
612 612 params = cmd
613 613 for name, filterfn in self._datafilters.iteritems():
614 614 if cmd.startswith(name):
615 615 fn = filterfn
616 616 params = cmd[len(name):].lstrip()
617 617 break
618 618 if not fn:
619 619 fn = lambda s, c, **kwargs: util.filter(s, c)
620 620 # Wrap old filters not supporting keyword arguments
621 621 if not inspect.getargspec(fn)[2]:
622 622 oldfn = fn
623 623 fn = lambda s, c, **kwargs: oldfn(s, c)
624 624 l.append((mf, fn, params))
625 625 self.filterpats[filter] = l
626 626 return self.filterpats[filter]
627 627
628 628 def _filter(self, filterpats, filename, data):
629 629 for mf, fn, cmd in filterpats:
630 630 if mf(filename):
631 631 self.ui.debug("filtering %s through %s\n" % (filename, cmd))
632 632 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
633 633 break
634 634
635 635 return data
636 636
637 637 @propertycache
638 638 def _encodefilterpats(self):
639 639 return self._loadfilter('encode')
640 640
641 641 @propertycache
642 642 def _decodefilterpats(self):
643 643 return self._loadfilter('decode')
644 644
645 645 def adddatafilter(self, name, filter):
646 646 self._datafilters[name] = filter
647 647
648 648 def wread(self, filename):
649 649 if self._link(filename):
650 650 data = os.readlink(self.wjoin(filename))
651 651 else:
652 652 data = self.wopener(filename, 'r').read()
653 653 return self._filter(self._encodefilterpats, filename, data)
654 654
655 655 def wwrite(self, filename, data, flags):
656 656 data = self._filter(self._decodefilterpats, filename, data)
657 657 if 'l' in flags:
658 658 self.wopener.symlink(data, filename)
659 659 else:
660 660 self.wopener(filename, 'w').write(data)
661 661 if 'x' in flags:
662 662 util.set_flags(self.wjoin(filename), False, True)
663 663
664 664 def wwritedata(self, filename, data):
665 665 return self._filter(self._decodefilterpats, filename, data)
666 666
667 667 def transaction(self, desc):
668 668 tr = self._transref and self._transref() or None
669 669 if tr and tr.running():
670 670 return tr.nest()
671 671
672 672 # abort here if the journal already exists
673 673 if os.path.exists(self.sjoin("journal")):
674 674 raise error.RepoError(
675 675 _("abandoned transaction found - run hg recover"))
676 676
677 677 # save dirstate for rollback
678 678 try:
679 679 ds = self.opener("dirstate").read()
680 680 except IOError:
681 681 ds = ""
682 682 self.opener("journal.dirstate", "w").write(ds)
683 683 self.opener("journal.branch", "w").write(
684 684 encoding.fromlocal(self.dirstate.branch()))
685 685 self.opener("journal.desc", "w").write("%d\n%s\n" % (len(self), desc))
686 686
687 687 renames = [(self.sjoin("journal"), self.sjoin("undo")),
688 688 (self.join("journal.dirstate"), self.join("undo.dirstate")),
689 689 (self.join("journal.branch"), self.join("undo.branch")),
690 690 (self.join("journal.desc"), self.join("undo.desc"))]
691 691 tr = transaction.transaction(self.ui.warn, self.sopener,
692 692 self.sjoin("journal"),
693 693 aftertrans(renames),
694 694 self.store.createmode)
695 695 self._transref = weakref.ref(tr)
696 696 return tr
697 697
698 698 def recover(self):
699 699 lock = self.lock()
700 700 try:
701 701 if os.path.exists(self.sjoin("journal")):
702 702 self.ui.status(_("rolling back interrupted transaction\n"))
703 703 transaction.rollback(self.sopener, self.sjoin("journal"),
704 704 self.ui.warn)
705 705 self.invalidate()
706 706 return True
707 707 else:
708 708 self.ui.warn(_("no interrupted transaction available\n"))
709 709 return False
710 710 finally:
711 711 lock.release()
712 712
713 713 def rollback(self, dryrun=False):
714 714 wlock = lock = None
715 715 try:
716 716 wlock = self.wlock()
717 717 lock = self.lock()
718 718 if os.path.exists(self.sjoin("undo")):
719 719 try:
720 720 args = self.opener("undo.desc", "r").read().splitlines()
721 721 if len(args) >= 3 and self.ui.verbose:
722 722 desc = _("repository tip rolled back to revision %s"
723 723 " (undo %s: %s)\n") % (
724 724 int(args[0]) - 1, args[1], args[2])
725 725 elif len(args) >= 2:
726 726 desc = _("repository tip rolled back to revision %s"
727 727 " (undo %s)\n") % (
728 728 int(args[0]) - 1, args[1])
729 729 except IOError:
730 730 desc = _("rolling back unknown transaction\n")
731 731 self.ui.status(desc)
732 732 if dryrun:
733 733 return
734 734 transaction.rollback(self.sopener, self.sjoin("undo"),
735 735 self.ui.warn)
736 736 util.rename(self.join("undo.dirstate"), self.join("dirstate"))
737 737 if os.path.exists(self.join('undo.bookmarks')):
738 738 util.rename(self.join('undo.bookmarks'),
739 739 self.join('bookmarks'))
740 740 try:
741 741 branch = self.opener("undo.branch").read()
742 742 self.dirstate.setbranch(branch)
743 743 except IOError:
744 744 self.ui.warn(_("Named branch could not be reset, "
745 745 "current branch still is: %s\n")
746 746 % self.dirstate.branch())
747 747 self.invalidate()
748 748 self.dirstate.invalidate()
749 749 self.destroyed()
750 750 parents = tuple([p.rev() for p in self.parents()])
751 751 if len(parents) > 1:
752 752 self.ui.status(_("working directory now based on "
753 753 "revisions %d and %d\n") % parents)
754 754 else:
755 755 self.ui.status(_("working directory now based on "
756 756 "revision %d\n") % parents)
757 757 else:
758 758 self.ui.warn(_("no rollback information available\n"))
759 759 return 1
760 760 finally:
761 761 release(lock, wlock)
762 762
763 763 def invalidatecaches(self):
764 764 self._tags = None
765 765 self._tagtypes = None
766 766 self.nodetagscache = None
767 767 self._branchcache = None # in UTF-8
768 768 self._branchcachetip = None
769 769
770 770 def invalidate(self):
771 771 for a in ("changelog", "manifest", "_bookmarks", "_bookmarkcurrent"):
772 772 if a in self.__dict__:
773 773 delattr(self, a)
774 774 self.invalidatecaches()
775 775
776 776 def _lock(self, lockname, wait, releasefn, acquirefn, desc):
777 777 try:
778 778 l = lock.lock(lockname, 0, releasefn, desc=desc)
779 779 except error.LockHeld, inst:
780 780 if not wait:
781 781 raise
782 782 self.ui.warn(_("waiting for lock on %s held by %r\n") %
783 783 (desc, inst.locker))
784 784 # default to 600 seconds timeout
785 785 l = lock.lock(lockname, int(self.ui.config("ui", "timeout", "600")),
786 786 releasefn, desc=desc)
787 787 if acquirefn:
788 788 acquirefn()
789 789 return l
790 790
791 791 def lock(self, wait=True):
792 792 '''Lock the repository store (.hg/store) and return a weak reference
793 793 to the lock. Use this before modifying the store (e.g. committing or
794 794 stripping). If you are opening a transaction, get a lock as well.)'''
795 795 l = self._lockref and self._lockref()
796 796 if l is not None and l.held:
797 797 l.lock()
798 798 return l
799 799
800 800 l = self._lock(self.sjoin("lock"), wait, self.store.write,
801 801 self.invalidate, _('repository %s') % self.origroot)
802 802 self._lockref = weakref.ref(l)
803 803 return l
804 804
805 805 def wlock(self, wait=True):
806 806 '''Lock the non-store parts of the repository (everything under
807 807 .hg except .hg/store) and return a weak reference to the lock.
808 808 Use this before modifying files in .hg.'''
809 809 l = self._wlockref and self._wlockref()
810 810 if l is not None and l.held:
811 811 l.lock()
812 812 return l
813 813
814 814 l = self._lock(self.join("wlock"), wait, self.dirstate.write,
815 815 self.dirstate.invalidate, _('working directory of %s') %
816 816 self.origroot)
817 817 self._wlockref = weakref.ref(l)
818 818 return l
819 819
820 820 def _filecommit(self, fctx, manifest1, manifest2, linkrev, tr, changelist):
821 821 """
822 822 commit an individual file as part of a larger transaction
823 823 """
824 824
825 825 fname = fctx.path()
826 826 text = fctx.data()
827 827 flog = self.file(fname)
828 828 fparent1 = manifest1.get(fname, nullid)
829 829 fparent2 = fparent2o = manifest2.get(fname, nullid)
830 830
831 831 meta = {}
832 832 copy = fctx.renamed()
833 833 if copy and copy[0] != fname:
834 834 # Mark the new revision of this file as a copy of another
835 835 # file. This copy data will effectively act as a parent
836 836 # of this new revision. If this is a merge, the first
837 837 # parent will be the nullid (meaning "look up the copy data")
838 838 # and the second one will be the other parent. For example:
839 839 #
840 840 # 0 --- 1 --- 3 rev1 changes file foo
841 841 # \ / rev2 renames foo to bar and changes it
842 842 # \- 2 -/ rev3 should have bar with all changes and
843 843 # should record that bar descends from
844 844 # bar in rev2 and foo in rev1
845 845 #
846 846 # this allows this merge to succeed:
847 847 #
848 848 # 0 --- 1 --- 3 rev4 reverts the content change from rev2
849 849 # \ / merging rev3 and rev4 should use bar@rev2
850 850 # \- 2 --- 4 as the merge base
851 851 #
852 852
853 853 cfname = copy[0]
854 854 crev = manifest1.get(cfname)
855 855 newfparent = fparent2
856 856
857 857 if manifest2: # branch merge
858 858 if fparent2 == nullid or crev is None: # copied on remote side
859 859 if cfname in manifest2:
860 860 crev = manifest2[cfname]
861 861 newfparent = fparent1
862 862
863 863 # find source in nearest ancestor if we've lost track
864 864 if not crev:
865 865 self.ui.debug(" %s: searching for copy revision for %s\n" %
866 866 (fname, cfname))
867 867 for ancestor in self[None].ancestors():
868 868 if cfname in ancestor:
869 869 crev = ancestor[cfname].filenode()
870 870 break
871 871
872 872 if crev:
873 873 self.ui.debug(" %s: copy %s:%s\n" % (fname, cfname, hex(crev)))
874 874 meta["copy"] = cfname
875 875 meta["copyrev"] = hex(crev)
876 876 fparent1, fparent2 = nullid, newfparent
877 877 else:
878 878 self.ui.warn(_("warning: can't find ancestor for '%s' "
879 879 "copied from '%s'!\n") % (fname, cfname))
880 880
881 881 elif fparent2 != nullid:
882 882 # is one parent an ancestor of the other?
883 883 fparentancestor = flog.ancestor(fparent1, fparent2)
884 884 if fparentancestor == fparent1:
885 885 fparent1, fparent2 = fparent2, nullid
886 886 elif fparentancestor == fparent2:
887 887 fparent2 = nullid
888 888
889 889 # is the file changed?
890 890 if fparent2 != nullid or flog.cmp(fparent1, text) or meta:
891 891 changelist.append(fname)
892 892 return flog.add(text, meta, tr, linkrev, fparent1, fparent2)
893 893
894 894 # are just the flags changed during merge?
895 895 if fparent1 != fparent2o and manifest1.flags(fname) != fctx.flags():
896 896 changelist.append(fname)
897 897
898 898 return fparent1
899 899
900 900 def commit(self, text="", user=None, date=None, match=None, force=False,
901 901 editor=False, extra={}):
902 902 """Add a new revision to current repository.
903 903
904 904 Revision information is gathered from the working directory,
905 905 match can be used to filter the committed files. If editor is
906 906 supplied, it is called to get a commit message.
907 907 """
908 908
909 909 def fail(f, msg):
910 910 raise util.Abort('%s: %s' % (f, msg))
911 911
912 912 if not match:
913 913 match = matchmod.always(self.root, '')
914 914
915 915 if not force:
916 916 vdirs = []
917 917 match.dir = vdirs.append
918 918 match.bad = fail
919 919
920 920 wlock = self.wlock()
921 921 try:
922 922 wctx = self[None]
923 923 merge = len(wctx.parents()) > 1
924 924
925 925 if (not force and merge and match and
926 926 (match.files() or match.anypats())):
927 927 raise util.Abort(_('cannot partially commit a merge '
928 928 '(do not specify files or patterns)'))
929 929
930 930 changes = self.status(match=match, clean=force)
931 931 if force:
932 932 changes[0].extend(changes[6]) # mq may commit unchanged files
933 933
934 934 # check subrepos
935 935 subs = []
936 936 removedsubs = set()
937 937 for p in wctx.parents():
938 938 removedsubs.update(s for s in p.substate if match(s))
939 939 for s in wctx.substate:
940 940 removedsubs.discard(s)
941 941 if match(s) and wctx.sub(s).dirty():
942 942 subs.append(s)
943 943 if (subs or removedsubs):
944 944 if (not match('.hgsub') and
945 945 '.hgsub' in (wctx.modified() + wctx.added())):
946 946 raise util.Abort(_("can't commit subrepos without .hgsub"))
947 947 if '.hgsubstate' not in changes[0]:
948 948 changes[0].insert(0, '.hgsubstate')
949 949
950 950 if subs and not self.ui.configbool('ui', 'commitsubrepos', True):
951 951 changedsubs = [s for s in subs if wctx.sub(s).dirty(True)]
952 952 if changedsubs:
953 953 raise util.Abort(_("uncommitted changes in subrepo %s")
954 954 % changedsubs[0])
955 955
956 956 # make sure all explicit patterns are matched
957 957 if not force and match.files():
958 958 matched = set(changes[0] + changes[1] + changes[2])
959 959
960 960 for f in match.files():
961 961 if f == '.' or f in matched or f in wctx.substate:
962 962 continue
963 963 if f in changes[3]: # missing
964 964 fail(f, _('file not found!'))
965 965 if f in vdirs: # visited directory
966 966 d = f + '/'
967 967 for mf in matched:
968 968 if mf.startswith(d):
969 969 break
970 970 else:
971 971 fail(f, _("no match under directory!"))
972 972 elif f not in self.dirstate:
973 973 fail(f, _("file not tracked!"))
974 974
975 975 if (not force and not extra.get("close") and not merge
976 976 and not (changes[0] or changes[1] or changes[2])
977 977 and wctx.branch() == wctx.p1().branch()):
978 978 return None
979 979
980 980 ms = mergemod.mergestate(self)
981 981 for f in changes[0]:
982 982 if f in ms and ms[f] == 'u':
983 983 raise util.Abort(_("unresolved merge conflicts "
984 984 "(see hg help resolve)"))
985 985
986 986 cctx = context.workingctx(self, text, user, date, extra, changes)
987 987 if editor:
988 988 cctx._text = editor(self, cctx, subs)
989 989 edited = (text != cctx._text)
990 990
991 991 # commit subs
992 992 if subs or removedsubs:
993 993 state = wctx.substate.copy()
994 994 for s in sorted(subs):
995 995 sub = wctx.sub(s)
996 996 self.ui.status(_('committing subrepository %s\n') %
997 997 subrepo.subrelpath(sub))
998 998 sr = sub.commit(cctx._text, user, date)
999 999 state[s] = (state[s][0], sr)
1000 1000 subrepo.writestate(self, state)
1001 1001
1002 1002 # Save commit message in case this transaction gets rolled back
1003 1003 # (e.g. by a pretxncommit hook). Leave the content alone on
1004 1004 # the assumption that the user will use the same editor again.
1005 1005 msgfile = self.opener('last-message.txt', 'wb')
1006 1006 msgfile.write(cctx._text)
1007 1007 msgfile.close()
1008 1008
1009 1009 p1, p2 = self.dirstate.parents()
1010 1010 hookp1, hookp2 = hex(p1), (p2 != nullid and hex(p2) or '')
1011 1011 try:
1012 1012 self.hook("precommit", throw=True, parent1=hookp1, parent2=hookp2)
1013 1013 ret = self.commitctx(cctx, True)
1014 1014 except:
1015 1015 if edited:
1016 1016 msgfn = self.pathto(msgfile.name[len(self.root)+1:])
1017 1017 self.ui.write(
1018 1018 _('note: commit message saved in %s\n') % msgfn)
1019 1019 raise
1020 1020
1021 1021 # update bookmarks, dirstate and mergestate
1022 1022 parents = (p1, p2)
1023 1023 if p2 == nullid:
1024 1024 parents = (p1,)
1025 1025 bookmarks.update(self, parents, ret)
1026 1026 for f in changes[0] + changes[1]:
1027 1027 self.dirstate.normal(f)
1028 1028 for f in changes[2]:
1029 1029 self.dirstate.forget(f)
1030 1030 self.dirstate.setparents(ret)
1031 1031 ms.reset()
1032 1032 finally:
1033 1033 wlock.release()
1034 1034
1035 1035 self.hook("commit", node=hex(ret), parent1=hookp1, parent2=hookp2)
1036 1036 return ret
1037 1037
1038 1038 def commitctx(self, ctx, error=False):
1039 1039 """Add a new revision to current repository.
1040 1040 Revision information is passed via the context argument.
1041 1041 """
1042 1042
1043 1043 tr = lock = None
1044 1044 removed = list(ctx.removed())
1045 1045 p1, p2 = ctx.p1(), ctx.p2()
1046 1046 m1 = p1.manifest().copy()
1047 1047 m2 = p2.manifest()
1048 1048 user = ctx.user()
1049 1049
1050 1050 lock = self.lock()
1051 1051 try:
1052 1052 tr = self.transaction("commit")
1053 1053 trp = weakref.proxy(tr)
1054 1054
1055 1055 # check in files
1056 1056 new = {}
1057 1057 changed = []
1058 1058 linkrev = len(self)
1059 1059 for f in sorted(ctx.modified() + ctx.added()):
1060 1060 self.ui.note(f + "\n")
1061 1061 try:
1062 1062 fctx = ctx[f]
1063 1063 new[f] = self._filecommit(fctx, m1, m2, linkrev, trp,
1064 1064 changed)
1065 1065 m1.set(f, fctx.flags())
1066 1066 except OSError, inst:
1067 1067 self.ui.warn(_("trouble committing %s!\n") % f)
1068 1068 raise
1069 1069 except IOError, inst:
1070 1070 errcode = getattr(inst, 'errno', errno.ENOENT)
1071 1071 if error or errcode and errcode != errno.ENOENT:
1072 1072 self.ui.warn(_("trouble committing %s!\n") % f)
1073 1073 raise
1074 1074 else:
1075 1075 removed.append(f)
1076 1076
1077 1077 # update manifest
1078 1078 m1.update(new)
1079 1079 removed = [f for f in sorted(removed) if f in m1 or f in m2]
1080 1080 drop = [f for f in removed if f in m1]
1081 1081 for f in drop:
1082 1082 del m1[f]
1083 1083 mn = self.manifest.add(m1, trp, linkrev, p1.manifestnode(),
1084 1084 p2.manifestnode(), (new, drop))
1085 1085
1086 1086 # update changelog
1087 1087 self.changelog.delayupdate()
1088 1088 n = self.changelog.add(mn, changed + removed, ctx.description(),
1089 1089 trp, p1.node(), p2.node(),
1090 1090 user, ctx.date(), ctx.extra().copy())
1091 1091 p = lambda: self.changelog.writepending() and self.root or ""
1092 1092 xp1, xp2 = p1.hex(), p2 and p2.hex() or ''
1093 1093 self.hook('pretxncommit', throw=True, node=hex(n), parent1=xp1,
1094 1094 parent2=xp2, pending=p)
1095 1095 self.changelog.finalize(trp)
1096 1096 tr.close()
1097 1097
1098 1098 if self._branchcache:
1099 1099 self.updatebranchcache()
1100 1100 return n
1101 1101 finally:
1102 1102 if tr:
1103 1103 tr.release()
1104 1104 lock.release()
1105 1105
1106 1106 def destroyed(self):
1107 1107 '''Inform the repository that nodes have been destroyed.
1108 1108 Intended for use by strip and rollback, so there's a common
1109 1109 place for anything that has to be done after destroying history.'''
1110 1110 # XXX it might be nice if we could take the list of destroyed
1111 1111 # nodes, but I don't see an easy way for rollback() to do that
1112 1112
1113 1113 # Ensure the persistent tag cache is updated. Doing it now
1114 1114 # means that the tag cache only has to worry about destroyed
1115 1115 # heads immediately after a strip/rollback. That in turn
1116 1116 # guarantees that "cachetip == currenttip" (comparing both rev
1117 1117 # and node) always means no nodes have been added or destroyed.
1118 1118
1119 1119 # XXX this is suboptimal when qrefresh'ing: we strip the current
1120 1120 # head, refresh the tag cache, then immediately add a new head.
1121 1121 # But I think doing it this way is necessary for the "instant
1122 1122 # tag cache retrieval" case to work.
1123 1123 self.invalidatecaches()
1124 1124
1125 1125 def walk(self, match, node=None):
1126 1126 '''
1127 1127 walk recursively through the directory tree or a given
1128 1128 changeset, finding all files matched by the match
1129 1129 function
1130 1130 '''
1131 1131 return self[node].walk(match)
1132 1132
1133 1133 def status(self, node1='.', node2=None, match=None,
1134 1134 ignored=False, clean=False, unknown=False,
1135 1135 listsubrepos=False):
1136 1136 """return status of files between two nodes or node and working directory
1137 1137
1138 1138 If node1 is None, use the first dirstate parent instead.
1139 1139 If node2 is None, compare node1 with working directory.
1140 1140 """
1141 1141
1142 1142 def mfmatches(ctx):
1143 1143 mf = ctx.manifest().copy()
1144 1144 for fn in mf.keys():
1145 1145 if not match(fn):
1146 1146 del mf[fn]
1147 1147 return mf
1148 1148
1149 1149 if isinstance(node1, context.changectx):
1150 1150 ctx1 = node1
1151 1151 else:
1152 1152 ctx1 = self[node1]
1153 1153 if isinstance(node2, context.changectx):
1154 1154 ctx2 = node2
1155 1155 else:
1156 1156 ctx2 = self[node2]
1157 1157
1158 1158 working = ctx2.rev() is None
1159 1159 parentworking = working and ctx1 == self['.']
1160 1160 match = match or matchmod.always(self.root, self.getcwd())
1161 1161 listignored, listclean, listunknown = ignored, clean, unknown
1162 1162
1163 1163 # load earliest manifest first for caching reasons
1164 1164 if not working and ctx2.rev() < ctx1.rev():
1165 1165 ctx2.manifest()
1166 1166
1167 1167 if not parentworking:
1168 1168 def bad(f, msg):
1169 1169 if f not in ctx1:
1170 1170 self.ui.warn('%s: %s\n' % (self.dirstate.pathto(f), msg))
1171 1171 match.bad = bad
1172 1172
1173 1173 if working: # we need to scan the working dir
1174 1174 subrepos = []
1175 1175 if '.hgsub' in self.dirstate:
1176 1176 subrepos = ctx1.substate.keys()
1177 1177 s = self.dirstate.status(match, subrepos, listignored,
1178 1178 listclean, listunknown)
1179 1179 cmp, modified, added, removed, deleted, unknown, ignored, clean = s
1180 1180
1181 1181 # check for any possibly clean files
1182 1182 if parentworking and cmp:
1183 1183 fixup = []
1184 1184 # do a full compare of any files that might have changed
1185 1185 for f in sorted(cmp):
1186 1186 if (f not in ctx1 or ctx2.flags(f) != ctx1.flags(f)
1187 1187 or ctx1[f].cmp(ctx2[f])):
1188 1188 modified.append(f)
1189 1189 else:
1190 1190 fixup.append(f)
1191 1191
1192 1192 # update dirstate for files that are actually clean
1193 1193 if fixup:
1194 1194 if listclean:
1195 1195 clean += fixup
1196 1196
1197 1197 try:
1198 1198 # updating the dirstate is optional
1199 1199 # so we don't wait on the lock
1200 1200 wlock = self.wlock(False)
1201 1201 try:
1202 1202 for f in fixup:
1203 1203 self.dirstate.normal(f)
1204 1204 finally:
1205 1205 wlock.release()
1206 1206 except error.LockError:
1207 1207 pass
1208 1208
1209 1209 if not parentworking:
1210 1210 mf1 = mfmatches(ctx1)
1211 1211 if working:
1212 1212 # we are comparing working dir against non-parent
1213 1213 # generate a pseudo-manifest for the working dir
1214 1214 mf2 = mfmatches(self['.'])
1215 1215 for f in cmp + modified + added:
1216 1216 mf2[f] = None
1217 1217 mf2.set(f, ctx2.flags(f))
1218 1218 for f in removed:
1219 1219 if f in mf2:
1220 1220 del mf2[f]
1221 1221 else:
1222 1222 # we are comparing two revisions
1223 1223 deleted, unknown, ignored = [], [], []
1224 1224 mf2 = mfmatches(ctx2)
1225 1225
1226 1226 modified, added, clean = [], [], []
1227 1227 for fn in mf2:
1228 1228 if fn in mf1:
1229 1229 if (mf1.flags(fn) != mf2.flags(fn) or
1230 1230 (mf1[fn] != mf2[fn] and
1231 1231 (mf2[fn] or ctx1[fn].cmp(ctx2[fn])))):
1232 1232 modified.append(fn)
1233 1233 elif listclean:
1234 1234 clean.append(fn)
1235 1235 del mf1[fn]
1236 1236 else:
1237 1237 added.append(fn)
1238 1238 removed = mf1.keys()
1239 1239
1240 1240 r = modified, added, removed, deleted, unknown, ignored, clean
1241 1241
1242 1242 if listsubrepos:
1243 1243 for subpath, sub in subrepo.itersubrepos(ctx1, ctx2):
1244 1244 if working:
1245 1245 rev2 = None
1246 1246 else:
1247 1247 rev2 = ctx2.substate[subpath][1]
1248 1248 try:
1249 1249 submatch = matchmod.narrowmatcher(subpath, match)
1250 1250 s = sub.status(rev2, match=submatch, ignored=listignored,
1251 1251 clean=listclean, unknown=listunknown,
1252 1252 listsubrepos=True)
1253 1253 for rfiles, sfiles in zip(r, s):
1254 1254 rfiles.extend("%s/%s" % (subpath, f) for f in sfiles)
1255 1255 except error.LookupError:
1256 1256 self.ui.status(_("skipping missing subrepository: %s\n")
1257 1257 % subpath)
1258 1258
1259 1259 for l in r:
1260 1260 l.sort()
1261 1261 return r
1262 1262
1263 1263 def heads(self, start=None):
1264 1264 heads = self.changelog.heads(start)
1265 1265 # sort the output in rev descending order
1266 1266 return sorted(heads, key=self.changelog.rev, reverse=True)
1267 1267
1268 1268 def branchheads(self, branch=None, start=None, closed=False):
1269 1269 '''return a (possibly filtered) list of heads for the given branch
1270 1270
1271 1271 Heads are returned in topological order, from newest to oldest.
1272 1272 If branch is None, use the dirstate branch.
1273 1273 If start is not None, return only heads reachable from start.
1274 1274 If closed is True, return heads that are marked as closed as well.
1275 1275 '''
1276 1276 if branch is None:
1277 1277 branch = self[None].branch()
1278 1278 branches = self.branchmap()
1279 1279 if branch not in branches:
1280 1280 return []
1281 1281 # the cache returns heads ordered lowest to highest
1282 1282 bheads = list(reversed(branches[branch]))
1283 1283 if start is not None:
1284 1284 # filter out the heads that cannot be reached from startrev
1285 1285 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
1286 1286 bheads = [h for h in bheads if h in fbheads]
1287 1287 if not closed:
1288 1288 bheads = [h for h in bheads if
1289 1289 ('close' not in self.changelog.read(h)[5])]
1290 1290 return bheads
1291 1291
1292 1292 def branches(self, nodes):
1293 1293 if not nodes:
1294 1294 nodes = [self.changelog.tip()]
1295 1295 b = []
1296 1296 for n in nodes:
1297 1297 t = n
1298 1298 while 1:
1299 1299 p = self.changelog.parents(n)
1300 1300 if p[1] != nullid or p[0] == nullid:
1301 1301 b.append((t, n, p[0], p[1]))
1302 1302 break
1303 1303 n = p[0]
1304 1304 return b
1305 1305
1306 1306 def between(self, pairs):
1307 1307 r = []
1308 1308
1309 1309 for top, bottom in pairs:
1310 1310 n, l, i = top, [], 0
1311 1311 f = 1
1312 1312
1313 1313 while n != bottom and n != nullid:
1314 1314 p = self.changelog.parents(n)[0]
1315 1315 if i == f:
1316 1316 l.append(n)
1317 1317 f = f * 2
1318 1318 n = p
1319 1319 i += 1
1320 1320
1321 1321 r.append(l)
1322 1322
1323 1323 return r
1324 1324
1325 1325 def pull(self, remote, heads=None, force=False):
1326 1326 lock = self.lock()
1327 1327 try:
1328 1328 usecommon = remote.capable('getbundle')
1329 1329 tmp = discovery.findcommonincoming(self, remote, heads=heads,
1330 1330 force=force, commononly=usecommon)
1331 1331 common, fetch, rheads = tmp
1332 1332 if not fetch:
1333 1333 self.ui.status(_("no changes found\n"))
1334 1334 result = 0
1335 1335 else:
1336 1336 if heads is None and list(common) == [nullid]:
1337 1337 self.ui.status(_("requesting all changes\n"))
1338 1338 elif heads is None and remote.capable('changegroupsubset'):
1339 1339 # issue1320, avoid a race if remote changed after discovery
1340 1340 heads = rheads
1341 1341
1342 1342 if usecommon:
1343 1343 cg = remote.getbundle('pull', common=common,
1344 1344 heads=heads or rheads)
1345 1345 elif heads is None:
1346 1346 cg = remote.changegroup(fetch, 'pull')
1347 1347 elif not remote.capable('changegroupsubset'):
1348 1348 raise util.Abort(_("partial pull cannot be done because "
1349 1349 "other repository doesn't support "
1350 1350 "changegroupsubset."))
1351 1351 else:
1352 1352 cg = remote.changegroupsubset(fetch, heads, 'pull')
1353 1353 result = self.addchangegroup(cg, 'pull', remote.url(),
1354 1354 lock=lock)
1355 1355 finally:
1356 1356 lock.release()
1357 1357
1358 1358 return result
1359 1359
1360 1360 def checkpush(self, force, revs):
1361 1361 """Extensions can override this function if additional checks have
1362 1362 to be performed before pushing, or call it if they override push
1363 1363 command.
1364 1364 """
1365 1365 pass
1366 1366
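The docstring above invites extensions to override checkpush(). The following is a minimal, hypothetical sketch of what such an override could look like, using the standard reposetup() extension entry point; the extension behaviour and the class name vetorepo are illustrative only and not part of this changeset.

from mercurial import util

def reposetup(ui, repo):
    # hypothetical extension: refuse wholesale pushes unless --force is given
    if not repo.local():
        return
    class vetorepo(repo.__class__):
        def checkpush(self, force, revs):
            super(vetorepo, self).checkpush(force, revs)
            if not force and revs is None:
                raise util.Abort('pushing all revisions requires --force here')
    repo.__class__ = vetorepo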
1367 1367 def push(self, remote, force=False, revs=None, newbranch=False):
1368 1368 '''Push outgoing changesets (limited by revs) from the current
1369 1369 repository to remote. Return an integer:
1370 1370 - 0 means HTTP error *or* nothing to push
1371 1371 - 1 means we pushed and remote head count is unchanged *or*
1372 1372 we have outgoing changesets but refused to push
1373 1373 - other values as described by addchangegroup()
1374 1374 '''
1375 1375 # there are two ways to push to remote repo:
1376 1376 #
1377 1377 # addchangegroup assumes local user can lock remote
1378 1378 # repo (local filesystem, old ssh servers).
1379 1379 #
1380 1380 # unbundle assumes local user cannot lock remote repo (new ssh
1381 1381 # servers, http servers).
1382 1382
1383 1383 self.checkpush(force, revs)
1384 1384 lock = None
1385 1385 unbundle = remote.capable('unbundle')
1386 1386 if not unbundle:
1387 1387 lock = remote.lock()
1388 1388 try:
1389 1389 cg, remote_heads = discovery.prepush(self, remote, force, revs,
1390 1390 newbranch)
1391 1391 ret = remote_heads
1392 1392 if cg is not None:
1393 1393 if unbundle:
1394 1394 # local repo finds heads on server, finds out what
1395 1395 # revs it must push. once revs transferred, if server
1396 1396 # finds it has different heads (someone else won
1397 1397 # commit/push race), server aborts.
1398 1398 if force:
1399 1399 remote_heads = ['force']
1400 1400 # ssh: return remote's addchangegroup()
1401 1401 # http: return remote's addchangegroup() or 0 for error
1402 1402 ret = remote.unbundle(cg, remote_heads, 'push')
1403 1403 else:
1404 1404 # we return an integer indicating remote head count change
1405 1405 ret = remote.addchangegroup(cg, 'push', self.url(),
1406 1406 lock=lock)
1407 1407 finally:
1408 1408 if lock is not None:
1409 1409 lock.release()
1410 1410
1411 1411 self.ui.debug("checking for updated bookmarks\n")
1412 1412 rb = remote.listkeys('bookmarks')
1413 1413 for k in rb.keys():
1414 1414 if k in self._bookmarks:
1415 1415 nr, nl = rb[k], hex(self._bookmarks[k])
1416 1416 if nr in self:
1417 1417 cr = self[nr]
1418 1418 cl = self[nl]
1419 1419 if cl in cr.descendants():
1420 1420 r = remote.pushkey('bookmarks', k, nr, nl)
1421 1421 if r:
1422 1422 self.ui.status(_("updating bookmark %s\n") % k)
1423 1423 else:
1424 1424 self.ui.warn(_('updating bookmark %s'
1425 1425 ' failed!\n') % k)
1426 1426
1427 1427 return ret
1428 1428
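The return-value contract spelled out in the docstring above (and in addchangegroup() further down) can be restated as a small helper; this is only an illustration of how a caller might interpret push()'s result, not code from this changeset.

def describe_push_result(ret):
    # 0: HTTP error or nothing to push; 1: pushed, remote head count
    # unchanged (or push refused); >1: 1 + number of new remote heads;
    # <0: -1 - number of removed remote heads
    if ret == 0:
        return 'nothing pushed (or HTTP error)'
    if ret == 1:
        return 'pushed; remote head count unchanged'
    if ret > 1:
        return 'pushed; %d new remote head(s)' % (ret - 1)
    return 'pushed; %d remote head(s) removed' % (-ret - 1)

print(describe_push_result(3))    # pushed; 2 new remote head(s)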
1429 1429 def changegroupinfo(self, nodes, source):
1430 1430 if self.ui.verbose or source == 'bundle':
1431 1431 self.ui.status(_("%d changesets found\n") % len(nodes))
1432 1432 if self.ui.debugflag:
1433 1433 self.ui.debug("list of changesets:\n")
1434 1434 for node in nodes:
1435 1435 self.ui.debug("%s\n" % hex(node))
1436 1436
1437 1437 def changegroupsubset(self, bases, heads, source):
1438 1438 """Compute a changegroup consisting of all the nodes that are
1439 1439         descendants of any of the bases and ancestors of any of the heads.
1440 1440 Return a chunkbuffer object whose read() method will return
1441 1441 successive changegroup chunks.
1442 1442
1443 1443 It is fairly complex as determining which filenodes and which
1444 1444 manifest nodes need to be included for the changeset to be complete
1445 1445 is non-trivial.
1446 1446
1447 1447 Another wrinkle is doing the reverse, figuring out which changeset in
1448 1448 the changegroup a particular filenode or manifestnode belongs to.
1449 1449 """
1450 1450 cl = self.changelog
1451 1451 if not bases:
1452 1452 bases = [nullid]
1453 1453 csets, bases, heads = cl.nodesbetween(bases, heads)
1454 1454 # We assume that all ancestors of bases are known
1455 1455 common = set(cl.ancestors(*[cl.rev(n) for n in bases]))
1456 1456 return self._changegroupsubset(common, csets, heads, source)
1457 1457
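A self-contained sketch of the set arithmetic the docstring describes, on a toy parent map instead of a real changelog; the revision numbers and helper names are made up for illustration.

# rev -> parent revs for a tiny DAG: 0 <- 1 <- (2, 3) <- 4
parents = {0: (), 1: (0,), 2: (1,), 3: (1,), 4: (2, 3)}

def ancestors_of(revs):
    # inclusive ancestor set, mirroring nodesbetween's convention that
    # every node is its own ancestor and descendant
    seen, stack = set(), list(revs)
    while stack:
        r = stack.pop()
        if r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return seen

def descendants_of(revs):
    return set(r for r in parents if ancestors_of([r]) & set(revs))

bases, heads = [1], [4]
csets = sorted(descendants_of(bases) & ancestors_of(heads))
print(csets)    # [1, 2, 3, 4]: what the changegroup must carry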
1458 1458 def getbundle(self, source, heads=None, common=None):
1459 1459 """Like changegroupsubset, but returns the set difference between the
1460 1460 ancestors of heads and the ancestors common.
1461 1461         ancestors of heads and the ancestors of common.
1462 1462 If heads is None, use the local heads. If common is None, use [nullid].
1463 1463
1464 1464 The nodes in common might not all be known locally due to the way the
1465 1465 current discovery protocol works.
1466 1466 """
1467 1467 cl = self.changelog
1468 1468 if common:
1469 1469 nm = cl.nodemap
1470 1470 common = [n for n in common if n in nm]
1471 1471 else:
1472 1472 common = [nullid]
1473 1473 if not heads:
1474 1474 heads = cl.heads()
1475 1475 common, missing = cl.findcommonmissing(common, heads)
1476 1476 return self._changegroupsubset(common, missing, heads, source)
1477 1477
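By contrast, getbundle() is a plain set difference over ancestors. Writing the two ancestor sets out by hand for the same toy DAG (0 <- 1 <- (2, 3) <- 4) makes the difference with changegroupsubset() visible; the values are illustrative only.

ancestors_of_heads = set([0, 1, 2, 3, 4])    # ancestors of head 4
ancestors_of_common = set([0, 1, 2])         # ancestors of common 2
missing = sorted(ancestors_of_heads - ancestors_of_common)
print(missing)    # [3, 4]: the revisions the bundle must contain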
1478 1478 def _changegroupsubset(self, commonrevs, csets, heads, source):
1479 1479
1480 1480 cl = self.changelog
1481 1481 mf = self.manifest
1482 1482 mfs = {} # needed manifests
1483 1483 fnodes = {} # needed file nodes
1484 1484
1485 1485 # can we go through the fast path ?
1486 1486 heads.sort()
1487 1487 if heads == sorted(self.heads()):
1488 1488 return self._changegroup(csets, source)
1489 1489
1490 1490 # slow path
1491 1491 self.hook('preoutgoing', throw=True, source=source)
1492 1492 self.changegroupinfo(csets, source)
1493 1493
1494 1494         # A function generating function that sets up the initial environment
1495 1495         # of the inner function.
1496 1496 def filenode_collector(changedfiles):
1497 1497 # This gathers information from each manifestnode included in the
1498 1498 # changegroup about which filenodes the manifest node references
1499 1499 # so we can include those in the changegroup too.
1500 1500 #
1501 1501 # It also remembers which changenode each filenode belongs to. It
1502 1502             # does this by assuming a filenode belongs to the changenode
1503 1503             # that the first manifest referencing it belongs to.
1504 1504 def collect(mnode):
1505 1505 r = mf.rev(mnode)
1506 1506 clnode = mfs[mnode]
1507 1507 mdata = mf.readfast(mnode)
1508 1508 for f in changedfiles:
1509 1509 if f in mdata:
1510 1510 fnodes.setdefault(f, {}).setdefault(mdata[f], clnode)
1511 1511
1512 1512 return collect
1513 1513
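The important property of collect() above is that a filenode ends up attributed to the changeset of the first manifest seen that references it, because setdefault() never overwrites. A tiny stand-alone illustration with made-up names instead of Mercurial objects:

fnodes = {}
# (changenode, manifest contents) in the order the manifests are walked
manifests = [('cset1', {'a.txt': 'f1', 'b.txt': 'f2'}),
             ('cset2', {'a.txt': 'f1', 'b.txt': 'f3'})]
for clnode, mdata in manifests:
    for f in ('a.txt', 'b.txt'):
        if f in mdata:
            # setdefault keeps the first owner, so 'f1' stays with cset1
            fnodes.setdefault(f, {}).setdefault(mdata[f], clnode)

print(fnodes['a.txt']['f1'])    # cset1 (first manifest wins)
print(fnodes['b.txt']['f3'])    # cset2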
1514 1514 # If we determine that a particular file or manifest node must be a
1515 1515 # node that the recipient of the changegroup will already have, we can
1516 1516 # also assume the recipient will have all the parents. This function
1517 1517 # prunes them from the set of missing nodes.
1518 1518 def prune(revlog, missingnodes):
1519 1519 # drop any nodes that claim to be part of a cset in commonrevs
1520 1520 drop = set()
1521 1521 for n in missingnodes:
1522 1522 if revlog.linkrev(revlog.rev(n)) in commonrevs:
1523 1523 drop.add(n)
1524 1524 for n in drop:
1525 1525 missingnodes.pop(n, None)
1526 1526
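prune() only needs linkrevs to decide what the recipient already has. A minimal restatement with plain dicts, assuming made-up node names and linkrevs:

commonrevs = set([0, 1, 2])                   # csets the recipient has
linkrevs = {'n1': 1, 'n2': 3, 'n3': 4}        # node -> introducing cset
missingnodes = {'n1': 'x', 'n2': 'y', 'n3': 'z'}

for n in [n for n in missingnodes if linkrevs[n] in commonrevs]:
    missingnodes.pop(n, None)                 # recipient already has these
print(sorted(missingnodes))                   # ['n2', 'n3']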
1527 1527         # Now that we have all these utility functions to help out and
1528 1528 # logically divide up the task, generate the group.
1529 1529 def gengroup():
1530 1530 # The set of changed files starts empty.
1531 1531 changedfiles = set()
1532
1532 1533 collect = changegroup.collector(cl, mfs, changedfiles)
1534 def clookup(x):
1535 collect(x)
1536 return x
1533 1537
1534 1538 # Create a changenode group generator that will call our functions
1535 1539 # back to lookup the owning changenode and collect information.
1536 group = cl.group(csets, lambda x: x, collect)
1540 group = cl.group(csets, clookup)
1537 1541 for count, chunk in enumerate(group):
1538 1542 yield chunk
1539 1543 # revlog.group yields three entries per node, so
1540 1544 # dividing by 3 gives an approximation of how many
1541 1545 # nodes have been processed.
1542 1546 self.ui.progress(_('bundling'), count / 3,
1543 1547 unit=_('changesets'))
1544 1548 changecount = count / 3
1545 1549 efiles = len(changedfiles)
1546 1550 self.ui.progress(_('bundling'), None)
1547 1551
1548 1552 prune(mf, mfs)
1549 1553 # Create a generator for the manifestnodes that calls our lookup
1550 1554 # and data collection functions back.
1551 group = mf.group(sorted(mfs, key=mf.rev),
1552 lambda mnode: mfs[mnode],
1553 filenode_collector(changedfiles))
1555 fcollect = filenode_collector(changedfiles)
1556 def mlookup(x):
1557 fcollect(x)
1558 return mfs[x]
1559
1560 group = mf.group(sorted(mfs, key=mf.rev), mlookup)
1554 1561 for count, chunk in enumerate(group):
1555 1562 yield chunk
1556 1563 # see above comment for why we divide by 3
1557 1564 self.ui.progress(_('bundling'), count / 3,
1558 1565 unit=_('manifests'), total=changecount)
1559 1566 self.ui.progress(_('bundling'), None)
1560 1567
1561 1568 mfs.clear()
1562 1569
1563 1570 # Go through all our files in order sorted by name.
1564 1571 for idx, fname in enumerate(sorted(changedfiles)):
1565 1572 filerevlog = self.file(fname)
1566 1573 if not len(filerevlog):
1567 1574 raise util.Abort(_("empty or missing revlog for %s") % fname)
1568 1575 # Toss out the filenodes that the recipient isn't really
1569 1576 # missing.
1570 1577 missingfnodes = fnodes.pop(fname, {})
1571 1578 prune(filerevlog, missingfnodes)
1572 1579 # If any filenodes are left, generate the group for them,
1573 1580 # otherwise don't bother.
1574 1581 if missingfnodes:
1575 1582 yield changegroup.chunkheader(len(fname))
1576 1583 yield fname
1577 1584 # Create a group generator and only pass in a changenode
1578 1585 # lookup function as we need to collect no information
1579 1586 # from filenodes.
1587 def flookup(x):
1588 return missingfnodes[x]
1589
1580 1590 group = filerevlog.group(
1581 1591 sorted(missingfnodes, key=filerevlog.rev),
1582 lambda fnode: missingfnodes[fnode])
1592 flookup)
1583 1593 for chunk in group:
1584 1594 # even though we print the same progress on
1585 1595 # most loop iterations, put the progress call
1586 1596 # here so that time estimates (if any) can be updated
1587 1597 self.ui.progress(
1588 1598 _('bundling'), idx, item=fname,
1589 1599 unit=_('files'), total=efiles)
1590 1600 yield chunk
1591 1601 # Signal that no more groups are left.
1592 1602 yield changegroup.closechunk()
1593 1603 self.ui.progress(_('bundling'), None)
1594 1604
1595 1605 if csets:
1596 1606 self.hook('outgoing', node=hex(csets[0]), source=source)
1597 1607
1598 1608 return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
1599 1609
1600 1610 def changegroup(self, basenodes, source):
1601 1611 # to avoid a race we use changegroupsubset() (issue1320)
1602 1612 return self.changegroupsubset(basenodes, self.heads(), source)
1603 1613
1604 1614 def _changegroup(self, nodes, source):
1605 1615 """Compute the changegroup of all nodes that we have that a recipient
1606 1616 doesn't. Return a chunkbuffer object whose read() method will return
1607 1617 successive changegroup chunks.
1608 1618
1609 1619 This is much easier than the previous function as we can assume that
1610 1620 the recipient has any changenode we aren't sending them.
1611 1621
1612 1622 nodes is the set of nodes to send"""
1613 1623
1614 1624 self.hook('preoutgoing', throw=True, source=source)
1615 1625
1616 1626 cl = self.changelog
1617 1627 revset = set([cl.rev(n) for n in nodes])
1618 1628 self.changegroupinfo(nodes, source)
1619 1629
1620 1630 def gennodelst(log):
1621 1631 for r in log:
1622 1632 if log.linkrev(r) in revset:
1623 1633 yield log.node(r)
1624 1634
1625 1635 def lookuplinkrev_func(revlog):
1626 1636 def lookuplinkrev(n):
1627 1637 return cl.node(revlog.linkrev(revlog.rev(n)))
1628 1638 return lookuplinkrev
1629 1639
1630 1640 def gengroup():
1631 1641 '''yield a sequence of changegroup chunks (strings)'''
1632 1642 # construct a list of all changed files
1633 1643 changedfiles = set()
1634 1644 mmfs = {}
1635 collect = changegroup.collector(cl, mmfs, changedfiles)
1636 1645
1637 for count, chunk in enumerate(cl.group(nodes, lambda x: x, collect)):
1646 collect = changegroup.collector(cl, mmfs, changedfiles)
1647 def clookup(x):
1648 collect(x)
1649 return x
1650
1651 for count, chunk in enumerate(cl.group(nodes, clookup)):
1638 1652 # revlog.group yields three entries per node, so
1639 1653 # dividing by 3 gives an approximation of how many
1640 1654 # nodes have been processed.
1641 1655 self.ui.progress(_('bundling'), count / 3, unit=_('changesets'))
1642 1656 yield chunk
1643 1657 efiles = len(changedfiles)
1644 1658 changecount = count / 3
1645 1659 self.ui.progress(_('bundling'), None)
1646 1660
1647 1661 mnfst = self.manifest
1648 1662 nodeiter = gennodelst(mnfst)
1649 for count, chunk in enumerate(mnfst.group(nodeiter,
1650 lookuplinkrev_func(mnfst))):
1663 mfunc = lookuplinkrev_func(mnfst)
1664 def mlookup(x):
1665 return mfunc(x)
1666
1667 for count, chunk in enumerate(mnfst.group(nodeiter, mlookup)):
1651 1668 # see above comment for why we divide by 3
1652 1669 self.ui.progress(_('bundling'), count / 3,
1653 1670 unit=_('manifests'), total=changecount)
1654 1671 yield chunk
1655 1672 self.ui.progress(_('bundling'), None)
1656 1673
1657 1674 for idx, fname in enumerate(sorted(changedfiles)):
1658 1675 filerevlog = self.file(fname)
1659 1676 if not len(filerevlog):
1660 1677 raise util.Abort(_("empty or missing revlog for %s") % fname)
1661 1678 nodeiter = gennodelst(filerevlog)
1662 1679 nodeiter = list(nodeiter)
1663 1680 if nodeiter:
1664 1681 yield changegroup.chunkheader(len(fname))
1665 1682 yield fname
1666 lookup = lookuplinkrev_func(filerevlog)
1667 for chunk in filerevlog.group(nodeiter, lookup):
1683 ffunc = lookuplinkrev_func(filerevlog)
1684 def flookup(x):
1685 return ffunc(x)
1686
1687 for chunk in filerevlog.group(nodeiter, flookup):
1668 1688 self.ui.progress(
1669 1689 _('bundling'), idx, item=fname,
1670 1690 total=efiles, unit=_('files'))
1671 1691 yield chunk
1672 1692 self.ui.progress(_('bundling'), None)
1673 1693
1674 1694 yield changegroup.closechunk()
1675 1695
1676 1696 if nodes:
1677 1697 self.hook('outgoing', node=hex(nodes[0]), source=source)
1678 1698
1679 1699 return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
1680 1700
1681 1701 def addchangegroup(self, source, srctype, url, emptyok=False, lock=None):
1682 1702 """Add the changegroup returned by source.read() to this repo.
1683 1703 srctype is a string like 'push', 'pull', or 'unbundle'. url is
1684 1704 the URL of the repo where this changegroup is coming from.
1685 1705 If lock is not None, the function takes ownership of the lock
1686 1706 and releases it after the changegroup is added.
1687 1707
1688 1708 Return an integer summarizing the change to this repo:
1689 1709 - nothing changed or no source: 0
1690 1710 - more heads than before: 1+added heads (2..n)
1691 1711 - fewer heads than before: -1-removed heads (-2..-n)
1692 1712 - number of heads stays the same: 1
1693 1713 """
1694 1714 def csmap(x):
1695 1715 self.ui.debug("add changeset %s\n" % short(x))
1696 1716 return len(cl)
1697 1717
1698 1718 def revmap(x):
1699 1719 return cl.rev(x)
1700 1720
1701 1721 if not source:
1702 1722 return 0
1703 1723
1704 1724 self.hook('prechangegroup', throw=True, source=srctype, url=url)
1705 1725
1706 1726 changesets = files = revisions = 0
1707 1727 efiles = set()
1708 1728
1709 1729 # write changelog data to temp files so concurrent readers will not see
1710 1730 # inconsistent view
1711 1731 cl = self.changelog
1712 1732 cl.delayupdate()
1713 1733 oldheads = len(cl.heads())
1714 1734
1715 1735 tr = self.transaction("\n".join([srctype, urlmod.hidepassword(url)]))
1716 1736 try:
1717 1737 trp = weakref.proxy(tr)
1718 1738 # pull off the changeset group
1719 1739 self.ui.status(_("adding changesets\n"))
1720 1740 clstart = len(cl)
1721 1741 class prog(object):
1722 1742 step = _('changesets')
1723 1743 count = 1
1724 1744 ui = self.ui
1725 1745 total = None
1726 1746 def __call__(self):
1727 1747 self.ui.progress(self.step, self.count, unit=_('chunks'),
1728 1748 total=self.total)
1729 1749 self.count += 1
1730 1750 pr = prog()
1731 1751 source.callback = pr
1732 1752
1733 1753 if (cl.addgroup(source, csmap, trp) is None
1734 1754 and not emptyok):
1735 1755 raise util.Abort(_("received changelog group is empty"))
1736 1756 clend = len(cl)
1737 1757 changesets = clend - clstart
1738 1758 for c in xrange(clstart, clend):
1739 1759 efiles.update(self[c].files())
1740 1760 efiles = len(efiles)
1741 1761 self.ui.progress(_('changesets'), None)
1742 1762
1743 1763 # pull off the manifest group
1744 1764 self.ui.status(_("adding manifests\n"))
1745 1765 pr.step = _('manifests')
1746 1766 pr.count = 1
1747 1767 pr.total = changesets # manifests <= changesets
1748 1768 # no need to check for empty manifest group here:
1749 1769 # if the result of the merge of 1 and 2 is the same in 3 and 4,
1750 1770 # no new manifest will be created and the manifest group will
1751 1771 # be empty during the pull
1752 1772 self.manifest.addgroup(source, revmap, trp)
1753 1773 self.ui.progress(_('manifests'), None)
1754 1774
1755 1775 needfiles = {}
1756 1776 if self.ui.configbool('server', 'validate', default=False):
1757 1777 # validate incoming csets have their manifests
1758 1778 for cset in xrange(clstart, clend):
1759 1779 mfest = self.changelog.read(self.changelog.node(cset))[0]
1760 1780 mfest = self.manifest.readdelta(mfest)
1761 1781 # store file nodes we must see
1762 1782 for f, n in mfest.iteritems():
1763 1783 needfiles.setdefault(f, set()).add(n)
1764 1784
1765 1785 # process the files
1766 1786 self.ui.status(_("adding file changes\n"))
1767 1787 pr.step = 'files'
1768 1788 pr.count = 1
1769 1789 pr.total = efiles
1770 1790 source.callback = None
1771 1791
1772 1792 while 1:
1773 1793 f = source.chunk()
1774 1794 if not f:
1775 1795 break
1776 1796 self.ui.debug("adding %s revisions\n" % f)
1777 1797 pr()
1778 1798 fl = self.file(f)
1779 1799 o = len(fl)
1780 1800 if fl.addgroup(source, revmap, trp) is None:
1781 1801 raise util.Abort(_("received file revlog group is empty"))
1782 1802 revisions += len(fl) - o
1783 1803 files += 1
1784 1804 if f in needfiles:
1785 1805 needs = needfiles[f]
1786 1806 for new in xrange(o, len(fl)):
1787 1807 n = fl.node(new)
1788 1808 if n in needs:
1789 1809 needs.remove(n)
1790 1810 if not needs:
1791 1811 del needfiles[f]
1792 1812 self.ui.progress(_('files'), None)
1793 1813
1794 1814 for f, needs in needfiles.iteritems():
1795 1815 fl = self.file(f)
1796 1816 for n in needs:
1797 1817 try:
1798 1818 fl.rev(n)
1799 1819 except error.LookupError:
1800 1820 raise util.Abort(
1801 1821 _('missing file data for %s:%s - run hg verify') %
1802 1822 (f, hex(n)))
1803 1823
1804 1824 newheads = len(cl.heads())
1805 1825 heads = ""
1806 1826 if oldheads and newheads != oldheads:
1807 1827 heads = _(" (%+d heads)") % (newheads - oldheads)
1808 1828
1809 1829 self.ui.status(_("added %d changesets"
1810 1830 " with %d changes to %d files%s\n")
1811 1831 % (changesets, revisions, files, heads))
1812 1832
1813 1833 if changesets > 0:
1814 1834 p = lambda: cl.writepending() and self.root or ""
1815 1835 self.hook('pretxnchangegroup', throw=True,
1816 1836 node=hex(cl.node(clstart)), source=srctype,
1817 1837 url=url, pending=p)
1818 1838
1819 1839 # make changelog see real files again
1820 1840 cl.finalize(trp)
1821 1841
1822 1842 tr.close()
1823 1843 finally:
1824 1844 tr.release()
1825 1845 if lock:
1826 1846 lock.release()
1827 1847
1828 1848 if changesets > 0:
1829 1849 # forcefully update the on-disk branch cache
1830 1850 self.ui.debug("updating the branch cache\n")
1831 1851 self.updatebranchcache()
1832 1852 self.hook("changegroup", node=hex(cl.node(clstart)),
1833 1853 source=srctype, url=url)
1834 1854
1835 1855 for i in xrange(clstart, clend):
1836 1856 self.hook("incoming", node=hex(cl.node(i)),
1837 1857 source=srctype, url=url)
1838 1858
1839 1859 # never return 0 here:
1840 1860 if newheads < oldheads:
1841 1861 return newheads - oldheads - 1
1842 1862 else:
1843 1863 return newheads - oldheads + 1
1844 1864
1845 1865
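The head-count bookkeeping at the end of addchangegroup() reduces to a small pure function; this merely restates the docstring and the final return statements above.

def addchangegroup_result(oldheads, newheads):
    # never 0: 1 when the head count is unchanged, 1 + n when n heads
    # were added, -1 - n when n heads were removed
    if newheads < oldheads:
        return newheads - oldheads - 1
    return newheads - oldheads + 1

print(addchangegroup_result(2, 2))    # 1  (head count unchanged)
print(addchangegroup_result(2, 4))    # 3  (two heads added)
print(addchangegroup_result(3, 1))    # -3 (two heads removed)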
1846 1866 def stream_in(self, remote, requirements):
1847 1867 lock = self.lock()
1848 1868 try:
1849 1869 fp = remote.stream_out()
1850 1870 l = fp.readline()
1851 1871 try:
1852 1872 resp = int(l)
1853 1873 except ValueError:
1854 1874 raise error.ResponseError(
1855 1875 _('Unexpected response from remote server:'), l)
1856 1876 if resp == 1:
1857 1877 raise util.Abort(_('operation forbidden by server'))
1858 1878 elif resp == 2:
1859 1879 raise util.Abort(_('locking the remote repository failed'))
1860 1880 elif resp != 0:
1861 1881 raise util.Abort(_('the server sent an unknown error code'))
1862 1882 self.ui.status(_('streaming all changes\n'))
1863 1883 l = fp.readline()
1864 1884 try:
1865 1885 total_files, total_bytes = map(int, l.split(' ', 1))
1866 1886 except (ValueError, TypeError):
1867 1887 raise error.ResponseError(
1868 1888 _('Unexpected response from remote server:'), l)
1869 1889 self.ui.status(_('%d files to transfer, %s of data\n') %
1870 1890 (total_files, util.bytecount(total_bytes)))
1871 1891 start = time.time()
1872 1892 for i in xrange(total_files):
1873 1893 # XXX doesn't support '\n' or '\r' in filenames
1874 1894 l = fp.readline()
1875 1895 try:
1876 1896 name, size = l.split('\0', 1)
1877 1897 size = int(size)
1878 1898 except (ValueError, TypeError):
1879 1899 raise error.ResponseError(
1880 1900 _('Unexpected response from remote server:'), l)
1881 1901 self.ui.debug('adding %s (%s)\n' % (name, util.bytecount(size)))
1882 1902 # for backwards compat, name was partially encoded
1883 1903 ofp = self.sopener(store.decodedir(name), 'w')
1884 1904 for chunk in util.filechunkiter(fp, limit=size):
1885 1905 ofp.write(chunk)
1886 1906 ofp.close()
1887 1907 elapsed = time.time() - start
1888 1908 if elapsed <= 0:
1889 1909 elapsed = 0.001
1890 1910 self.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') %
1891 1911 (util.bytecount(total_bytes), elapsed,
1892 1912 util.bytecount(total_bytes / elapsed)))
1893 1913
1894 1914 # new requirements = old non-format requirements + new format-related
1895 1915 # requirements from the streamed-in repository
1896 1916 requirements.update(set(self.requirements) - self.supportedformats)
1897 1917 self._applyrequirements(requirements)
1898 1918 self._writerequirements()
1899 1919
1900 1920 self.invalidate()
1901 1921 return len(self.heads()) + 1
1902 1922 finally:
1903 1923 lock.release()
1904 1924
1905 1925 def clone(self, remote, heads=[], stream=False):
1906 1926 '''clone remote repository.
1907 1927
1908 1928 keyword arguments:
1909 1929 heads: list of revs to clone (forces use of pull)
1910 1930 stream: use streaming clone if possible'''
1911 1931
1912 1932 # now, all clients that can request uncompressed clones can
1913 1933 # read repo formats supported by all servers that can serve
1914 1934 # them.
1915 1935
1916 1936 # if revlog format changes, client will have to check version
1917 1937 # and format flags on "stream" capability, and use
1918 1938 # uncompressed only if compatible.
1919 1939
1920 1940 if stream and not heads:
1921 1941 # 'stream' means remote revlog format is revlogv1 only
1922 1942 if remote.capable('stream'):
1923 1943 return self.stream_in(remote, set(('revlogv1',)))
1924 1944 # otherwise, 'streamreqs' contains the remote revlog format
1925 1945 streamreqs = remote.capable('streamreqs')
1926 1946 if streamreqs:
1927 1947 streamreqs = set(streamreqs.split(','))
1928 1948 # if we support it, stream in and adjust our requirements
1929 1949 if not streamreqs - self.supportedformats:
1930 1950 return self.stream_in(remote, streamreqs)
1931 1951 return self.pull(remote, heads)
1932 1952
1933 1953 def pushkey(self, namespace, key, old, new):
1934 1954 return pushkey.push(self, namespace, key, old, new)
1935 1955
1936 1956 def listkeys(self, namespace):
1937 1957 return pushkey.list(self, namespace)
1938 1958
1939 1959 def debugwireargs(self, one, two, three=None, four=None):
1940 1960 '''used to test argument passing over the wire'''
1941 1961 return "%s %s %s %s" % (one, two, three, four)
1942 1962
1943 1963 # used to avoid circular references so destructors work
1944 1964 def aftertrans(files):
1945 1965 renamefiles = [tuple(t) for t in files]
1946 1966 def a():
1947 1967 for src, dest in renamefiles:
1948 1968 util.rename(src, dest)
1949 1969 return a
1950 1970
1951 1971 def instance(ui, path, create):
1952 1972 return localrepository(ui, util.drop_scheme('file', path), create)
1953 1973
1954 1974 def islocal(path):
1955 1975 return True
@@ -1,1276 +1,1273 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47
48 48 def getoffset(q):
49 49 return int(q >> 16)
50 50
51 51 def gettype(q):
52 52 return int(q & 0xFFFF)
53 53
54 54 def offset_type(offset, type):
55 55 return long(long(offset) << 16 | type)
56 56
57 57 nullhash = _sha(nullid)
58 58
59 59 def hash(text, p1, p2):
60 60 """generate a hash from the given text and its parent hashes
61 61
62 62 This hash combines both the current file contents and its history
63 63 in a manner that makes it easy to distinguish nodes with the same
64 64 content in the revision graph.
65 65 """
66 66     # As of now, if one of the parent nodes is null, p2 is null
67 67 if p2 == nullid:
68 68 # deep copy of a hash is faster than creating one
69 69 s = nullhash.copy()
70 70 s.update(p1)
71 71 else:
72 72 # none of the parent nodes are nullid
73 73 l = [p1, p2]
74 74 l.sort()
75 75 s = _sha(l[0])
76 76 s.update(l[1])
77 77 s.update(text)
78 78 return s.digest()
79 79
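A stand-alone restatement of the hashing scheme above, using hashlib directly and dropping the nullhash-copy shortcut (sorting the parents covers the nullid case, since nullid sorts first); a sketch only, not a replacement for hash().

import hashlib

nullid = '\0' * 20

def nodehash(text, p1, p2):
    # sha1 over the two parent nodeids in sorted order, then the text
    s = hashlib.sha1()
    for p in sorted([p1, p2]):
        s.update(p)
    s.update(text)
    return s.digest()

print(len(nodehash('some file contents', nullid, nullid)))    # 20-byte nodeid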
80 80 def compress(text):
81 81 """ generate a possibly-compressed representation of text """
82 82 if not text:
83 83 return ("", text)
84 84 l = len(text)
85 85 bin = None
86 86 if l < 44:
87 87 pass
88 88 elif l > 1000000:
89 89 # zlib makes an internal copy, thus doubling memory usage for
90 90         # large files, so let's do this in pieces
91 91 z = zlib.compressobj()
92 92 p = []
93 93 pos = 0
94 94 while pos < l:
95 95 pos2 = pos + 2**20
96 96 p.append(z.compress(text[pos:pos2]))
97 97 pos = pos2
98 98 p.append(z.flush())
99 99 if sum(map(len, p)) < l:
100 100 bin = "".join(p)
101 101 else:
102 102 bin = _compress(text)
103 103 if bin is None or len(bin) > l:
104 104 if text[0] == '\0':
105 105 return ("", text)
106 106 return ('u', text)
107 107 return ("", bin)
108 108
109 109 def decompress(bin):
110 110 """ decompress the given input """
111 111 if not bin:
112 112 return bin
113 113 t = bin[0]
114 114 if t == '\0':
115 115 return bin
116 116 if t == 'x':
117 117 return _decompress(bin)
118 118 if t == 'u':
119 119 return bin[1:]
120 120 raise RevlogError(_("unknown compression type %r") % t)
121 121
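The one-byte tag convention used by compress() and decompress() can be exercised with zlib directly; a small sketch (zlib streams happen to start with 'x', which is why that case needs no separate marker).

import zlib

def undo(bin):
    # mirrors decompress() above
    if not bin or bin[0] == '\0':
        return bin                   # stored literally
    if bin[0] == 'x':
        return zlib.decompress(bin)  # a zlib stream
    if bin[0] == 'u':
        return bin[1:]               # uncompressed, explicitly tagged
    raise ValueError('unknown compression type %r' % bin[0])

text = 'x' * 500                     # long and repetitive: compresses well
assert undo(zlib.compress(text)) == text
assert undo('u' + 'short text') == 'short text'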
122 122 indexformatv0 = ">4l20s20s20s"
123 123 v0shaoffset = 56
124 124
125 125 class revlogoldio(object):
126 126 def __init__(self):
127 127 self.size = struct.calcsize(indexformatv0)
128 128
129 129 def parseindex(self, data, inline):
130 130 s = self.size
131 131 index = []
132 132 nodemap = {nullid: nullrev}
133 133 n = off = 0
134 134 l = len(data)
135 135 while off + s <= l:
136 136 cur = data[off:off + s]
137 137 off += s
138 138 e = _unpack(indexformatv0, cur)
139 139 # transform to revlogv1 format
140 140 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 141 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 142 index.append(e2)
143 143 nodemap[e[6]] = n
144 144 n += 1
145 145
146 146 # add the magic null revision at -1
147 147 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
148 148
149 149 return index, nodemap, None
150 150
151 151 def packentry(self, entry, node, version, rev):
152 152 if gettype(entry[0]):
153 153 raise RevlogError(_("index entry flags need RevlogNG"))
154 154 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
155 155 node(entry[5]), node(entry[6]), entry[7])
156 156 return _pack(indexformatv0, *e2)
157 157
158 158 # index ng:
159 159 # 6 bytes: offset
160 160 # 2 bytes: flags
161 161 # 4 bytes: compressed length
162 162 # 4 bytes: uncompressed length
163 163 # 4 bytes: base rev
164 164 # 4 bytes: link rev
165 165 # 4 bytes: parent 1 rev
166 166 # 4 bytes: parent 2 rev
167 167 # 32 bytes: nodeid
168 168 indexformatng = ">Qiiiiii20s12x"
169 169 ngshaoffset = 32
170 170 versionformat = ">I"
171 171
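The RevlogNG entry layout listed above adds up to 64 bytes per index entry, which struct confirms; the packed values below are made up for illustration.

import struct

indexformatng = ">Qiiiiii20s12x"
print(struct.calcsize(indexformatng))            # 64

entry = (0, 120, 300, 0, 5, 4, -1, '\xab' * 20)  # offset/flags, lengths, base,
                                                 # link, p1, p2, nodeid
packed = struct.pack(indexformatng, *entry)
print(len(packed))                               # 64
print(struct.unpack(indexformatng, packed)[2])   # 300 (uncompressed length)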
172 172 class revlogio(object):
173 173 def __init__(self):
174 174 self.size = struct.calcsize(indexformatng)
175 175
176 176 def parseindex(self, data, inline):
177 177 # call the C implementation to parse the index data
178 178 index, cache = parsers.parse_index2(data, inline)
179 179 return index, None, cache
180 180
181 181 def packentry(self, entry, node, version, rev):
182 182 p = _pack(indexformatng, *entry)
183 183 if rev == 0:
184 184 p = _pack(versionformat, version) + p[4:]
185 185 return p
186 186
187 187 class revlog(object):
188 188 """
189 189 the underlying revision storage object
190 190
191 191 A revlog consists of two parts, an index and the revision data.
192 192
193 193 The index is a file with a fixed record size containing
194 194 information on each revision, including its nodeid (hash), the
195 195 nodeids of its parents, the position and offset of its data within
196 196 the data file, and the revision it's based on. Finally, each entry
197 197 contains a linkrev entry that can serve as a pointer to external
198 198 data.
199 199
200 200 The revision data itself is a linear collection of data chunks.
201 201 Each chunk represents a revision and is usually represented as a
202 202 delta against the previous chunk. To bound lookup time, runs of
203 203 deltas are limited to about 2 times the length of the original
204 204 version data. This makes retrieval of a version proportional to
205 205 its size, or O(1) relative to the number of revisions.
206 206
207 207 Both pieces of the revlog are written to in an append-only
208 208 fashion, which means we never need to rewrite a file to insert or
209 209 remove data, and can use some simple techniques to avoid the need
210 210 for locking while reading.
211 211 """
212 212 def __init__(self, opener, indexfile, shallowroot=None):
213 213 """
214 214 create a revlog object
215 215
216 216 opener is a function that abstracts the file opening operation
217 217 and can be used to implement COW semantics or the like.
218 218 """
219 219 self.indexfile = indexfile
220 220 self.datafile = indexfile[:-2] + ".d"
221 221 self.opener = opener
222 222 self._cache = None
223 223 self._chunkcache = (0, '')
224 224 self.index = []
225 225 self._shallowroot = shallowroot
226 226 self._parentdelta = 0
227 227 self._pcache = {}
228 228 self._nodecache = {nullid: nullrev}
229 229 self._nodepos = None
230 230
231 231 v = REVLOG_DEFAULT_VERSION
232 232 if hasattr(opener, 'options') and 'defversion' in opener.options:
233 233 v = opener.options['defversion']
234 234 if v & REVLOGNG:
235 235 v |= REVLOGNGINLINEDATA
236 236 if v & REVLOGNG and 'parentdelta' in opener.options:
237 237 self._parentdelta = 1
238 238
239 239 if shallowroot:
240 240 v |= REVLOGSHALLOW
241 241
242 242 i = ''
243 243 try:
244 244 f = self.opener(self.indexfile)
245 245 i = f.read()
246 246 f.close()
247 247 if len(i) > 0:
248 248 v = struct.unpack(versionformat, i[:4])[0]
249 249 except IOError, inst:
250 250 if inst.errno != errno.ENOENT:
251 251 raise
252 252
253 253 self.version = v
254 254 self._inline = v & REVLOGNGINLINEDATA
255 255 self._shallow = v & REVLOGSHALLOW
256 256 flags = v & ~0xFFFF
257 257 fmt = v & 0xFFFF
258 258 if fmt == REVLOGV0 and flags:
259 259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 263 % (self.indexfile, flags >> 16))
264 264 elif fmt > REVLOGNG:
265 265 raise RevlogError(_("index %s unknown format %d")
266 266 % (self.indexfile, fmt))
267 267
268 268 self._io = revlogio()
269 269 if self.version == REVLOGV0:
270 270 self._io = revlogoldio()
271 271 try:
272 272 d = self._io.parseindex(i, self._inline)
273 273 except (ValueError, IndexError):
274 274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 275 self.index, nodemap, self._chunkcache = d
276 276 if nodemap is not None:
277 277 self.nodemap = self._nodecache = nodemap
278 278 if not self._chunkcache:
279 279 self._chunkclear()
280 280
281 281 def tip(self):
282 282 return self.node(len(self.index) - 2)
283 283 def __len__(self):
284 284 return len(self.index) - 1
285 285 def __iter__(self):
286 286 for i in xrange(len(self)):
287 287 yield i
288 288
289 289 @util.propertycache
290 290 def nodemap(self):
291 291 n = self.rev(self.node(0))
292 292 return self._nodecache
293 293
294 294 def rev(self, node):
295 295 try:
296 296 return self._nodecache[node]
297 297 except KeyError:
298 298 n = self._nodecache
299 299 i = self.index
300 300 p = self._nodepos
301 301 if p is None:
302 302 p = len(i) - 2
303 303 for r in xrange(p, -1, -1):
304 304 v = i[r][7]
305 305 n[v] = r
306 306 if v == node:
307 307 self._nodepos = r - 1
308 308 return r
309 309 raise LookupError(node, self.indexfile, _('no node'))
310 310
311 311 def node(self, rev):
312 312 return self.index[rev][7]
313 313 def linkrev(self, rev):
314 314 return self.index[rev][4]
315 315 def parents(self, node):
316 316 i = self.index
317 317 d = i[self.rev(node)]
318 318 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
319 319 def parentrevs(self, rev):
320 320 return self.index[rev][5:7]
321 321 def start(self, rev):
322 322 return int(self.index[rev][0] >> 16)
323 323 def end(self, rev):
324 324 return self.start(rev) + self.length(rev)
325 325 def length(self, rev):
326 326 return self.index[rev][1]
327 327 def base(self, rev):
328 328 return self.index[rev][3]
329 329 def flags(self, rev):
330 330 return self.index[rev][0] & 0xFFFF
331 331 def rawsize(self, rev):
332 332 """return the length of the uncompressed text for a given revision"""
333 333 l = self.index[rev][2]
334 334 if l >= 0:
335 335 return l
336 336
337 337 t = self.revision(self.node(rev))
338 338 return len(t)
339 339 size = rawsize
340 340
341 341 def reachable(self, node, stop=None):
342 342 """return the set of all nodes ancestral to a given node, including
343 343 the node itself, stopping when stop is matched"""
344 344 reachable = set((node,))
345 345 visit = [node]
346 346 if stop:
347 347 stopn = self.rev(stop)
348 348 else:
349 349 stopn = 0
350 350 while visit:
351 351 n = visit.pop(0)
352 352 if n == stop:
353 353 continue
354 354 if n == nullid:
355 355 continue
356 356 for p in self.parents(n):
357 357 if self.rev(p) < stopn:
358 358 continue
359 359 if p not in reachable:
360 360 reachable.add(p)
361 361 visit.append(p)
362 362 return reachable
363 363
364 364 def ancestors(self, *revs):
365 365 """Generate the ancestors of 'revs' in reverse topological order.
366 366
367 367 Yield a sequence of revision numbers starting with the parents
368 368 of each revision in revs, i.e., each revision is *not* considered
369 369 an ancestor of itself. Results are in breadth-first order:
370 370 parents of each rev in revs, then parents of those, etc. Result
371 371 does not include the null revision."""
372 372 visit = list(revs)
373 373 seen = set([nullrev])
374 374 while visit:
375 375 for parent in self.parentrevs(visit.pop(0)):
376 376 if parent not in seen:
377 377 visit.append(parent)
378 378 seen.add(parent)
379 379 yield parent
380 380
381 381 def descendants(self, *revs):
382 382 """Generate the descendants of 'revs' in revision order.
383 383
384 384 Yield a sequence of revision numbers starting with a child of
385 385 some rev in revs, i.e., each revision is *not* considered a
386 386 descendant of itself. Results are ordered by revision number (a
387 387 topological sort)."""
388 388 first = min(revs)
389 389 if first == nullrev:
390 390 for i in self:
391 391 yield i
392 392 return
393 393
394 394 seen = set(revs)
395 395 for i in xrange(first + 1, len(self)):
396 396 for x in self.parentrevs(i):
397 397 if x != nullrev and x in seen:
398 398 seen.add(i)
399 399 yield i
400 400 break
401 401
402 402 def findcommonmissing(self, common=None, heads=None):
403 403 """Return a tuple of the ancestors of common and the ancestors of heads
404 404 that are not ancestors of common.
405 405
406 406 More specifically, the second element is a list of nodes N such that
407 407 every N satisfies the following constraints:
408 408
409 409 1. N is an ancestor of some node in 'heads'
410 410 2. N is not an ancestor of any node in 'common'
411 411
412 412 The list is sorted by revision number, meaning it is
413 413 topologically sorted.
414 414
415 415 'heads' and 'common' are both lists of node IDs. If heads is
416 416 not supplied, uses all of the revlog's heads. If common is not
417 417 supplied, uses nullid."""
418 418 if common is None:
419 419 common = [nullid]
420 420 if heads is None:
421 421 heads = self.heads()
422 422
423 423 common = [self.rev(n) for n in common]
424 424 heads = [self.rev(n) for n in heads]
425 425
426 426 # we want the ancestors, but inclusive
427 427 has = set(self.ancestors(*common))
428 428 has.add(nullrev)
429 429 has.update(common)
430 430
431 431 # take all ancestors from heads that aren't in has
432 432 missing = set()
433 433 visit = [r for r in heads if r not in has]
434 434 while visit:
435 435 r = visit.pop(0)
436 436 if r in missing:
437 437 continue
438 438 else:
439 439 missing.add(r)
440 440 for p in self.parentrevs(r):
441 441 if p not in has:
442 442 visit.append(p)
443 443 missing = list(missing)
444 444 missing.sort()
445 445 return has, [self.node(r) for r in missing]
446 446
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 _common, missing = self.findcommonmissing(common, heads)
463 463 return missing
464 464
465 465 def nodesbetween(self, roots=None, heads=None):
466 466 """Return a topological path from 'roots' to 'heads'.
467 467
468 468 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
469 469 topologically sorted list of all nodes N that satisfy both of
470 470 these constraints:
471 471
472 472 1. N is a descendant of some node in 'roots'
473 473 2. N is an ancestor of some node in 'heads'
474 474
475 475 Every node is considered to be both a descendant and an ancestor
476 476 of itself, so every reachable node in 'roots' and 'heads' will be
477 477 included in 'nodes'.
478 478
479 479 'outroots' is the list of reachable nodes in 'roots', i.e., the
480 480 subset of 'roots' that is returned in 'nodes'. Likewise,
481 481 'outheads' is the subset of 'heads' that is also in 'nodes'.
482 482
483 483 'roots' and 'heads' are both lists of node IDs. If 'roots' is
484 484 unspecified, uses nullid as the only root. If 'heads' is
485 485 unspecified, uses list of all of the revlog's heads."""
486 486 nonodes = ([], [], [])
487 487 if roots is not None:
488 488 roots = list(roots)
489 489 if not roots:
490 490 return nonodes
491 491 lowestrev = min([self.rev(n) for n in roots])
492 492 else:
493 493 roots = [nullid] # Everybody's a descendent of nullid
494 494 lowestrev = nullrev
495 495 if (lowestrev == nullrev) and (heads is None):
496 496 # We want _all_ the nodes!
497 497 return ([self.node(r) for r in self], [nullid], list(self.heads()))
498 498 if heads is None:
499 499 # All nodes are ancestors, so the latest ancestor is the last
500 500 # node.
501 501 highestrev = len(self) - 1
502 502 # Set ancestors to None to signal that every node is an ancestor.
503 503 ancestors = None
504 504 # Set heads to an empty dictionary for later discovery of heads
505 505 heads = {}
506 506 else:
507 507 heads = list(heads)
508 508 if not heads:
509 509 return nonodes
510 510 ancestors = set()
511 511 # Turn heads into a dictionary so we can remove 'fake' heads.
512 512 # Also, later we will be using it to filter out the heads we can't
513 513 # find from roots.
514 514 heads = dict.fromkeys(heads, 0)
515 515 # Start at the top and keep marking parents until we're done.
516 516 nodestotag = set(heads)
517 517 # Remember where the top was so we can use it as a limit later.
518 518 highestrev = max([self.rev(n) for n in nodestotag])
519 519 while nodestotag:
520 520 # grab a node to tag
521 521 n = nodestotag.pop()
522 522 # Never tag nullid
523 523 if n == nullid:
524 524 continue
525 525 # A node's revision number represents its place in a
526 526 # topologically sorted list of nodes.
527 527 r = self.rev(n)
528 528 if r >= lowestrev:
529 529 if n not in ancestors:
530 530 # If we are possibly a descendent of one of the roots
531 531 # and we haven't already been marked as an ancestor
532 532 ancestors.add(n) # Mark as ancestor
533 533 # Add non-nullid parents to list of nodes to tag.
534 534 nodestotag.update([p for p in self.parents(n) if
535 535 p != nullid])
536 536 elif n in heads: # We've seen it before, is it a fake head?
537 537 # So it is, real heads should not be the ancestors of
538 538 # any other heads.
539 539 heads.pop(n)
540 540 if not ancestors:
541 541 return nonodes
542 542 # Now that we have our set of ancestors, we want to remove any
543 543 # roots that are not ancestors.
544 544
545 545 # If one of the roots was nullid, everything is included anyway.
546 546 if lowestrev > nullrev:
547 547 # But, since we weren't, let's recompute the lowest rev to not
548 548 # include roots that aren't ancestors.
549 549
550 550 # Filter out roots that aren't ancestors of heads
551 551 roots = [n for n in roots if n in ancestors]
552 552 # Recompute the lowest revision
553 553 if roots:
554 554 lowestrev = min([self.rev(n) for n in roots])
555 555 else:
556 556 # No more roots? Return empty list
557 557 return nonodes
558 558 else:
559 559 # We are descending from nullid, and don't need to care about
560 560 # any other roots.
561 561 lowestrev = nullrev
562 562 roots = [nullid]
563 563 # Transform our roots list into a set.
564 564 descendents = set(roots)
565 565 # Also, keep the original roots so we can filter out roots that aren't
566 566 # 'real' roots (i.e. are descended from other roots).
567 567 roots = descendents.copy()
568 568 # Our topologically sorted list of output nodes.
569 569 orderedout = []
570 570 # Don't start at nullid since we don't want nullid in our output list,
571 571         # and if nullid shows up in descendents, empty parents will look like
572 572 # they're descendents.
573 573 for r in xrange(max(lowestrev, 0), highestrev + 1):
574 574 n = self.node(r)
575 575 isdescendent = False
576 576 if lowestrev == nullrev: # Everybody is a descendent of nullid
577 577 isdescendent = True
578 578 elif n in descendents:
579 579 # n is already a descendent
580 580 isdescendent = True
581 581 # This check only needs to be done here because all the roots
582 582                 # will start being marked as descendents before the loop.
583 583 if n in roots:
584 584 # If n was a root, check if it's a 'real' root.
585 585 p = tuple(self.parents(n))
586 586 # If any of its parents are descendents, it's not a root.
587 587 if (p[0] in descendents) or (p[1] in descendents):
588 588 roots.remove(n)
589 589 else:
590 590 p = tuple(self.parents(n))
591 591 # A node is a descendent if either of its parents are
592 592                 # descendents. (We seeded the descendents set with the roots
593 593 # up there, remember?)
594 594 if (p[0] in descendents) or (p[1] in descendents):
595 595 descendents.add(n)
596 596 isdescendent = True
597 597 if isdescendent and ((ancestors is None) or (n in ancestors)):
598 598 # Only include nodes that are both descendents and ancestors.
599 599 orderedout.append(n)
600 600 if (ancestors is not None) and (n in heads):
601 601 # We're trying to figure out which heads are reachable
602 602 # from roots.
603 603 # Mark this head as having been reached
604 604 heads[n] = 1
605 605 elif ancestors is None:
606 606 # Otherwise, we're trying to discover the heads.
607 607 # Assume this is a head because if it isn't, the next step
608 608 # will eventually remove it.
609 609 heads[n] = 1
610 610 # But, obviously its parents aren't.
611 611 for p in self.parents(n):
612 612 heads.pop(p, None)
613 613 heads = [n for n in heads.iterkeys() if heads[n] != 0]
614 614 roots = list(roots)
615 615 assert orderedout
616 616 assert roots
617 617 assert heads
618 618 return (orderedout, roots, heads)
619 619
620 620 def heads(self, start=None, stop=None):
621 621 """return the list of all nodes that have no children
622 622
623 623 if start is specified, only heads that are descendants of
624 624 start will be returned
625 625 if stop is specified, it will consider all the revs from stop
626 626 as if they had no children
627 627 """
628 628 if start is None and stop is None:
629 629 count = len(self)
630 630 if not count:
631 631 return [nullid]
632 632 ishead = [1] * (count + 1)
633 633 index = self.index
634 634 for r in xrange(count):
635 635 e = index[r]
636 636 ishead[e[5]] = ishead[e[6]] = 0
637 637 return [self.node(r) for r in xrange(count) if ishead[r]]
638 638
639 639 if start is None:
640 640 start = nullid
641 641 if stop is None:
642 642 stop = []
643 643 stoprevs = set([self.rev(n) for n in stop])
644 644 startrev = self.rev(start)
645 645 reachable = set((startrev,))
646 646 heads = set((startrev,))
647 647
648 648 parentrevs = self.parentrevs
649 649 for r in xrange(startrev + 1, len(self)):
650 650 for p in parentrevs(r):
651 651 if p in reachable:
652 652 if r not in stoprevs:
653 653 reachable.add(r)
654 654 heads.add(r)
655 655 if p in heads and p not in stoprevs:
656 656 heads.remove(p)
657 657
658 658 return [self.node(r) for r in heads]
659 659
660 660 def children(self, node):
661 661 """find the children of a given node"""
662 662 c = []
663 663 p = self.rev(node)
664 664 for r in range(p + 1, len(self)):
665 665 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
666 666 if prevs:
667 667 for pr in prevs:
668 668 if pr == p:
669 669 c.append(self.node(r))
670 670 elif p == nullrev:
671 671 c.append(self.node(r))
672 672 return c
673 673
674 674 def descendant(self, start, end):
675 675 if start == nullrev:
676 676 return True
677 677 for i in self.descendants(start):
678 678 if i == end:
679 679 return True
680 680 elif i > end:
681 681 break
682 682 return False
683 683
684 684 def ancestor(self, a, b):
685 685 """calculate the least common ancestor of nodes a and b"""
686 686
687 687 # fast path, check if it is a descendant
688 688 a, b = self.rev(a), self.rev(b)
689 689 start, end = sorted((a, b))
690 690 if self.descendant(start, end):
691 691 return self.node(start)
692 692
693 693 def parents(rev):
694 694 return [p for p in self.parentrevs(rev) if p != nullrev]
695 695
696 696 c = ancestor.ancestor(a, b, parents)
697 697 if c is None:
698 698 return nullid
699 699
700 700 return self.node(c)
701 701
702 702 def _match(self, id):
703 703 if isinstance(id, (long, int)):
704 704 # rev
705 705 return self.node(id)
706 706 if len(id) == 20:
707 707 # possibly a binary node
708 708 # odds of a binary node being all hex in ASCII are 1 in 10**25
709 709 try:
710 710 node = id
711 711 self.rev(node) # quick search the index
712 712 return node
713 713 except LookupError:
714 714 pass # may be partial hex id
715 715 try:
716 716 # str(rev)
717 717 rev = int(id)
718 718 if str(rev) != id:
719 719 raise ValueError
720 720 if rev < 0:
721 721 rev = len(self) + rev
722 722 if rev < 0 or rev >= len(self):
723 723 raise ValueError
724 724 return self.node(rev)
725 725 except (ValueError, OverflowError):
726 726 pass
727 727 if len(id) == 40:
728 728 try:
729 729 # a full hex nodeid?
730 730 node = bin(id)
731 731 self.rev(node)
732 732 return node
733 733 except (TypeError, LookupError):
734 734 pass
735 735
736 736 def _partialmatch(self, id):
737 737 if id in self._pcache:
738 738 return self._pcache[id]
739 739
740 740 if len(id) < 40:
741 741 try:
742 742 # hex(node)[:...]
743 743 l = len(id) // 2 # grab an even number of digits
744 744 prefix = bin(id[:l * 2])
745 745 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
746 746 nl = [n for n in nl if hex(n).startswith(id)]
747 747 if len(nl) > 0:
748 748 if len(nl) == 1:
749 749 self._pcache[id] = nl[0]
750 750 return nl[0]
751 751 raise LookupError(id, self.indexfile,
752 752 _('ambiguous identifier'))
753 753 return None
754 754 except TypeError:
755 755 pass
756 756
757 757 def lookup(self, id):
758 758 """locate a node based on:
759 759 - revision number or str(revision number)
760 760 - nodeid or subset of hex nodeid
761 761 """
762 762 n = self._match(id)
763 763 if n is not None:
764 764 return n
765 765 n = self._partialmatch(id)
766 766 if n:
767 767 return n
768 768
769 769 raise LookupError(id, self.indexfile, _('no match found'))
770 770
771 771 def cmp(self, node, text):
772 772 """compare text with a given file revision
773 773
774 774 returns True if text is different than what is stored.
775 775 """
776 776 p1, p2 = self.parents(node)
777 777 return hash(text, p1, p2) != node
778 778
779 779 def _addchunk(self, offset, data):
780 780 o, d = self._chunkcache
781 781 # try to add to existing cache
782 782 if o + len(d) == offset and len(d) + len(data) < _chunksize:
783 783 self._chunkcache = o, d + data
784 784 else:
785 785 self._chunkcache = offset, data
786 786
787 787 def _loadchunk(self, offset, length):
788 788 if self._inline:
789 789 df = self.opener(self.indexfile)
790 790 else:
791 791 df = self.opener(self.datafile)
792 792
793 793 readahead = max(65536, length)
794 794 df.seek(offset)
795 795 d = df.read(readahead)
796 796 self._addchunk(offset, d)
797 797 if readahead > length:
798 798 return d[:length]
799 799 return d
800 800
801 801 def _getchunk(self, offset, length):
802 802 o, d = self._chunkcache
803 803 l = len(d)
804 804
805 805 # is it in the cache?
806 806 cachestart = offset - o
807 807 cacheend = cachestart + length
808 808 if cachestart >= 0 and cacheend <= l:
809 809 if cachestart == 0 and cacheend == l:
810 810 return d # avoid a copy
811 811 return d[cachestart:cacheend]
812 812
813 813 return self._loadchunk(offset, length)
814 814
815 815 def _chunkraw(self, startrev, endrev):
816 816 start = self.start(startrev)
817 817 length = self.end(endrev) - start
818 818 if self._inline:
819 819 start += (startrev + 1) * self._io.size
820 820 return self._getchunk(start, length)
821 821
822 822 def _chunk(self, rev):
823 823 return decompress(self._chunkraw(rev, rev))
824 824
825 825 def _chunkclear(self):
826 826 self._chunkcache = (0, '')
827 827
828 828 def deltaparent(self, rev):
829 829 """return previous revision or parentrev according to flags"""
830 830 if self.flags(rev) & REVIDX_PARENTDELTA:
831 831 return self.parentrevs(rev)[0]
832 832 else:
833 833 return rev - 1
834 834
835 835 def revdiff(self, rev1, rev2):
836 836 """return or calculate a delta between two revisions"""
837 837 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
838 838 return self._chunk(rev2)
839 839
840 840 return mdiff.textdiff(self.revision(self.node(rev1)),
841 841 self.revision(self.node(rev2)))
842 842
843 843 def revision(self, node):
844 844 """return an uncompressed revision of a given node"""
845 845 cachedrev = None
846 846 if node == nullid:
847 847 return ""
848 848 if self._cache:
849 849 if self._cache[0] == node:
850 850 return self._cache[2]
851 851 cachedrev = self._cache[1]
852 852
853 853 # look up what we need to read
854 854 text = None
855 855 rev = self.rev(node)
856 856 base = self.base(rev)
857 857
858 858 # check rev flags
859 859 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
860 860 raise RevlogError(_('incompatible revision flag %x') %
861 861 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
862 862
863 863 # build delta chain
864 864 chain = []
865 865 index = self.index # for performance
866 866 iterrev = rev
867 867 e = index[iterrev]
868 868 while iterrev != base and iterrev != cachedrev:
869 869 chain.append(iterrev)
870 870 if e[0] & REVIDX_PARENTDELTA:
871 871 iterrev = e[5]
872 872 else:
873 873 iterrev -= 1
874 874 e = index[iterrev]
875 875 chain.reverse()
876 876 base = iterrev
877 877
878 878 if iterrev == cachedrev:
879 879 # cache hit
880 880 text = self._cache[2]
881 881
882 882 # drop cache to save memory
883 883 self._cache = None
884 884
885 885 self._chunkraw(base, rev)
886 886 if text is None:
887 887 text = self._chunk(base)
888 888
889 889 bins = [self._chunk(r) for r in chain]
890 890 text = mdiff.patches(text, bins)
891 891
892 892 text = self._checkhash(text, node, rev)
893 893
894 894 self._cache = (node, rev, text)
895 895 return text
896 896
897 897 def _checkhash(self, text, node, rev):
898 898 p1, p2 = self.parents(node)
899 899 if (node != hash(text, p1, p2) and
900 900 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
901 901 raise RevlogError(_("integrity check failed on %s:%d")
902 902 % (self.indexfile, rev))
903 903 return text
904 904
905 905 def checkinlinesize(self, tr, fp=None):
906 906 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
907 907 return
908 908
909 909 trinfo = tr.find(self.indexfile)
910 910 if trinfo is None:
911 911 raise RevlogError(_("%s not found in the transaction")
912 912 % self.indexfile)
913 913
914 914 trindex = trinfo[2]
915 915 dataoff = self.start(trindex)
916 916
917 917 tr.add(self.datafile, dataoff)
918 918
919 919 if fp:
920 920 fp.flush()
921 921 fp.close()
922 922
923 923 df = self.opener(self.datafile, 'w')
924 924 try:
925 925 for r in self:
926 926 df.write(self._chunkraw(r, r))
927 927 finally:
928 928 df.close()
929 929
930 930 fp = self.opener(self.indexfile, 'w', atomictemp=True)
931 931 self.version &= ~(REVLOGNGINLINEDATA)
932 932 self._inline = False
933 933 for i in self:
934 934 e = self._io.packentry(self.index[i], self.node, self.version, i)
935 935 fp.write(e)
936 936
937 937 # if we don't call rename, the temp file will never replace the
938 938 # real index
939 939 fp.rename()
940 940
941 941 tr.replace(self.indexfile, trindex * self._io.size)
942 942 self._chunkclear()
943 943
944 944 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
945 945 """add a revision to the log
946 946
947 947 text - the revision data to add
948 948 transaction - the transaction object used for rollback
949 949 link - the linkrev data to add
950 950 p1, p2 - the parent nodeids of the revision
951 951 cachedelta - an optional precomputed delta
952 952 """
953 953 node = hash(text, p1, p2)
954 954 if (node in self.nodemap and
955 955 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
956 956 return node
957 957
958 958 dfh = None
959 959 if not self._inline:
960 960 dfh = self.opener(self.datafile, "a")
961 961 ifh = self.opener(self.indexfile, "a+")
962 962 try:
963 963 return self._addrevision(node, text, transaction, link, p1, p2,
964 964 cachedelta, ifh, dfh)
965 965 finally:
966 966 if dfh:
967 967 dfh.close()
968 968 ifh.close()
969 969
970 970 def _addrevision(self, node, text, transaction, link, p1, p2,
971 971 cachedelta, ifh, dfh):
972 972
973 973 btext = [text]
974 974 def buildtext():
975 975 if btext[0] is not None:
976 976 return btext[0]
977 977 # flush any pending writes here so that revision() below can read them
978 978 if dfh:
979 979 dfh.flush()
980 980 ifh.flush()
981 981 basetext = self.revision(self.node(cachedelta[0]))
982 982 btext[0] = mdiff.patch(basetext, cachedelta[1])
983 983 chk = hash(btext[0], p1, p2)
984 984 if chk != node:
985 985 raise RevlogError(_("consistency error in delta"))
986 986 return btext[0]
987 987
988 988 def builddelta(rev):
989 989 # can we use the cached delta?
990 990 if cachedelta and cachedelta[0] == rev:
991 991 delta = cachedelta[1]
992 992 else:
993 993 t = buildtext()
994 994 ptext = self.revision(self.node(rev))
995 995 delta = mdiff.textdiff(ptext, t)
996 996 data = compress(delta)
997 997 l = len(data[1]) + len(data[0])
998 998 base = self.base(rev)
999 999 dist = l + offset - self.start(base)
1000 1000 return dist, l, data, base
1001 1001
1002 1002 curr = len(self)
1003 1003 prev = curr - 1
1004 1004 base = curr
1005 1005 offset = self.end(prev)
1006 1006 flags = 0
1007 1007 d = None
1008 1008 p1r, p2r = self.rev(p1), self.rev(p2)
1009 1009
1010 1010 # should we try to build a delta?
1011 1011 if prev != nullrev:
1012 1012 d = builddelta(prev)
1013 1013 if self._parentdelta and prev != p1r:
1014 1014 d2 = builddelta(p1r)
1015 1015 if d2 < d:
1016 1016 d = d2
1017 1017 flags = REVIDX_PARENTDELTA
1018 1018 dist, l, data, base = d
1019 1019
1020 1020 # full versions are inserted when the needed deltas
1021 1021 # become comparable to the uncompressed text
1022 1022 # or the base revision is punched
1023 1023 if text is None:
1024 1024 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1025 1025 cachedelta[1])
1026 1026 else:
1027 1027 textlen = len(text)
1028 1028 if (d is None or dist > textlen * 2 or
1029 1029 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1030 1030 text = buildtext()
1031 1031 data = compress(text)
1032 1032 l = len(data[1]) + len(data[0])
1033 1033 base = curr
1034 1034
1035 1035 e = (offset_type(offset, flags), l, textlen,
1036 1036 base, link, p1r, p2r, node)
1037 1037 self.index.insert(-1, e)
1038 1038 self.nodemap[node] = curr
1039 1039
1040 1040 entry = self._io.packentry(e, self.node, self.version, curr)
1041 1041 if not self._inline:
1042 1042 transaction.add(self.datafile, offset)
1043 1043 transaction.add(self.indexfile, curr * len(entry))
1044 1044 if data[0]:
1045 1045 dfh.write(data[0])
1046 1046 dfh.write(data[1])
1047 1047 dfh.flush()
1048 1048 ifh.write(entry)
1049 1049 else:
1050 1050 offset += curr * self._io.size
1051 1051 transaction.add(self.indexfile, offset, curr)
1052 1052 ifh.write(entry)
1053 1053 ifh.write(data[0])
1054 1054 ifh.write(data[1])
1055 1055 self.checkinlinesize(transaction, ifh)
1056 1056
1057 1057 if type(text) == str: # only accept immutable objects
1058 1058 self._cache = (node, curr, text)
1059 1059 return node
1060 1060
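For reference, the cutoff used in _addrevision above to choose between storing a delta and storing a full snapshot is a plain size comparison; a worked illustration with invented numbers:

# dist is the span in bytes from the start of the base revision to the
# end of the candidate delta; textlen is the uncompressed size of the
# new revision (both values below are made up for illustration).
textlen = 1000
dist = 2500
store_full_version = dist > textlen * 2   # True: write a snapshot and reset the base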
1061 def group(self, nodelist, lookup, infocollect=None):
1061 def group(self, nodelist, lookup):
1062 1062 """Calculate a delta group, yielding a sequence of changegroup chunks
1063 1063 (strings).
1064 1064
1065 1065 Given a list of changeset revs, return a set of deltas and
1066 1066 metadata corresponding to nodes. The first delta is
1067 1067 first parent(nodelist[0]) -> nodelist[0], the receiver is
1068 1068 guaranteed to have this parent as it has all history before
1069 1069 these changesets. In the case firstparent is nullrev the
1070 1070 changegroup starts with a full revision.
1071 1071 """
1072 1072
1073 1073 revs = [self.rev(n) for n in nodelist]
1074 1074
1075 1075 # if we don't have any revisions touched by these changesets, bail
1076 1076 if not revs:
1077 1077 yield changegroup.closechunk()
1078 1078 return
1079 1079
1080 1080 # add the parent of the first rev
1081 1081 p = self.parentrevs(revs[0])[0]
1082 1082 revs.insert(0, p)
1083 1083
1084 1084 # build deltas
1085 1085 for r in xrange(len(revs) - 1):
1086 1086 a, b = revs[r], revs[r + 1]
1087 1087 nb = self.node(b)
1088 1088
1089 if infocollect is not None:
1090 infocollect(nb)
1091
1092 1089 p = self.parents(nb)
1093 1090 meta = nb + p[0] + p[1] + lookup(nb)
1094 1091 if a == nullrev:
1095 1092 d = self.revision(nb)
1096 1093 meta += mdiff.trivialdiffheader(len(d))
1097 1094 else:
1098 1095 d = self.revdiff(a, b)
1099 1096 yield changegroup.chunkheader(len(meta) + len(d))
1100 1097 yield meta
1101 1098 yield d
1102 1099
1103 1100 yield changegroup.closechunk()
1104 1101
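group() above relies on changegroup.chunkheader and changegroup.closechunk for framing. A hedged sketch of that framing as used by this changegroup format, assuming the 4-byte big-endian length prefix counts itself (frame and close below are illustrative helpers, not Mercurial APIs):

import struct

def frame(meta, delta):
    # one chunk: length prefix (including its own 4 bytes), then the
    # 80-byte meta header (node, p1, p2, linknode) and the delta
    payload = meta + delta
    return struct.pack(">l", len(payload) + 4) + payload

def close():
    # a zero-length chunk terminates the group, like closechunk()
    return struct.pack(">l", 0)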
1105 1102 def addgroup(self, bundle, linkmapper, transaction):
1106 1103 """
1107 1104 add a delta group
1108 1105
1109 1106 given a set of deltas, add them to the revision log. the
1110 1107 first delta is against its parent, which should be in our
1111 1108 log, the rest are against the previous delta.
1112 1109 """
1113 1110
1114 1111 # track the base of the current delta log
1115 1112 node = None
1116 1113
1117 1114 r = len(self)
1118 1115 end = 0
1119 1116 if r:
1120 1117 end = self.end(r - 1)
1121 1118 ifh = self.opener(self.indexfile, "a+")
1122 1119 isize = r * self._io.size
1123 1120 if self._inline:
1124 1121 transaction.add(self.indexfile, end + isize, r)
1125 1122 dfh = None
1126 1123 else:
1127 1124 transaction.add(self.indexfile, isize, r)
1128 1125 transaction.add(self.datafile, end)
1129 1126 dfh = self.opener(self.datafile, "a")
1130 1127
1131 1128 try:
1132 1129 # loop through our set of deltas
1133 1130 chain = None
1134 1131 while 1:
1135 1132 chunkdata = bundle.parsechunk()
1136 1133 if not chunkdata:
1137 1134 break
1138 1135 node = chunkdata['node']
1139 1136 p1 = chunkdata['p1']
1140 1137 p2 = chunkdata['p2']
1141 1138 cs = chunkdata['cs']
1142 1139 delta = chunkdata['data']
1143 1140
1144 1141 link = linkmapper(cs)
1145 1142 if (node in self.nodemap and
1146 1143 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1147 1144 # this can happen if two branches make the same change
1148 1145 chain = node
1149 1146 continue
1150 1147
1151 1148 for p in (p1, p2):
1152 1149 if p not in self.nodemap:
1153 1150 if self._shallow:
1154 1151 # add null entries for missing parents
1155 1152 # XXX FIXME
1156 1153 #if base == nullrev:
1157 1154 # base = len(self)
1158 1155 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1159 1156 # 0, 0, base, nullrev, nullrev, nullrev, p)
1160 1157 #self.index.insert(-1, e)
1161 1158 #self.nodemap[p] = r
1162 1159 #entry = self._io.packentry(e, self.node,
1163 1160 # self.version, r)
1164 1161 #ifh.write(entry)
1165 1162 #t, r = r, r + 1
1166 1163 raise LookupError(p, self.indexfile,
1167 1164 _('unknown parent'))
1168 1165 else:
1169 1166 raise LookupError(p, self.indexfile,
1170 1167 _('unknown parent'))
1171 1168
1172 1169 if not chain:
1173 1170 # retrieve the parent revision of the delta chain
1174 1171 chain = p1
1175 1172 if chain not in self.nodemap:
1176 1173 raise LookupError(chain, self.indexfile, _('unknown base'))
1177 1174
1178 1175 chainrev = self.rev(chain)
1179 1176 chain = self._addrevision(node, None, transaction, link,
1180 1177 p1, p2, (chainrev, delta), ifh, dfh)
1181 1178 if not dfh and not self._inline:
1182 1179 # addrevision switched from inline to conventional
1183 1180 # reopen the index
1184 1181 ifh.close()
1185 1182 dfh = self.opener(self.datafile, "a")
1186 1183 ifh = self.opener(self.indexfile, "a")
1187 1184 finally:
1188 1185 if dfh:
1189 1186 dfh.close()
1190 1187 ifh.close()
1191 1188
1192 1189 return node
1193 1190
1194 1191 def strip(self, minlink, transaction):
1195 1192 """truncate the revlog on the first revision with a linkrev >= minlink
1196 1193
1197 1194 This function is called when we're stripping revision minlink and
1198 1195 its descendants from the repository.
1199 1196
1200 1197 We have to remove all revisions with linkrev >= minlink, because
1201 1198 the equivalent changelog revisions will be renumbered after the
1202 1199 strip.
1203 1200
1204 1201 So we truncate the revlog on the first of these revisions, and
1205 1202 trust that the caller has saved the revisions that shouldn't be
1206 1203 removed and that it'll re-add them after this truncation.
1207 1204 """
1208 1205 if len(self) == 0:
1209 1206 return
1210 1207
1211 1208 for rev in self:
1212 1209 if self.index[rev][4] >= minlink:
1213 1210 break
1214 1211 else:
1215 1212 return
1216 1213
1217 1214 # first truncate the files on disk
1218 1215 end = self.start(rev)
1219 1216 if not self._inline:
1220 1217 transaction.add(self.datafile, end)
1221 1218 end = rev * self._io.size
1222 1219 else:
1223 1220 end += rev * self._io.size
1224 1221
1225 1222 transaction.add(self.indexfile, end)
1226 1223
1227 1224 # then reset internal state in memory to forget those revisions
1228 1225 self._cache = None
1229 1226 self._chunkclear()
1230 1227 for x in xrange(rev, len(self)):
1231 1228 del self.nodemap[self.node(x)]
1232 1229
1233 1230 del self.index[rev:-1]
1234 1231
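To make the truncation arithmetic in strip() concrete, a small illustration with assumed sizes (64 bytes matches the RevlogNG index entry size; the other values are made up):

entry_size = 64                  # self._io.size for a RevlogNG index
rev = 10                         # first revision with linkrev >= minlink
data_start = 4096                # hypothetical self.start(rev)
# non-inline revlog: truncate the data file at the revision's start
# offset and the index after rev whole entries
datafile_end = data_start
indexfile_end = rev * entry_size
# inline revlog: data and index share one file, so the offsets add
inline_end = data_start + rev * entry_size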
1235 1232 def checksize(self):
1236 1233 expected = 0
1237 1234 if len(self):
1238 1235 expected = max(0, self.end(len(self) - 1))
1239 1236
1240 1237 try:
1241 1238 f = self.opener(self.datafile)
1242 1239 f.seek(0, 2)
1243 1240 actual = f.tell()
1244 1241 f.close()
1245 1242 dd = actual - expected
1246 1243 except IOError, inst:
1247 1244 if inst.errno != errno.ENOENT:
1248 1245 raise
1249 1246 dd = 0
1250 1247
1251 1248 try:
1252 1249 f = self.opener(self.indexfile)
1253 1250 f.seek(0, 2)
1254 1251 actual = f.tell()
1255 1252 f.close()
1256 1253 s = self._io.size
1257 1254 i = max(0, actual // s)
1258 1255 di = actual - (i * s)
1259 1256 if self._inline:
1260 1257 databytes = 0
1261 1258 for r in self:
1262 1259 databytes += max(0, self.length(r))
1263 1260 dd = 0
1264 1261 di = actual - len(self) * s - databytes
1265 1262 except IOError, inst:
1266 1263 if inst.errno != errno.ENOENT:
1267 1264 raise
1268 1265 di = 0
1269 1266
1270 1267 return (dd, di)
1271 1268
1272 1269 def files(self):
1273 1270 res = [self.indexfile]
1274 1271 if not self._inline:
1275 1272 res.append(self.datafile)
1276 1273 return res