flagprocessors: remove flagprocessorsmixin...
marmoute
r43265:73288e7a default
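This commit removes the flagutil.flagprocessorsmixin base class. Both remotefilelog (first hunk below) and revlog (second hunk) now derive from plain object; flag processing goes through the module-level helpers flagutil.processflagsread, flagutil.processflagswrite and flagutil.processflagsraw, which take the store as an explicit first argument, while each class keeps its own per-instance _flagprocessors copy of flagutil.flagprocessors and a _flagserrorclass attribute. A minimal, self-contained sketch of that pattern follows; the toy registry, flag value and helper signature are illustrative assumptions, not Mercurial's actual API:

REVIDX_TOYFLAG = 1 << 0

# stand-in for flagutil.flagprocessors: flag -> (readfn, writefn)
flagprocessors = {
    REVIDX_TOYFLAG: (
        lambda store, text: (text.upper(), True),   # applied on read
        lambda store, text: (text.lower(), True),   # applied on write
    ),
}

def processflagsread(store, text, flags):
    # module-level helper: dispatches through the *instance's* processor
    # table, taking the store as its first argument instead of `self`
    for flag, (readfn, _writefn) in store._flagprocessors.items():
        if flags & flag:
            text, _validated = readfn(store, text)
    return text

class toystore(object):   # before this commit: toystore(flagprocessorsmixin)
    _flagserrorclass = ValueError

    def __init__(self):
        # per-instance copy, so one store can register custom flags
        # without affecting the others (mirrors both classes below)
        self._flagprocessors = dict(flagprocessors)

    def revision(self, rawtext, flags=0):
        if flags == 0:
            return rawtext
        return processflagsread(self, rawtext, flags)

print(toystore().revision("some text", REVIDX_TOYFLAG))  # -> SOME TEXT

The same call shape appears verbatim in both files below: flagutil.processflagsread(self, rawtext, flags) replaces what used to be an inherited method.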
@@ -1,459 +1,459 @@
# remotefilelog.py - filelog implementation where filelog history is stored
# remotely
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import collections
import os

from mercurial.node import (
    bin,
    nullid,
    wdirfilenodeids,
    wdirid,
)
from mercurial.i18n import _
from mercurial import (
    ancestor,
    error,
    mdiff,
    revlog,
    util,
)
from mercurial.utils import storageutil
from mercurial.revlogutils import flagutil

from . import (
    constants,
    fileserverclient,
    shallowutil,
)

class remotefilelognodemap(object):
    def __init__(self, filename, store):
        self._filename = filename
        self._store = store

    def __contains__(self, node):
        missing = self._store.getmissing([(self._filename, node)])
        return not bool(missing)

    def __get__(self, node):
        if node not in self:
            raise KeyError(node)
        return node

-class remotefilelog(flagutil.flagprocessorsmixin):
+class remotefilelog(object):

    _generaldelta = True
    _flagserrorclass = error.RevlogError

    def __init__(self, opener, path, repo):
        self.opener = opener
        self.filename = path
        self.repo = repo
        self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)

        self.version = 1

        self._flagprocessors = dict(flagutil.flagprocessors)

    def read(self, node):
        """returns the file contents at this node"""
        t = self.revision(node)
        if not t.startswith('\1\n'):
            return t
        s = t.index('\1\n', 2)
        return t[s + 2:]

    def add(self, text, meta, transaction, linknode, p1=None, p2=None):
        # hash with the metadata, like in vanilla filelogs
        hashtext = shallowutil.createrevlogtext(text, meta.get('copy'),
                                                meta.get('copyrev'))
        node = storageutil.hashrevisionsha1(hashtext, p1, p2)
        return self.addrevision(hashtext, transaction, linknode, p1, p2,
                                node=node)

    def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
        # text passed to "_createfileblob" does not include filelog metadata
        header = shallowutil.buildfileblobheader(len(text), flags)
        data = "%s\0%s" % (header, text)

        realp1 = p1
        copyfrom = ""
        if meta and 'copy' in meta:
            copyfrom = meta['copy']
            realp1 = bin(meta['copyrev'])

        data += "%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)

        visited = set()

        pancestors = {}
        queue = []
        if realp1 != nullid:
            p1flog = self
            if copyfrom:
                p1flog = remotefilelog(self.opener, copyfrom, self.repo)

            pancestors.update(p1flog.ancestormap(realp1))
            queue.append(realp1)
            visited.add(realp1)
        if p2 != nullid:
            pancestors.update(self.ancestormap(p2))
            queue.append(p2)
            visited.add(p2)

        ancestortext = ""

        # add the ancestors in topological order
        while queue:
            c = queue.pop(0)
            pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]

            pacopyfrom = pacopyfrom or ''
            ancestortext += "%s%s%s%s%s\0" % (
                c, pa1, pa2, ancestorlinknode, pacopyfrom)

            if pa1 != nullid and pa1 not in visited:
                queue.append(pa1)
                visited.add(pa1)
            if pa2 != nullid and pa2 not in visited:
                queue.append(pa2)
                visited.add(pa2)

        data += ancestortext

        return data

    def addrevision(self, text, transaction, linknode, p1, p2, cachedelta=None,
                    node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
                    sidedata=None):
        # text passed to "addrevision" includes hg filelog metadata header
        if node is None:
            node = storageutil.hashrevisionsha1(text, p1, p2)
        if sidedata is None:
            sidedata = {}

        meta, metaoffset = storageutil.parsemeta(text)
        rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
                                                           sidedata=sidedata)
        return self.addrawrevision(rawtext, transaction, linknode, p1, p2,
                                   node, flags, cachedelta,
                                   _metatuple=(meta, metaoffset))

    def addrawrevision(self, rawtext, transaction, linknode, p1, p2, node,
                       flags, cachedelta=None, _metatuple=None):
        if _metatuple:
            # _metatuple: used by "addrevision" internally by remotefilelog
            # meta was parsed confidently
            meta, metaoffset = _metatuple
        else:
            # not from self.addrevision, but something else (repo._filecommit)
            # calls addrawrevision directly. remotefilelog needs to get and
            # strip filelog metadata.
            # we don't have confidence about whether rawtext contains filelog
            # metadata or not (flag processor could replace it), so we just
            # parse it as best-effort.
            # in LFS (flags != 0)'s case, the best way is to call LFS code to
            # get the meta information, instead of storageutil.parsemeta.
            meta, metaoffset = storageutil.parsemeta(rawtext)
        if flags != 0:
            # when flags != 0, be conservative and do not mangle rawtext, since
            # a read flag processor expects the text not being mangled at all.
            metaoffset = 0
        if metaoffset:
            # remotefilelog fileblob stores copy metadata in its ancestortext,
            # not its main blob. so we need to remove filelog metadata
            # (containing copy information) from text.
            blobtext = rawtext[metaoffset:]
        else:
            blobtext = rawtext
        data = self._createfileblob(blobtext, meta, flags, p1, p2, node,
                                    linknode)
        self.repo.contentstore.addremotefilelognode(self.filename, node, data)

        return node

    def renamed(self, node):
        ancestors = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestors[node]
        if copyfrom:
            return (copyfrom, p1)

        return False

    def size(self, node):
        """return the size of a given revision"""
        return len(self.read(node))

    rawsize = size

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """

        if node == nullid:
            return True

        nodetext = self.read(node)
        return nodetext != text

    def __nonzero__(self):
        return True

    __bool__ = __nonzero__

    def __len__(self):
        if self.filename == '.hgtags':
            # The length of .hgtags is used to fast path tag checking.
            # remotefilelog doesn't support .hgtags since the entire .hgtags
            # history is needed. Use the excludepattern setting to make
            # .hgtags a normal filelog.
            return 0

        raise RuntimeError("len not supported")

    def empty(self):
        return False

    def flags(self, node):
        if isinstance(node, int):
            raise error.ProgrammingError(
                'remotefilelog does not accept integer rev for flags')
        store = self.repo.contentstore
        return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)

    def parents(self, node):
        if node == nullid:
            return nullid, nullid

        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        if copyfrom:
            p1 = nullid

        return p1, p2

    def parentrevs(self, rev):
        # TODO(augie): this is a node and should be a rev, but for now
        # nothing in core seems to actually break.
        return self.parents(rev)

    def linknode(self, node):
        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        return linknode

    def linkrev(self, node):
        return self.repo.unfiltered().changelog.rev(self.linknode(node))

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False,
                      deltamode=None):
        # we don't use any of these parameters here
        del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
        del deltamode
        prevnode = None
        for node in nodes:
            p1, p2 = self.parents(node)
            if prevnode is None:
                basenode = prevnode = p1
            if basenode == node:
                basenode = nullid
            if basenode != nullid:
                revision = None
                delta = self.revdiff(basenode, node)
            else:
                revision = self.rawdata(node)
                delta = None
            yield revlog.revlogrevisiondelta(
                node=node,
                p1node=p1,
                p2node=p2,
                linknode=self.linknode(node),
                basenode=basenode,
                flags=self.flags(node),
                baserevisionsize=None,
                revision=revision,
                delta=delta,
            )

    def revdiff(self, node1, node2):
        return mdiff.textdiff(self.rawdata(node1),
                              self.rawdata(node2))

    def lookup(self, node):
        if len(node) == 40:
            node = bin(node)
        if len(node) != 20:
            raise error.LookupError(node, self.filename,
                                    _('invalid lookup input'))

        return node

    def rev(self, node):
        # This is a hack to make TortoiseHG work.
        return node

    def node(self, rev):
        # This is a hack.
        if isinstance(rev, int):
            raise error.ProgrammingError(
                'remotefilelog does not convert integer rev to node')
        return rev

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = ('_processflag(...) use the specialized variant')
        util.nouideprecwarn(msg, '5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == 'read':
            return flagutil.processflagsread(self, text, flags)
        else: # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, node, raw=False):
        """returns the revlog contents at this node.
        this includes the meta data traditionally included in file revlogs.
        this is generally only used for bundling and communicating with vanilla
        hg clients.
        """
        if node == nullid:
            return ""
        if len(node) != 20:
            raise error.LookupError(node, self.filename,
                                    _('invalid revision input'))
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported

        store = self.repo.contentstore
        rawtext = store.get(self.filename, node)
        if raw:
            return rawtext
        flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
        if flags == 0:
            return rawtext
        return flagutil.processflagsread(self, rawtext, flags)[0]

    def rawdata(self, node):
        return self.revision(node, raw=False)

    def _read(self, id):
        """reads the raw file blob from disk, cache, or server"""
        fileservice = self.repo.fileservice
        localcache = fileservice.localcache
        cachekey = fileserverclient.getcachekey(self.repo.name, self.filename,
                                                id)
        try:
            return localcache.read(cachekey)
        except KeyError:
            pass

        localkey = fileserverclient.getlocalkey(self.filename, id)
        localpath = os.path.join(self.localpath, localkey)
        try:
            return shallowutil.readfile(localpath)
        except IOError:
            pass

        fileservice.prefetch([(self.filename, id)])
        try:
            return localcache.read(cachekey)
        except KeyError:
            pass

        raise error.LookupError(id, self.filename, _('no node'))

    def ancestormap(self, node):
        return self.repo.metadatastore.getancestors(self.filename, node)

    def ancestor(self, a, b):
        if a == nullid or b == nullid:
            return nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))

        ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(nodemap.__getitem__, ancs))
        return nullid

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""

        if a == nullid or b == nullid:
            return nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))

        ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
        return map(nodemap.__getitem__, ancs)

    def _buildrevgraph(self, a, b):
        """Builds a numeric revision graph for the given two nodes.
        Returns a node->rev map and a rev->[revs] parent function.
        """
        amap = self.ancestormap(a)
        bmap = self.ancestormap(b)

        # Union the two maps
        parentsmap = collections.defaultdict(list)
        allparents = set()
        for mapping in (amap, bmap):
            for node, pdata in mapping.iteritems():
                parents = parentsmap[node]
                p1, p2, linknode, copyfrom = pdata
                # Don't follow renames (copyfrom).
                # remotefilectx.ancestor does that.
                if p1 != nullid and not copyfrom:
                    parents.append(p1)
                    allparents.add(p1)
                if p2 != nullid:
                    parents.append(p2)
                    allparents.add(p2)

        # Breadth first traversal to build linkrev graph
        parentrevs = collections.defaultdict(list)
        revmap = {}
        queue = collections.deque(((None, n) for n in parentsmap
                                   if n not in allparents))
        while queue:
            prevrev, current = queue.pop()
            if current in revmap:
                if prevrev:
                    parentrevs[prevrev].append(revmap[current])
                continue

            # Assign linkrevs in reverse order, so start at
            # len(parentsmap) and work backwards.
            currentrev = len(parentsmap) - len(revmap) - 1
            revmap[current] = currentrev

            if prevrev:
                parentrevs[prevrev].append(currentrev)

            for parent in parentsmap.get(current):
                queue.appendleft((currentrev, parent))

        return revmap, parentrevs.__getitem__

    def strip(self, minlink, transaction):
        pass

    # misc unused things
    def files(self):
        return []

    def checksize(self):
        return 0, 0
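The read()/revision() pair above relies on the traditional filelog metadata convention: a revision text that begins with "\1\n" carries a metadata block (copy/copyrev, for instance) terminated by a second "\1\n", and read() strips it before returning the file contents. A standalone sketch of that stripping step; the bytes literals and sample payload are illustrative, while the real method works on the text returned by revision():

def strip_filelog_meta(t):
    # mirrors remotefilelog.read(): no leading marker means no metadata
    if not t.startswith(b'\x01\n'):
        return t
    s = t.index(b'\x01\n', 2)   # locate the closing marker
    return t[s + 2:]            # contents start right after it

blob = (b'\x01\n'
        b'copy: foo.txt\n'
        b'copyrev: ' + b'a' * 40 + b'\n'
        b'\x01\n'
        b'real contents\n')
assert strip_filelog_meta(blob) == b'real contents\n'
assert strip_filelog_meta(b'no meta\n') == b'no meta\n'

Note that remotefilelog keeps this copy metadata in the fileblob's ancestor records rather than in the main blob, which is why addrawrevision() removes it before calling _createfileblob().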
@@ -1,2653 +1,2653 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .revlogutils.constants import (
38 from .revlogutils.constants import (
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_ISCENSORED,
55 REVIDX_ISCENSORED,
56 REVIDX_RAWTEXT_CHANGING_FLAGS,
56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 )
57 )
58 from .thirdparty import (
58 from .thirdparty import (
59 attr,
59 attr,
60 )
60 )
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 templatefilters,
68 templatefilters,
69 util,
69 util,
70 )
70 )
71 from .interfaces import (
71 from .interfaces import (
72 repository,
72 repository,
73 util as interfaceutil,
73 util as interfaceutil,
74 )
74 )
75 from .revlogutils import (
75 from .revlogutils import (
76 deltas as deltautil,
76 deltas as deltautil,
77 flagutil,
77 flagutil,
78 )
78 )
79 from .utils import (
79 from .utils import (
80 storageutil,
80 storageutil,
81 stringutil,
81 stringutil,
82 )
82 )
83
83
84 # blanked usage of all the name to prevent pyflakes constraints
84 # blanked usage of all the name to prevent pyflakes constraints
85 # We need these name available in the module for extensions.
85 # We need these name available in the module for extensions.
86 REVLOGV0
86 REVLOGV0
87 REVLOGV1
87 REVLOGV1
88 REVLOGV2
88 REVLOGV2
89 FLAG_INLINE_DATA
89 FLAG_INLINE_DATA
90 FLAG_GENERALDELTA
90 FLAG_GENERALDELTA
91 REVLOG_DEFAULT_FLAGS
91 REVLOG_DEFAULT_FLAGS
92 REVLOG_DEFAULT_FORMAT
92 REVLOG_DEFAULT_FORMAT
93 REVLOG_DEFAULT_VERSION
93 REVLOG_DEFAULT_VERSION
94 REVLOGV1_FLAGS
94 REVLOGV1_FLAGS
95 REVLOGV2_FLAGS
95 REVLOGV2_FLAGS
96 REVIDX_ISCENSORED
96 REVIDX_ISCENSORED
97 REVIDX_ELLIPSIS
97 REVIDX_ELLIPSIS
98 REVIDX_EXTSTORED
98 REVIDX_EXTSTORED
99 REVIDX_DEFAULT_FLAGS
99 REVIDX_DEFAULT_FLAGS
100 REVIDX_FLAGS_ORDER
100 REVIDX_FLAGS_ORDER
101 REVIDX_RAWTEXT_CHANGING_FLAGS
101 REVIDX_RAWTEXT_CHANGING_FLAGS
102
102
103 parsers = policy.importmod(r'parsers')
103 parsers = policy.importmod(r'parsers')
104 rustancestor = policy.importrust(r'ancestor')
104 rustancestor = policy.importrust(r'ancestor')
105 rustdagop = policy.importrust(r'dagop')
105 rustdagop = policy.importrust(r'dagop')
106
106
107 # Aliased for performance.
107 # Aliased for performance.
108 _zlibdecompress = zlib.decompress
108 _zlibdecompress = zlib.decompress
109
109
110 # max size of revlog with inline data
110 # max size of revlog with inline data
111 _maxinline = 131072
111 _maxinline = 131072
112 _chunksize = 1048576
112 _chunksize = 1048576
113
113
114 # Flag processors for REVIDX_ELLIPSIS.
114 # Flag processors for REVIDX_ELLIPSIS.
115 def ellipsisreadprocessor(rl, text):
115 def ellipsisreadprocessor(rl, text):
116 return text, False, {}
116 return text, False, {}
117
117
118 def ellipsiswriteprocessor(rl, text, sidedata):
118 def ellipsiswriteprocessor(rl, text, sidedata):
119 return text, False
119 return text, False
120
120
121 def ellipsisrawprocessor(rl, text):
121 def ellipsisrawprocessor(rl, text):
122 return False
122 return False
123
123
124 ellipsisprocessor = (
124 ellipsisprocessor = (
125 ellipsisreadprocessor,
125 ellipsisreadprocessor,
126 ellipsiswriteprocessor,
126 ellipsiswriteprocessor,
127 ellipsisrawprocessor,
127 ellipsisrawprocessor,
128 )
128 )
129
129
130 def getoffset(q):
130 def getoffset(q):
131 return int(q >> 16)
131 return int(q >> 16)
132
132
133 def gettype(q):
133 def gettype(q):
134 return int(q & 0xFFFF)
134 return int(q & 0xFFFF)
135
135
136 def offset_type(offset, type):
136 def offset_type(offset, type):
137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 raise ValueError('unknown revlog index flags')
138 raise ValueError('unknown revlog index flags')
139 return int(int(offset) << 16 | type)
139 return int(int(offset) << 16 | type)
140
140
141 @attr.s(slots=True, frozen=True)
141 @attr.s(slots=True, frozen=True)
142 class _revisioninfo(object):
142 class _revisioninfo(object):
143 """Information about a revision that allows building its fulltext
143 """Information about a revision that allows building its fulltext
144 node: expected hash of the revision
144 node: expected hash of the revision
145 p1, p2: parent revs of the revision
145 p1, p2: parent revs of the revision
146 btext: built text cache consisting of a one-element list
146 btext: built text cache consisting of a one-element list
147 cachedelta: (baserev, uncompressed_delta) or None
147 cachedelta: (baserev, uncompressed_delta) or None
148 flags: flags associated to the revision storage
148 flags: flags associated to the revision storage
149
149
150 One of btext[0] or cachedelta must be set.
150 One of btext[0] or cachedelta must be set.
151 """
151 """
152 node = attr.ib()
152 node = attr.ib()
153 p1 = attr.ib()
153 p1 = attr.ib()
154 p2 = attr.ib()
154 p2 = attr.ib()
155 btext = attr.ib()
155 btext = attr.ib()
156 textlen = attr.ib()
156 textlen = attr.ib()
157 cachedelta = attr.ib()
157 cachedelta = attr.ib()
158 flags = attr.ib()
158 flags = attr.ib()
159
159
160 @interfaceutil.implementer(repository.irevisiondelta)
160 @interfaceutil.implementer(repository.irevisiondelta)
161 @attr.s(slots=True)
161 @attr.s(slots=True)
162 class revlogrevisiondelta(object):
162 class revlogrevisiondelta(object):
163 node = attr.ib()
163 node = attr.ib()
164 p1node = attr.ib()
164 p1node = attr.ib()
165 p2node = attr.ib()
165 p2node = attr.ib()
166 basenode = attr.ib()
166 basenode = attr.ib()
167 flags = attr.ib()
167 flags = attr.ib()
168 baserevisionsize = attr.ib()
168 baserevisionsize = attr.ib()
169 revision = attr.ib()
169 revision = attr.ib()
170 delta = attr.ib()
170 delta = attr.ib()
171 linknode = attr.ib(default=None)
171 linknode = attr.ib(default=None)
172
172
173 @interfaceutil.implementer(repository.iverifyproblem)
173 @interfaceutil.implementer(repository.iverifyproblem)
174 @attr.s(frozen=True)
174 @attr.s(frozen=True)
175 class revlogproblem(object):
175 class revlogproblem(object):
176 warning = attr.ib(default=None)
176 warning = attr.ib(default=None)
177 error = attr.ib(default=None)
177 error = attr.ib(default=None)
178 node = attr.ib(default=None)
178 node = attr.ib(default=None)
179
179
180 # index v0:
180 # index v0:
181 # 4 bytes: offset
181 # 4 bytes: offset
182 # 4 bytes: compressed length
182 # 4 bytes: compressed length
183 # 4 bytes: base rev
183 # 4 bytes: base rev
184 # 4 bytes: link rev
184 # 4 bytes: link rev
185 # 20 bytes: parent 1 nodeid
185 # 20 bytes: parent 1 nodeid
186 # 20 bytes: parent 2 nodeid
186 # 20 bytes: parent 2 nodeid
187 # 20 bytes: nodeid
187 # 20 bytes: nodeid
188 indexformatv0 = struct.Struct(">4l20s20s20s")
188 indexformatv0 = struct.Struct(">4l20s20s20s")
189 indexformatv0_pack = indexformatv0.pack
189 indexformatv0_pack = indexformatv0.pack
190 indexformatv0_unpack = indexformatv0.unpack
190 indexformatv0_unpack = indexformatv0.unpack
191
191
192 class revlogoldindex(list):
192 class revlogoldindex(list):
193 def __getitem__(self, i):
193 def __getitem__(self, i):
194 if i == -1:
194 if i == -1:
195 return (0, 0, 0, -1, -1, -1, -1, nullid)
195 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 return list.__getitem__(self, i)
196 return list.__getitem__(self, i)
197
197
198 class revlogoldio(object):
198 class revlogoldio(object):
199 def __init__(self):
199 def __init__(self):
200 self.size = indexformatv0.size
200 self.size = indexformatv0.size
201
201
202 def parseindex(self, data, inline):
202 def parseindex(self, data, inline):
203 s = self.size
203 s = self.size
204 index = []
204 index = []
205 nodemap = {nullid: nullrev}
205 nodemap = {nullid: nullrev}
206 n = off = 0
206 n = off = 0
207 l = len(data)
207 l = len(data)
208 while off + s <= l:
208 while off + s <= l:
209 cur = data[off:off + s]
209 cur = data[off:off + s]
210 off += s
210 off += s
211 e = indexformatv0_unpack(cur)
211 e = indexformatv0_unpack(cur)
212 # transform to revlogv1 format
212 # transform to revlogv1 format
213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 index.append(e2)
215 index.append(e2)
216 nodemap[e[6]] = n
216 nodemap[e[6]] = n
217 n += 1
217 n += 1
218
218
219 return revlogoldindex(index), nodemap, None
219 return revlogoldindex(index), nodemap, None
220
220
221 def packentry(self, entry, node, version, rev):
221 def packentry(self, entry, node, version, rev):
222 if gettype(entry[0]):
222 if gettype(entry[0]):
223 raise error.RevlogError(_('index entry flags need revlog '
223 raise error.RevlogError(_('index entry flags need revlog '
224 'version 1'))
224 'version 1'))
225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 node(entry[5]), node(entry[6]), entry[7])
226 node(entry[5]), node(entry[6]), entry[7])
227 return indexformatv0_pack(*e2)
227 return indexformatv0_pack(*e2)
228
228
229 # index ng:
229 # index ng:
230 # 6 bytes: offset
230 # 6 bytes: offset
231 # 2 bytes: flags
231 # 2 bytes: flags
232 # 4 bytes: compressed length
232 # 4 bytes: compressed length
233 # 4 bytes: uncompressed length
233 # 4 bytes: uncompressed length
234 # 4 bytes: base rev
234 # 4 bytes: base rev
235 # 4 bytes: link rev
235 # 4 bytes: link rev
236 # 4 bytes: parent 1 rev
236 # 4 bytes: parent 1 rev
237 # 4 bytes: parent 2 rev
237 # 4 bytes: parent 2 rev
238 # 32 bytes: nodeid
238 # 32 bytes: nodeid
239 indexformatng = struct.Struct(">Qiiiiii20s12x")
239 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 indexformatng_pack = indexformatng.pack
240 indexformatng_pack = indexformatng.pack
241 versionformat = struct.Struct(">I")
241 versionformat = struct.Struct(">I")
242 versionformat_pack = versionformat.pack
242 versionformat_pack = versionformat.pack
243 versionformat_unpack = versionformat.unpack
243 versionformat_unpack = versionformat.unpack
244
244
245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 # signed integer)
246 # signed integer)
247 _maxentrysize = 0x7fffffff
247 _maxentrysize = 0x7fffffff
248
248
249 class revlogio(object):
249 class revlogio(object):
250 def __init__(self):
250 def __init__(self):
251 self.size = indexformatng.size
251 self.size = indexformatng.size
252
252
253 def parseindex(self, data, inline):
253 def parseindex(self, data, inline):
254 # call the C implementation to parse the index data
254 # call the C implementation to parse the index data
255 index, cache = parsers.parse_index2(data, inline)
255 index, cache = parsers.parse_index2(data, inline)
256 return index, getattr(index, 'nodemap', None), cache
256 return index, getattr(index, 'nodemap', None), cache
257
257
258 def packentry(self, entry, node, version, rev):
258 def packentry(self, entry, node, version, rev):
259 p = indexformatng_pack(*entry)
259 p = indexformatng_pack(*entry)
260 if rev == 0:
260 if rev == 0:
261 p = versionformat_pack(version) + p[4:]
261 p = versionformat_pack(version) + p[4:]
262 return p
262 return p
263
263
264 class revlog(flagutil.flagprocessorsmixin):
264 class revlog(object):
265 """
265 """
266 the underlying revision storage object
266 the underlying revision storage object
267
267
268 A revlog consists of two parts, an index and the revision data.
268 A revlog consists of two parts, an index and the revision data.
269
269
270 The index is a file with a fixed record size containing
270 The index is a file with a fixed record size containing
271 information on each revision, including its nodeid (hash), the
271 information on each revision, including its nodeid (hash), the
272 nodeids of its parents, the position and offset of its data within
272 nodeids of its parents, the position and offset of its data within
273 the data file, and the revision it's based on. Finally, each entry
273 the data file, and the revision it's based on. Finally, each entry
274 contains a linkrev entry that can serve as a pointer to external
274 contains a linkrev entry that can serve as a pointer to external
275 data.
275 data.
276
276
277 The revision data itself is a linear collection of data chunks.
277 The revision data itself is a linear collection of data chunks.
278 Each chunk represents a revision and is usually represented as a
278 Each chunk represents a revision and is usually represented as a
279 delta against the previous chunk. To bound lookup time, runs of
279 delta against the previous chunk. To bound lookup time, runs of
280 deltas are limited to about 2 times the length of the original
280 deltas are limited to about 2 times the length of the original
281 version data. This makes retrieval of a version proportional to
281 version data. This makes retrieval of a version proportional to
282 its size, or O(1) relative to the number of revisions.
282 its size, or O(1) relative to the number of revisions.
283
283
284 Both pieces of the revlog are written to in an append-only
284 Both pieces of the revlog are written to in an append-only
285 fashion, which means we never need to rewrite a file to insert or
285 fashion, which means we never need to rewrite a file to insert or
286 remove data, and can use some simple techniques to avoid the need
286 remove data, and can use some simple techniques to avoid the need
287 for locking while reading.
287 for locking while reading.
288
288
289 If checkambig, indexfile is opened with checkambig=True at
289 If checkambig, indexfile is opened with checkambig=True at
290 writing, to avoid file stat ambiguity.
290 writing, to avoid file stat ambiguity.
291
291
292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 index will be mmapped rather than read if it is larger than the
293 index will be mmapped rather than read if it is larger than the
294 configured threshold.
294 configured threshold.
295
295
296 If censorable is True, the revlog can have censored revisions.
296 If censorable is True, the revlog can have censored revisions.
297
297
298 If `upperboundcomp` is not None, this is the expected maximal gain from
298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 compression for the data content.
299 compression for the data content.
300 """
300 """
301
301
302 _flagserrorclass = error.RevlogError
302 _flagserrorclass = error.RevlogError
303
303
304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 mmaplargeindex=False, censorable=False,
305 mmaplargeindex=False, censorable=False,
306 upperboundcomp=None):
306 upperboundcomp=None):
307 """
307 """
308 create a revlog object
308 create a revlog object
309
309
310 opener is a function that abstracts the file opening operation
310 opener is a function that abstracts the file opening operation
311 and can be used to implement COW semantics or the like.
311 and can be used to implement COW semantics or the like.
312
312
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315 self.indexfile = indexfile
315 self.indexfile = indexfile
316 self.datafile = datafile or (indexfile[:-2] + ".d")
316 self.datafile = datafile or (indexfile[:-2] + ".d")
317 self.opener = opener
317 self.opener = opener
318 # When True, indexfile is opened with checkambig=True at writing, to
318 # When True, indexfile is opened with checkambig=True at writing, to
319 # avoid file stat ambiguity.
319 # avoid file stat ambiguity.
320 self._checkambig = checkambig
320 self._checkambig = checkambig
321 self._mmaplargeindex = mmaplargeindex
321 self._mmaplargeindex = mmaplargeindex
322 self._censorable = censorable
322 self._censorable = censorable
323 # 3-tuple of (node, rev, text) for a raw revision.
323 # 3-tuple of (node, rev, text) for a raw revision.
324 self._revisioncache = None
324 self._revisioncache = None
325 # Maps rev to chain base rev.
325 # Maps rev to chain base rev.
326 self._chainbasecache = util.lrucachedict(100)
326 self._chainbasecache = util.lrucachedict(100)
327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 self._chunkcache = (0, '')
328 self._chunkcache = (0, '')
329 # How much data to read and cache into the raw revlog data cache.
329 # How much data to read and cache into the raw revlog data cache.
330 self._chunkcachesize = 65536
330 self._chunkcachesize = 65536
331 self._maxchainlen = None
331 self._maxchainlen = None
332 self._deltabothparents = True
332 self._deltabothparents = True
333 self.index = []
333 self.index = []
334 # Mapping of partial identifiers to full nodes.
334 # Mapping of partial identifiers to full nodes.
335 self._pcache = {}
335 self._pcache = {}
336 # Mapping of revision integer to full node.
336 # Mapping of revision integer to full node.
337 self._nodecache = {nullid: nullrev}
337 self._nodecache = {nullid: nullrev}
338 self._nodepos = None
338 self._nodepos = None
339 self._compengine = 'zlib'
339 self._compengine = 'zlib'
340 self._compengineopts = {}
340 self._compengineopts = {}
341 self._maxdeltachainspan = -1
341 self._maxdeltachainspan = -1
342 self._withsparseread = False
342 self._withsparseread = False
343 self._sparserevlog = False
343 self._sparserevlog = False
344 self._srdensitythreshold = 0.50
344 self._srdensitythreshold = 0.50
345 self._srmingapsize = 262144
345 self._srmingapsize = 262144
346
346
347 # Make copy of flag processors so each revlog instance can support
347 # Make copy of flag processors so each revlog instance can support
348 # custom flags.
348 # custom flags.
349 self._flagprocessors = dict(flagutil.flagprocessors)
349 self._flagprocessors = dict(flagutil.flagprocessors)
350
350
351 # 2-tuple of file handles being used for active writing.
351 # 2-tuple of file handles being used for active writing.
352 self._writinghandles = None
352 self._writinghandles = None
353
353
354 self._loadindex()
354 self._loadindex()
355
355
356 def _loadindex(self):
356 def _loadindex(self):
357 mmapindexthreshold = None
357 mmapindexthreshold = None
358 opts = getattr(self.opener, 'options', {}) or {}
358 opts = getattr(self.opener, 'options', {}) or {}
359
359
360 if 'revlogv2' in opts:
360 if 'revlogv2' in opts:
361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 elif 'revlogv1' in opts:
362 elif 'revlogv1' in opts:
363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 if 'generaldelta' in opts:
364 if 'generaldelta' in opts:
365 newversionflags |= FLAG_GENERALDELTA
365 newversionflags |= FLAG_GENERALDELTA
366 elif getattr(self.opener, 'options', None) is not None:
366 elif getattr(self.opener, 'options', None) is not None:
367 # If options provided but no 'revlog*' found, the repository
367 # If options provided but no 'revlog*' found, the repository
368 # would have no 'requires' file in it, which means we have to
368 # would have no 'requires' file in it, which means we have to
369 # stick to the old format.
369 # stick to the old format.
370 newversionflags = REVLOGV0
370 newversionflags = REVLOGV0
371 else:
371 else:
372 newversionflags = REVLOG_DEFAULT_VERSION
372 newversionflags = REVLOG_DEFAULT_VERSION
373
373
374 if 'chunkcachesize' in opts:
374 if 'chunkcachesize' in opts:
375 self._chunkcachesize = opts['chunkcachesize']
375 self._chunkcachesize = opts['chunkcachesize']
376 if 'maxchainlen' in opts:
376 if 'maxchainlen' in opts:
377 self._maxchainlen = opts['maxchainlen']
377 self._maxchainlen = opts['maxchainlen']
378 if 'deltabothparents' in opts:
378 if 'deltabothparents' in opts:
379 self._deltabothparents = opts['deltabothparents']
379 self._deltabothparents = opts['deltabothparents']
380 self._lazydelta = bool(opts.get('lazydelta', True))
380 self._lazydelta = bool(opts.get('lazydelta', True))
381 self._lazydeltabase = False
381 self._lazydeltabase = False
382 if self._lazydelta:
382 if self._lazydelta:
383 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
383 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
384 if 'compengine' in opts:
384 if 'compengine' in opts:
385 self._compengine = opts['compengine']
385 self._compengine = opts['compengine']
386 if 'zlib.level' in opts:
386 if 'zlib.level' in opts:
387 self._compengineopts['zlib.level'] = opts['zlib.level']
387 self._compengineopts['zlib.level'] = opts['zlib.level']
388 if 'zstd.level' in opts:
388 if 'zstd.level' in opts:
389 self._compengineopts['zstd.level'] = opts['zstd.level']
389 self._compengineopts['zstd.level'] = opts['zstd.level']
390 if 'maxdeltachainspan' in opts:
390 if 'maxdeltachainspan' in opts:
391 self._maxdeltachainspan = opts['maxdeltachainspan']
391 self._maxdeltachainspan = opts['maxdeltachainspan']
392 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
392 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
393 mmapindexthreshold = opts['mmapindexthreshold']
393 mmapindexthreshold = opts['mmapindexthreshold']
394 self._sparserevlog = bool(opts.get('sparse-revlog', False))
394 self._sparserevlog = bool(opts.get('sparse-revlog', False))
395 withsparseread = bool(opts.get('with-sparse-read', False))
395 withsparseread = bool(opts.get('with-sparse-read', False))
396 # sparse-revlog forces sparse-read
396 # sparse-revlog forces sparse-read
397 self._withsparseread = self._sparserevlog or withsparseread
397 self._withsparseread = self._sparserevlog or withsparseread
398 if 'sparse-read-density-threshold' in opts:
398 if 'sparse-read-density-threshold' in opts:
399 self._srdensitythreshold = opts['sparse-read-density-threshold']
399 self._srdensitythreshold = opts['sparse-read-density-threshold']
400 if 'sparse-read-min-gap-size' in opts:
400 if 'sparse-read-min-gap-size' in opts:
401 self._srmingapsize = opts['sparse-read-min-gap-size']
401 self._srmingapsize = opts['sparse-read-min-gap-size']
402 if opts.get('enableellipsis'):
402 if opts.get('enableellipsis'):
403 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
403 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
404
404
405 # revlog v0 doesn't have flag processors
405 # revlog v0 doesn't have flag processors
406 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
406 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
407 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
407 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
408
408
409 if self._chunkcachesize <= 0:
409 if self._chunkcachesize <= 0:
410 raise error.RevlogError(_('revlog chunk cache size %r is not '
410 raise error.RevlogError(_('revlog chunk cache size %r is not '
411 'greater than 0') % self._chunkcachesize)
411 'greater than 0') % self._chunkcachesize)
412 elif self._chunkcachesize & (self._chunkcachesize - 1):
412 elif self._chunkcachesize & (self._chunkcachesize - 1):
413 raise error.RevlogError(_('revlog chunk cache size %r is not a '
413 raise error.RevlogError(_('revlog chunk cache size %r is not a '
414 'power of 2') % self._chunkcachesize)
414 'power of 2') % self._chunkcachesize)
415
415
416 indexdata = ''
416 indexdata = ''
417 self._initempty = True
417 self._initempty = True
418 try:
418 try:
419 with self._indexfp() as f:
419 with self._indexfp() as f:
420 if (mmapindexthreshold is not None and
420 if (mmapindexthreshold is not None and
421 self.opener.fstat(f).st_size >= mmapindexthreshold):
421 self.opener.fstat(f).st_size >= mmapindexthreshold):
422 # TODO: should .close() to release resources without
422 # TODO: should .close() to release resources without
423 # relying on Python GC
423 # relying on Python GC
424 indexdata = util.buffer(util.mmapread(f))
424 indexdata = util.buffer(util.mmapread(f))
425 else:
425 else:
426 indexdata = f.read()
426 indexdata = f.read()
427 if len(indexdata) > 0:
427 if len(indexdata) > 0:
428 versionflags = versionformat_unpack(indexdata[:4])[0]
428 versionflags = versionformat_unpack(indexdata[:4])[0]
429 self._initempty = False
429 self._initempty = False
430 else:
430 else:
431 versionflags = newversionflags
431 versionflags = newversionflags
432 except IOError as inst:
432 except IOError as inst:
433 if inst.errno != errno.ENOENT:
433 if inst.errno != errno.ENOENT:
434 raise
434 raise
435
435
436 versionflags = newversionflags
436 versionflags = newversionflags
437
437
438 self.version = versionflags
438 self.version = versionflags
439
439
440 flags = versionflags & ~0xFFFF
440 flags = versionflags & ~0xFFFF
441 fmt = versionflags & 0xFFFF
441 fmt = versionflags & 0xFFFF
442
442
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecaches are the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

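    # Illustrative aside (not part of this changeset): a toy run of the
    # pure-Python fallback above. The index is scanned backwards from the
    # last uncached position, memoizing node -> rev along the way; all the
    # names and values below are invented for the demo.
    demoindex = ['n0', 'n1', 'n2', 'n3']     # pretend entry[7] is the node
    democache = {}
    for demor in range(len(demoindex) - 1, -1, -1):
        democache[demoindex[demor]] = demor
        if demoindex[demor] == 'n2':
            demonodepos = demor - 1          # resume point for next lookup
            break
    assert democache == {'n3': 3, 'n2': 2} and demonodepos == 1
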
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

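    # Illustrative aside (not part of this changeset): a toy run of the
    # generaldelta walk above. Index slot 3 holds the delta base, and the
    # chain ends when a revision is its own base. Data below is invented.
    demobase = {0: 0, 1: 0, 2: 1, 3: 2}      # rev -> delta base
    demochain, demorev = [], 3
    while demobase[demorev] != demorev:
        demochain.append(demorev)
        demorev = demobase[demorev]
    demochain.append(demorev)
    demochain.reverse()
    assert demochain == [0, 1, 2, 3]
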
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

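    # Illustrative aside (not part of this changeset): the BFS above on a
    # hand-made DAG. Revision 0 is common; revisions 1 and 3 are the
    # ancestors of head 3 outside "has". All values are invented.
    demoparents = {1: (0, -1), 3: (1, -1)}   # rev -> (p1, p2)
    demohas = {-1, 0}
    demomissing = set()
    demovisit = collections.deque([3])       # head 3 is not in demohas
    while demovisit:
        demor = demovisit.popleft()
        if demor in demomissing:
            continue
        demomissing.add(demor)
        for demop in demoparents[demor]:
            if demop not in demohas:
                demovisit.append(demop)
    assert sorted(demomissing) == [1, 3]
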
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
                else:
                    p = tuple(self.parents(n))
                    # A node is a descendant if either of its parents are
                    # descendants. (We seeded the descendants list with the
                    # roots up there, remember?)
                    if (p[0] in descendants) or (p[1] in descendants):
                        descendants.add(n)
                        isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

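    # Illustrative aside (not part of this changeset): a hand-run of the
    # head-marking pass above on an invented two-headed DAG. The extra
    # trailing slot absorbs writes for nullrev (-1) parents.
    demoparents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    demoishead = [0] * (3 + 1)
    for demor in range(3):
        demoishead[demor] = 1                            # I may be a head
        demop1, demop2 = demoparents[demor]
        demoishead[demop1] = demoishead[demop2] = 0      # my parents are not
    assert [r for r, v in enumerate(demoishead[:3]) if v] == [1, 2]
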
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

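    # Illustrative aside (not part of this changeset): the "a > b" shortcut
    # above is safe because revision numbers are assigned in topological
    # order, so an ancestor always has a smaller number than any of its
    # descendants. Toy check on an invented linear history:
    demoparent = {0: -1, 1: 0, 2: 1}

    def demoisancestor(a, b, parents=demoparent):
        while b != -1:
            if b == a:
                return True
            b = parents[b]
        return a == -1   # nullrev is an ancestor of everything

    assert demoisancestor(0, 2) and not demoisancestor(2, 0)
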
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

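    # Illustrative aside (not part of this changeset): the essence of the
    # pure-Python path above - grow the prefix until it matches exactly one
    # known hex id (wdir handling aside). All ids below are invented.
    demoknown = ['a1b2c3', 'a1f000', '9e9e9e']

    def demoshortest(hexnode, minlength=1, known=demoknown):
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if sum(n.startswith(prefix) for n in known) == 1:
                return prefix

    assert demoshortest('a1b2c3') == 'a1b'
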
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

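    # Worked example (illustrative, assuming the v1 index entry size of 64
    # bytes): in an inline revlog, index entries and data chunks interleave,
    # so a revision whose logical data offset is 120 and whose number is 2
    # really starts at
    #
    #   120 + (2 + 1) * 64 == 312
    #
    # bytes into the index file. Separate .d revlogs skip this adjustment.
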
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

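    # Illustrative example (hypothetical index contents): if the base field
    # of revision 5 is 5 itself, the revision is stored as a full snapshot
    # and has no delta parent (nullrev). If the field is 3, the delta parent
    # is 3 under generaldelta; without generaldelta the base field only
    # records the start of the delta chain and deltas always apply against
    # the previous revision, so the delta parent is 4.
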
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

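    # Illustrative reading of the logic above: with sparse-revlog, a revision
    # deltaing against one of its own parents is an ordinary delta, while a
    # revision deltaing against an unrelated base is an "intermediate
    # snapshot" provided that base is (recursively) a snapshot too. So for a
    # hypothetical chain 0 (full) <- 4 <- 9, where neither 4 nor 9 deltas
    # against a parent, issnapshot(9) walks 9 -> 4 -> 0 and returns True.
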
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = ('_processflags(...) use the specialized variant')
        util.nouideprecwarn(msg, '5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == 'read':
            return flagutil.processflagsread(self, text, flags)
        else: # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return "", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # already cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)

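    # Conceptual sketch of the reconstruction above (simplified, not the
    # actual implementation): a delta chain is a base text plus a list of
    # binary patches, so roughly
    #
    #   chain = [baserev, r1, r2, ..., rev]
    #   text = decompress(chunk(baserev))
    #   for r in chain[1:]:
    #       text = mdiff.patch(text, decompress(chunk(r)))
    #
    # ``mdiff.patches()`` does the same thing in one call, folding all the
    # deltas into the base text.
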
    def rawdata(self, nodeorrev, _df=None):
        """return an uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                                        % (self.indexfile,
                                           pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

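    # Sketch of what the hash covers (illustrative, mirroring
    # storageutil.hashrevisionsha1): the node is a SHA-1 over the two parent
    # nodes in sorted order followed by the raw text, roughly:
    #
    #   s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    #   s.update(text)
    #   node = s.digest()
    #
    # so any corruption of the text or a mismatched parent pair changes the
    # node and trips the integrity check above.
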
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
                    sidedata=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if sidedata is None:
            sidedata = {}

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
                                                           sidedata=sidedata)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

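    # Header conventions in the return value above (as consumed by
    # decompress()): an empty first element means the payload carries its own
    # engine header (e.g. zlib output starts with 'x'), 'u' marks data stored
    # uncompressed because compression did not pay off, and data whose first
    # byte is already '\0' can be stored as-is since that byte cannot be
    # mistaken for an engine header. For example, compress('') -> ('', '')
    # and a short incompressible string typically comes back as ('u', data).
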
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size before it is changed by flag
            # processors, which is the non-raw size. use revlog explicitly
            # to avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

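    # Worked example for the inline branch above (illustrative numbers): when
    # appending revision curr=3 whose data would start at logical offset 500,
    # the index entries preceding it on disk (64 bytes each in the v1 format)
    # shift everything, so the transaction records the write at
    #
    #   500 + 3 * 64 == 692
    #
    # For a non-inline revlog the data and index offsets are journaled
    # separately, with no adjustment needed.
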
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

2215 def strip(self, minlink, transaction):
2215 def strip(self, minlink, transaction):
2216 """truncate the revlog on the first revision with a linkrev >= minlink
2216 """truncate the revlog on the first revision with a linkrev >= minlink
2217
2217
2218 This function is called when we're stripping revision minlink and
2218 This function is called when we're stripping revision minlink and
2219 its descendants from the repository.
2219 its descendants from the repository.
2220
2220
2221 We have to remove all revisions with linkrev >= minlink, because
2221 We have to remove all revisions with linkrev >= minlink, because
2222 the equivalent changelog revisions will be renumbered after the
2222 the equivalent changelog revisions will be renumbered after the
2223 strip.
2223 strip.
2224
2224
2225 So we truncate the revlog on the first of these revisions, and
2225 So we truncate the revlog on the first of these revisions, and
2226 trust that the caller has saved the revisions that shouldn't be
2226 trust that the caller has saved the revisions that shouldn't be
2227 removed and that it'll re-add them after this truncation.
2227 removed and that it'll re-add them after this truncation.
2228 """
2228 """
2229 if len(self) == 0:
2229 if len(self) == 0:
2230 return
2230 return
2231
2231
2232 rev, _ = self.getstrippoint(minlink)
2232 rev, _ = self.getstrippoint(minlink)
2233 if rev == len(self):
2233 if rev == len(self):
2234 return
2234 return
2235
2235
2236 # first truncate the files on disk
2236 # first truncate the files on disk
2237 end = self.start(rev)
2237 end = self.start(rev)
2238 if not self._inline:
2238 if not self._inline:
2239 transaction.add(self.datafile, end)
2239 transaction.add(self.datafile, end)
2240 end = rev * self._io.size
2240 end = rev * self._io.size
2241 else:
2241 else:
2242 end += rev * self._io.size
2242 end += rev * self._io.size
2243
2243
2244 transaction.add(self.indexfile, end)
2244 transaction.add(self.indexfile, end)
2245
2245
2246 # then reset internal state in memory to forget those revisions
2246 # then reset internal state in memory to forget those revisions
2247 self._revisioncache = None
2247 self._revisioncache = None
2248 self._chaininfocache = {}
2248 self._chaininfocache = {}
2249 self._chunkclear()
2249 self._chunkclear()
2250 for x in pycompat.xrange(rev, len(self)):
2250 for x in pycompat.xrange(rev, len(self)):
2251 del self.nodemap[self.node(x)]
2251 del self.nodemap[self.node(x)]
2252
2252
2253 del self.index[rev:-1]
2253 del self.index[rev:-1]
2254 self._nodepos = None
2254 self._nodepos = None
2255
2255
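A hedged recap of the truncation offsets computed above (hypothetical
helper; ``indexsize`` corresponds to self._io.size and ``datastart`` to
self.start(rev)):

    def truncationoffsets(rev, datastart, indexsize, inline):
        if not inline:
            # separate .i/.d files: cut the data file at the revision's
            # start offset and keep exactly ``rev`` fixed-size index entries
            return {'datafile': datastart, 'indexfile': rev * indexsize}
        # inline revlog: index entries and revision data share one file
        return {'indexfile': datastart + rev * indexsize}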
2256 def checksize(self):
2256 def checksize(self):
2257 """Check size of index and data files
2257 """Check size of index and data files
2258
2258
2259 return a (dd, di) tuple.
2259 return a (dd, di) tuple.
2260 - dd: extra bytes for the "data" file
2260 - dd: extra bytes for the "data" file
2261 - di: extra bytes for the "index" file
2261 - di: extra bytes for the "index" file
2262
2262
2263 A healthy revlog will return (0, 0).
2263 A healthy revlog will return (0, 0).
2264 """
2264 """
2265 expected = 0
2265 expected = 0
2266 if len(self):
2266 if len(self):
2267 expected = max(0, self.end(len(self) - 1))
2267 expected = max(0, self.end(len(self) - 1))
2268
2268
2269 try:
2269 try:
2270 with self._datafp() as f:
2270 with self._datafp() as f:
2271 f.seek(0, io.SEEK_END)
2271 f.seek(0, io.SEEK_END)
2272 actual = f.tell()
2272 actual = f.tell()
2273 dd = actual - expected
2273 dd = actual - expected
2274 except IOError as inst:
2274 except IOError as inst:
2275 if inst.errno != errno.ENOENT:
2275 if inst.errno != errno.ENOENT:
2276 raise
2276 raise
2277 dd = 0
2277 dd = 0
2278
2278
2279 try:
2279 try:
2280 f = self.opener(self.indexfile)
2280 f = self.opener(self.indexfile)
2281 f.seek(0, io.SEEK_END)
2281 f.seek(0, io.SEEK_END)
2282 actual = f.tell()
2282 actual = f.tell()
2283 f.close()
2283 f.close()
2284 s = self._io.size
2284 s = self._io.size
2285 i = max(0, actual // s)
2285 i = max(0, actual // s)
2286 di = actual - (i * s)
2286 di = actual - (i * s)
2287 if self._inline:
2287 if self._inline:
2288 databytes = 0
2288 databytes = 0
2289 for r in self:
2289 for r in self:
2290 databytes += max(0, self.length(r))
2290 databytes += max(0, self.length(r))
2291 dd = 0
2291 dd = 0
2292 di = actual - len(self) * s - databytes
2292 di = actual - len(self) * s - databytes
2293 except IOError as inst:
2293 except IOError as inst:
2294 if inst.errno != errno.ENOENT:
2294 if inst.errno != errno.ENOENT:
2295 raise
2295 raise
2296 di = 0
2296 di = 0
2297
2297
2298 return (dd, di)
2298 return (dd, di)
2299
2299
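The sign of each component is broadly meaningful: a positive value means
trailing bytes beyond what the index accounts for (typically an
interrupted append), a negative value a file shorter than expected. A
hypothetical caller:

    def reportsizes(rl):
        dd, di = rl.checksize()
        if dd:
            print('data file off by %d bytes' % dd)
        if di:
            print('index file has %d extra bytes' % di)
        return dd == 0 and di == 0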
2300 def files(self):
2300 def files(self):
2301 res = [self.indexfile]
2301 res = [self.indexfile]
2302 if not self._inline:
2302 if not self._inline:
2303 res.append(self.datafile)
2303 res.append(self.datafile)
2304 return res
2304 return res
2305
2305
2306 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2306 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2307 assumehaveparentrevisions=False,
2307 assumehaveparentrevisions=False,
2308 deltamode=repository.CG_DELTAMODE_STD):
2308 deltamode=repository.CG_DELTAMODE_STD):
2309 if nodesorder not in ('nodes', 'storage', 'linear', None):
2309 if nodesorder not in ('nodes', 'storage', 'linear', None):
2310 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2310 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2311 nodesorder)
2311 nodesorder)
2312
2312
2313 if nodesorder is None and not self._generaldelta:
2313 if nodesorder is None and not self._generaldelta:
2314 nodesorder = 'storage'
2314 nodesorder = 'storage'
2315
2315
2316 if (not self._storedeltachains and
2316 if (not self._storedeltachains and
2317 deltamode != repository.CG_DELTAMODE_PREV):
2317 deltamode != repository.CG_DELTAMODE_PREV):
2318 deltamode = repository.CG_DELTAMODE_FULL
2318 deltamode = repository.CG_DELTAMODE_FULL
2319
2319
2320 return storageutil.emitrevisions(
2320 return storageutil.emitrevisions(
2321 self, nodes, nodesorder, revlogrevisiondelta,
2321 self, nodes, nodesorder, revlogrevisiondelta,
2322 deltaparentfn=self.deltaparent,
2322 deltaparentfn=self.deltaparent,
2323 candeltafn=self.candelta,
2323 candeltafn=self.candelta,
2324 rawsizefn=self.rawsize,
2324 rawsizefn=self.rawsize,
2325 revdifffn=self.revdiff,
2325 revdifffn=self.revdiff,
2326 flagsfn=self.flags,
2326 flagsfn=self.flags,
2327 deltamode=deltamode,
2327 deltamode=deltamode,
2328 revisiondata=revisiondata,
2328 revisiondata=revisiondata,
2329 assumehaveparentrevisions=assumehaveparentrevisions)
2329 assumehaveparentrevisions=assumehaveparentrevisions)
2330
2330
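A sketch of a consumer, assuming the irevisiondelta shape used elsewhere
in this change (see the attrs-decorated class in simplestorerepo.py
below); ``transmit`` is hypothetical:

    def sendrevisions(store, nodes, transmit):
        for rd in store.emitrevisions(nodes, revisiondata=True):
            if rd.delta is not None:
                transmit(rd.node, base=rd.basenode, delta=rd.delta)
            else:
                transmit(rd.node, fulltext=rd.revision)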
2331 DELTAREUSEALWAYS = 'always'
2331 DELTAREUSEALWAYS = 'always'
2332 DELTAREUSESAMEREVS = 'samerevs'
2332 DELTAREUSESAMEREVS = 'samerevs'
2333 DELTAREUSENEVER = 'never'
2333 DELTAREUSENEVER = 'never'
2334
2334
2335 DELTAREUSEFULLADD = 'fulladd'
2335 DELTAREUSEFULLADD = 'fulladd'
2336
2336
2337 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2337 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2338
2338
2339 def clone(self, tr, destrevlog, addrevisioncb=None,
2339 def clone(self, tr, destrevlog, addrevisioncb=None,
2340 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2340 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2341 """Copy this revlog to another, possibly with format changes.
2341 """Copy this revlog to another, possibly with format changes.
2342
2342
2343 The destination revlog will contain the same revisions and nodes.
2343 The destination revlog will contain the same revisions and nodes.
2344 However, it may not be bit-for-bit identical due to e.g. delta encoding
2344 However, it may not be bit-for-bit identical due to e.g. delta encoding
2345 differences.
2345 differences.
2346
2346
2347 The ``deltareuse`` argument controls how deltas from the existing revlog
2347 The ``deltareuse`` argument controls how deltas from the existing revlog
2348 are preserved in the destination revlog. The argument can have the
2348 are preserved in the destination revlog. The argument can have the
2349 following values:
2349 following values:
2350
2350
2351 DELTAREUSEALWAYS
2351 DELTAREUSEALWAYS
2352 Deltas will always be reused (if possible), even if the destination
2352 Deltas will always be reused (if possible), even if the destination
2353 revlog would not select the same revisions for the delta. This is the
2353 revlog would not select the same revisions for the delta. This is the
2354 fastest mode of operation.
2354 fastest mode of operation.
2355 DELTAREUSESAMEREVS
2355 DELTAREUSESAMEREVS
2356 Deltas will be reused if the destination revlog would pick the same
2356 Deltas will be reused if the destination revlog would pick the same
2357 revisions for the delta. This mode strikes a balance between speed
2357 revisions for the delta. This mode strikes a balance between speed
2358 and optimization.
2358 and optimization.
2359 DELTAREUSENEVER
2359 DELTAREUSENEVER
2360 Deltas will never be reused. This is the slowest mode of execution.
2360 Deltas will never be reused. This is the slowest mode of execution.
2361 This mode can be used to recompute deltas (e.g. if the diff/delta
2361 This mode can be used to recompute deltas (e.g. if the diff/delta
2362 algorithm changes).
2362 algorithm changes).
2363
2363
2364 Delta computation can be slow, so the choice of delta reuse policy can
2364 Delta computation can be slow, so the choice of delta reuse policy can
2365 significantly affect run time.
2365 significantly affect run time.
2366
2366
2367 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2367 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2368 two extremes. Deltas will be reused if they are appropriate. But if the
2368 two extremes. Deltas will be reused if they are appropriate. But if the
2369 delta could choose a better revision, it will do so. This means if you
2369 delta could choose a better revision, it will do so. This means if you
2370 are converting a non-generaldelta revlog to a generaldelta revlog,
2370 are converting a non-generaldelta revlog to a generaldelta revlog,
2371 deltas will be recomputed if the delta's parent isn't a parent of the
2371 deltas will be recomputed if the delta's parent isn't a parent of the
2372 revision.
2372 revision.
2373
2373
2374 In addition to the delta policy, the ``forcedeltabothparents``
2374 In addition to the delta policy, the ``forcedeltabothparents``
2375 argument controls whether to force computing deltas against both parents
2375 argument controls whether to force computing deltas against both parents
2376 for merges. If not set, the destination revlog's current setting is used.
2376 for merges. If not set, the destination revlog's current setting is used.
2377 """
2377 """
2378 if deltareuse not in self.DELTAREUSEALL:
2378 if deltareuse not in self.DELTAREUSEALL:
2379 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2379 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2380
2380
2381 if len(destrevlog):
2381 if len(destrevlog):
2382 raise ValueError(_('destination revlog is not empty'))
2382 raise ValueError(_('destination revlog is not empty'))
2383
2383
2384 if getattr(self, 'filteredrevs', None):
2384 if getattr(self, 'filteredrevs', None):
2385 raise ValueError(_('source revlog has filtered revisions'))
2385 raise ValueError(_('source revlog has filtered revisions'))
2386 if getattr(destrevlog, 'filteredrevs', None):
2386 if getattr(destrevlog, 'filteredrevs', None):
2387 raise ValueError(_('destination revlog has filtered revisions'))
2387 raise ValueError(_('destination revlog has filtered revisions'))
2388
2388
2389 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2389 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2390 # if possible.
2390 # if possible.
2391 oldlazydelta = destrevlog._lazydelta
2391 oldlazydelta = destrevlog._lazydelta
2392 oldlazydeltabase = destrevlog._lazydeltabase
2392 oldlazydeltabase = destrevlog._lazydeltabase
2393 oldamd = destrevlog._deltabothparents
2393 oldamd = destrevlog._deltabothparents
2394
2394
2395 try:
2395 try:
2396 if deltareuse == self.DELTAREUSEALWAYS:
2396 if deltareuse == self.DELTAREUSEALWAYS:
2397 destrevlog._lazydeltabase = True
2397 destrevlog._lazydeltabase = True
2398 destrevlog._lazydelta = True
2398 destrevlog._lazydelta = True
2399 elif deltareuse == self.DELTAREUSESAMEREVS:
2399 elif deltareuse == self.DELTAREUSESAMEREVS:
2400 destrevlog._lazydeltabase = False
2400 destrevlog._lazydeltabase = False
2401 destrevlog._lazydelta = True
2401 destrevlog._lazydelta = True
2402 elif deltareuse == self.DELTAREUSENEVER:
2402 elif deltareuse == self.DELTAREUSENEVER:
2403 destrevlog._lazydeltabase = False
2403 destrevlog._lazydeltabase = False
2404 destrevlog._lazydelta = False
2404 destrevlog._lazydelta = False
2405
2405
2406 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2406 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2407
2407
2408 deltacomputer = deltautil.deltacomputer(destrevlog)
2408 deltacomputer = deltautil.deltacomputer(destrevlog)
2409 index = self.index
2409 index = self.index
2410 for rev in self:
2410 for rev in self:
2411 entry = index[rev]
2411 entry = index[rev]
2412
2412
2413 # Some classes override linkrev to take filtered revs into
2413 # Some classes override linkrev to take filtered revs into
2414 # account. Use raw entry from index.
2414 # account. Use raw entry from index.
2415 flags = entry[0] & 0xffff
2415 flags = entry[0] & 0xffff
2416 linkrev = entry[4]
2416 linkrev = entry[4]
2417 p1 = index[entry[5]][7]
2417 p1 = index[entry[5]][7]
2418 p2 = index[entry[6]][7]
2418 p2 = index[entry[6]][7]
2419 node = entry[7]
2419 node = entry[7]
2420
2420
2421 # (Possibly) reuse the delta from the revlog if allowed and
2421 # (Possibly) reuse the delta from the revlog if allowed and
2422 # the revlog chunk is a delta.
2422 # the revlog chunk is a delta.
2423 cachedelta = None
2423 cachedelta = None
2424 rawtext = None
2424 rawtext = None
2425 if (deltareuse != self.DELTAREUSEFULLADD
2425 if (deltareuse != self.DELTAREUSEFULLADD
2426 and destrevlog._lazydelta):
2426 and destrevlog._lazydelta):
2427 dp = self.deltaparent(rev)
2427 dp = self.deltaparent(rev)
2428 if dp != nullrev:
2428 if dp != nullrev:
2429 cachedelta = (dp, bytes(self._chunk(rev)))
2429 cachedelta = (dp, bytes(self._chunk(rev)))
2430
2430
2431 if not cachedelta:
2431 if not cachedelta:
2432 rawtext = self.rawdata(rev)
2432 rawtext = self.rawdata(rev)
2433
2433
2434
2434
2435 if deltareuse == self.DELTAREUSEFULLADD:
2435 if deltareuse == self.DELTAREUSEFULLADD:
2436 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2436 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2437 cachedelta=cachedelta,
2437 cachedelta=cachedelta,
2438 node=node, flags=flags,
2438 node=node, flags=flags,
2439 deltacomputer=deltacomputer)
2439 deltacomputer=deltacomputer)
2440 else:
2440 else:
2441 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2441 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2442 checkambig=False)
2442 checkambig=False)
2443 dfh = None
2443 dfh = None
2444 if not destrevlog._inline:
2444 if not destrevlog._inline:
2445 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2445 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2446 try:
2446 try:
2447 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2447 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2448 p2, flags, cachedelta, ifh, dfh,
2448 p2, flags, cachedelta, ifh, dfh,
2449 deltacomputer=deltacomputer)
2449 deltacomputer=deltacomputer)
2450 finally:
2450 finally:
2451 if dfh:
2451 if dfh:
2452 dfh.close()
2452 dfh.close()
2453 ifh.close()
2453 ifh.close()
2454
2454
2455 if addrevisioncb:
2455 if addrevisioncb:
2456 addrevisioncb(self, rev, node)
2456 addrevisioncb(self, rev, node)
2457 finally:
2457 finally:
2458 destrevlog._lazydelta = oldlazydelta
2458 destrevlog._lazydelta = oldlazydelta
2459 destrevlog._lazydeltabase = oldlazydeltabase
2459 destrevlog._lazydeltabase = oldlazydeltabase
2460 destrevlog._deltabothparents = oldamd
2460 destrevlog._deltabothparents = oldamd
2461
2461
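A usage sketch for clone(), assuming ``repo``, a source revlog ``src``
and an empty destination ``dest`` (hypothetical setup; real callers live
in the repository upgrade machinery):

    with repo.transaction(b'migrate') as tr:
        src.clone(tr, dest,
                  deltareuse=src.DELTAREUSESAMEREVS,  # recompute only when bases differ
                  forcedeltabothparents=None)         # keep dest's own merge setting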
2462 def censorrevision(self, tr, censornode, tombstone=b''):
2462 def censorrevision(self, tr, censornode, tombstone=b''):
2463 if (self.version & 0xFFFF) == REVLOGV0:
2463 if (self.version & 0xFFFF) == REVLOGV0:
2464 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2464 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2465 self.version)
2465 self.version)
2466
2466
2467 censorrev = self.rev(censornode)
2467 censorrev = self.rev(censornode)
2468 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2468 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2469
2469
2470 if len(tombstone) > self.rawsize(censorrev):
2470 if len(tombstone) > self.rawsize(censorrev):
2471 raise error.Abort(_('censor tombstone must be no longer than '
2471 raise error.Abort(_('censor tombstone must be no longer than '
2472 'censored data'))
2472 'censored data'))
2473
2473
2474 # Rewriting the revlog in place is hard. Our strategy for censoring is
2474 # Rewriting the revlog in place is hard. Our strategy for censoring is
2475 # to create a new revlog, copy all revisions to it, then replace the
2475 # to create a new revlog, copy all revisions to it, then replace the
2476 # revlogs on transaction close.
2476 # revlogs on transaction close.
2477
2477
2478 newindexfile = self.indexfile + b'.tmpcensored'
2478 newindexfile = self.indexfile + b'.tmpcensored'
2479 newdatafile = self.datafile + b'.tmpcensored'
2479 newdatafile = self.datafile + b'.tmpcensored'
2480
2480
2481 # This is a bit dangerous. We could easily have a mismatch of state.
2481 # This is a bit dangerous. We could easily have a mismatch of state.
2482 newrl = revlog(self.opener, newindexfile, newdatafile,
2482 newrl = revlog(self.opener, newindexfile, newdatafile,
2483 censorable=True)
2483 censorable=True)
2484 newrl.version = self.version
2484 newrl.version = self.version
2485 newrl._generaldelta = self._generaldelta
2485 newrl._generaldelta = self._generaldelta
2486 newrl._io = self._io
2486 newrl._io = self._io
2487
2487
2488 for rev in self.revs():
2488 for rev in self.revs():
2489 node = self.node(rev)
2489 node = self.node(rev)
2490 p1, p2 = self.parents(node)
2490 p1, p2 = self.parents(node)
2491
2491
2492 if rev == censorrev:
2492 if rev == censorrev:
2493 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2493 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2494 p1, p2, censornode, REVIDX_ISCENSORED)
2494 p1, p2, censornode, REVIDX_ISCENSORED)
2495
2495
2496 if newrl.deltaparent(rev) != nullrev:
2496 if newrl.deltaparent(rev) != nullrev:
2497 raise error.Abort(_('censored revision stored as delta; '
2497 raise error.Abort(_('censored revision stored as delta; '
2498 'cannot censor'),
2498 'cannot censor'),
2499 hint=_('censoring of revlogs is not '
2499 hint=_('censoring of revlogs is not '
2500 'fully implemented; please report '
2500 'fully implemented; please report '
2501 'this bug'))
2501 'this bug'))
2502 continue
2502 continue
2503
2503
2504 if self.iscensored(rev):
2504 if self.iscensored(rev):
2505 if self.deltaparent(rev) != nullrev:
2505 if self.deltaparent(rev) != nullrev:
2506 raise error.Abort(_('cannot censor due to censored '
2506 raise error.Abort(_('cannot censor due to censored '
2507 'revision having delta stored'))
2507 'revision having delta stored'))
2508 rawtext = self._chunk(rev)
2508 rawtext = self._chunk(rev)
2509 else:
2509 else:
2510 rawtext = self.rawdata(rev)
2510 rawtext = self.rawdata(rev)
2511
2511
2512 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2512 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2513 self.flags(rev))
2513 self.flags(rev))
2514
2514
2515 tr.addbackup(self.indexfile, location='store')
2515 tr.addbackup(self.indexfile, location='store')
2516 if not self._inline:
2516 if not self._inline:
2517 tr.addbackup(self.datafile, location='store')
2517 tr.addbackup(self.datafile, location='store')
2518
2518
2519 self.opener.rename(newrl.indexfile, self.indexfile)
2519 self.opener.rename(newrl.indexfile, self.indexfile)
2520 if not self._inline:
2520 if not self._inline:
2521 self.opener.rename(newrl.datafile, self.datafile)
2521 self.opener.rename(newrl.datafile, self.datafile)
2522
2522
2523 self.clearcaches()
2523 self.clearcaches()
2524 self._loadindex()
2524 self._loadindex()
2525
2525
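For reference, the tombstone written above is the standard filelog
metadata envelope with a single ``censored`` key; a small sketch of its
on-disk shape (the reason text is whatever the caller supplied):

    from mercurial.utils import storageutil
    packed = storageutil.packmeta({b'censored': b'legal takedown'}, b'')
    # packed == b'\x01\ncensored: legal takedown\n\x01\n'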
2526 def verifyintegrity(self, state):
2526 def verifyintegrity(self, state):
2527 """Verifies the integrity of the revlog.
2527 """Verifies the integrity of the revlog.
2528
2528
2529 Yields ``revlogproblem`` instances describing problems that are
2529 Yields ``revlogproblem`` instances describing problems that are
2530 found.
2530 found.
2531 """
2531 """
2532 dd, di = self.checksize()
2532 dd, di = self.checksize()
2533 if dd:
2533 if dd:
2534 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2534 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2535 if di:
2535 if di:
2536 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2536 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2537
2537
2538 version = self.version & 0xFFFF
2538 version = self.version & 0xFFFF
2539
2539
2540 # The verifier tells us what version revlog we should be.
2540 # The verifier tells us what version revlog we should be.
2541 if version != state['expectedversion']:
2541 if version != state['expectedversion']:
2542 yield revlogproblem(
2542 yield revlogproblem(
2543 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2543 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2544 (self.indexfile, version, state['expectedversion']))
2544 (self.indexfile, version, state['expectedversion']))
2545
2545
2546 state['skipread'] = set()
2546 state['skipread'] = set()
2547
2547
2548 for rev in self:
2548 for rev in self:
2549 node = self.node(rev)
2549 node = self.node(rev)
2550
2550
2551 # Verify contents. 4 cases to care about:
2551 # Verify contents. 4 cases to care about:
2552 #
2552 #
2553 # common: the most common case
2553 # common: the most common case
2554 # rename: with a rename
2554 # rename: with a rename
2555 # meta: file content starts with b'\1\n', the metadata
2555 # meta: file content starts with b'\1\n', the metadata
2556 # header defined in filelog.py, but without a rename
2556 # header defined in filelog.py, but without a rename
2557 # ext: content stored externally
2557 # ext: content stored externally
2558 #
2558 #
2559 # More formally, their differences are shown below:
2559 # More formally, their differences are shown below:
2560 #
2560 #
2561 # | common | rename | meta | ext
2561 # | common | rename | meta | ext
2562 # -------------------------------------------------------
2562 # -------------------------------------------------------
2563 # flags() | 0 | 0 | 0 | not 0
2563 # flags() | 0 | 0 | 0 | not 0
2564 # renamed() | False | True | False | ?
2564 # renamed() | False | True | False | ?
2565 # rawtext[0:2]=='\1\n'| False | True | True | ?
2565 # rawtext[0:2]=='\1\n'| False | True | True | ?
2566 #
2566 #
2567 # "rawtext" means the raw text stored in revlog data, which
2567 # "rawtext" means the raw text stored in revlog data, which
2568 # could be retrieved by "rawdata(rev)". "text"
2568 # could be retrieved by "rawdata(rev)". "text"
2569 # mentioned below is "revision(rev)".
2569 # mentioned below is "revision(rev)".
2570 #
2570 #
2571 # There are 3 different lengths stored physically:
2571 # There are 3 different lengths stored physically:
2572 # 1. L1: rawsize, stored in revlog index
2572 # 1. L1: rawsize, stored in revlog index
2573 # 2. L2: len(rawtext), stored in revlog data
2573 # 2. L2: len(rawtext), stored in revlog data
2574 # 3. L3: len(text), stored in revlog data if flags==0, or
2574 # 3. L3: len(text), stored in revlog data if flags==0, or
2575 # possibly somewhere else if flags!=0
2575 # possibly somewhere else if flags!=0
2576 #
2576 #
2577 # L1 should be equal to L2. L3 could be different from them.
2577 # L1 should be equal to L2. L3 could be different from them.
2578 # "text" may or may not affect commit hash depending on flag
2578 # "text" may or may not affect commit hash depending on flag
2579 # processors (see flagutil.addflagprocessor).
2579 # processors (see flagutil.addflagprocessor).
2580 #
2580 #
2581 # | common | rename | meta | ext
2581 # | common | rename | meta | ext
2582 # -------------------------------------------------
2582 # -------------------------------------------------
2583 # rawsize() | L1 | L1 | L1 | L1
2583 # rawsize() | L1 | L1 | L1 | L1
2584 # size() | L1 | L2-LM | L1(*) | L1 (?)
2584 # size() | L1 | L2-LM | L1(*) | L1 (?)
2585 # len(rawtext) | L2 | L2 | L2 | L2
2585 # len(rawtext) | L2 | L2 | L2 | L2
2586 # len(text) | L2 | L2 | L2 | L3
2586 # len(text) | L2 | L2 | L2 | L3
2587 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2587 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2588 #
2588 #
2589 # LM: length of metadata, depending on rawtext
2589 # LM: length of metadata, depending on rawtext
2590 # (*): not ideal, see comment in filelog.size
2590 # (*): not ideal, see comment in filelog.size
2591 # (?): could be "- len(meta)" if the resolved content has
2591 # (?): could be "- len(meta)" if the resolved content has
2592 # rename metadata
2592 # rename metadata
2593 #
2593 #
2594 # Checks needed to be done:
2594 # Checks needed to be done:
2595 # 1. length check: L1 == L2, in all cases.
2595 # 1. length check: L1 == L2, in all cases.
2596 # 2. hash check: depending on flag processor, we may need to
2596 # 2. hash check: depending on flag processor, we may need to
2597 # use either "text" (external), or "rawtext" (in revlog).
2597 # use either "text" (external), or "rawtext" (in revlog).
2598
2598
2599 try:
2599 try:
2600 skipflags = state.get('skipflags', 0)
2600 skipflags = state.get('skipflags', 0)
2601 if skipflags:
2601 if skipflags:
2602 skipflags &= self.flags(rev)
2602 skipflags &= self.flags(rev)
2603
2603
2604 if skipflags:
2604 if skipflags:
2605 state['skipread'].add(node)
2605 state['skipread'].add(node)
2606 else:
2606 else:
2607 # Side-effect: read content and verify hash.
2607 # Side-effect: read content and verify hash.
2608 self.revision(node)
2608 self.revision(node)
2609
2609
2610 l1 = self.rawsize(rev)
2610 l1 = self.rawsize(rev)
2611 l2 = len(self.rawdata(node))
2611 l2 = len(self.rawdata(node))
2612
2612
2613 if l1 != l2:
2613 if l1 != l2:
2614 yield revlogproblem(
2614 yield revlogproblem(
2615 error=_('unpacked size is %d, %d expected') % (l2, l1),
2615 error=_('unpacked size is %d, %d expected') % (l2, l1),
2616 node=node)
2616 node=node)
2617
2617
2618 except error.CensoredNodeError:
2618 except error.CensoredNodeError:
2619 if state['erroroncensored']:
2619 if state['erroroncensored']:
2620 yield revlogproblem(error=_('censored file data'),
2620 yield revlogproblem(error=_('censored file data'),
2621 node=node)
2621 node=node)
2622 state['skipread'].add(node)
2622 state['skipread'].add(node)
2623 except Exception as e:
2623 except Exception as e:
2624 yield revlogproblem(
2624 yield revlogproblem(
2625 error=_('unpacking %s: %s') % (short(node),
2625 error=_('unpacking %s: %s') % (short(node),
2626 stringutil.forcebytestr(e)),
2626 stringutil.forcebytestr(e)),
2627 node=node)
2627 node=node)
2628 state['skipread'].add(node)
2628 state['skipread'].add(node)
2629
2629
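A hedged sketch of a verify driver for this generator; the ``state``
keys follow the usage above and ``ui`` is hypothetical:

    state = {'expectedversion': 1, 'erroroncensored': True, 'skipflags': 0}
    for problem in fl.verifyintegrity(state):
        if problem.error:
            ui.warn(b'error: %s\n' % problem.error)
        elif problem.warning:
            ui.warn(b'%s\n' % problem.warning)
    # nodes in state['skipread'] could not be content-checked and should
    # be skipped by any follow-up read pass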
2630 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2630 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2631 revisionscount=False, trackedsize=False,
2631 revisionscount=False, trackedsize=False,
2632 storedsize=False):
2632 storedsize=False):
2633 d = {}
2633 d = {}
2634
2634
2635 if exclusivefiles:
2635 if exclusivefiles:
2636 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2636 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2637 if not self._inline:
2637 if not self._inline:
2638 d['exclusivefiles'].append((self.opener, self.datafile))
2638 d['exclusivefiles'].append((self.opener, self.datafile))
2639
2639
2640 if sharedfiles:
2640 if sharedfiles:
2641 d['sharedfiles'] = []
2641 d['sharedfiles'] = []
2642
2642
2643 if revisionscount:
2643 if revisionscount:
2644 d['revisionscount'] = len(self)
2644 d['revisionscount'] = len(self)
2645
2645
2646 if trackedsize:
2646 if trackedsize:
2647 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2647 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2648
2648
2649 if storedsize:
2649 if storedsize:
2650 d['storedsize'] = sum(self.opener.stat(path).st_size
2650 d['storedsize'] = sum(self.opener.stat(path).st_size
2651 for path in self.files())
2651 for path in self.files())
2652
2652
2653 return d
2653 return d
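A small usage sketch (``fl`` is a hypothetical file storage object); the
keys match the dict built above:

    info = fl.storageinfo(revisionscount=True, trackedsize=True,
                          storedsize=True)
    print('%d revisions, %d bytes tracked, %d bytes on disk'
          % (info['revisionscount'], info['trackedsize'],
             info['storedsize']))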
@@ -1,193 +1,185 b''
1 # flagutils.py - code to deal with revlog flags and their processors
1 # flagutils.py - code to deal with revlog flags and their processors
2 #
2 #
3 # Copyright 2016 Remi Chaintron <remi@fb.com>
3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 from ..i18n import _
11 from ..i18n import _
12
12
13 from .constants import (
13 from .constants import (
14 REVIDX_DEFAULT_FLAGS,
14 REVIDX_DEFAULT_FLAGS,
15 REVIDX_ELLIPSIS,
15 REVIDX_ELLIPSIS,
16 REVIDX_EXTSTORED,
16 REVIDX_EXTSTORED,
17 REVIDX_FLAGS_ORDER,
17 REVIDX_FLAGS_ORDER,
18 REVIDX_ISCENSORED,
18 REVIDX_ISCENSORED,
19 REVIDX_RAWTEXT_CHANGING_FLAGS,
19 REVIDX_RAWTEXT_CHANGING_FLAGS,
20 )
20 )
21
21
22 from .. import (
22 from .. import (
23 error,
23 error,
24 util
24 util
25 )
25 )
26
26
27 # bare usage of all the names to silence pyflakes unused-import complaints
27 # bare usage of all the names to silence pyflakes unused-import complaints
28 # We need these names available in the module for extensions.
28 # We need these names available in the module for extensions.
29 REVIDX_ISCENSORED
29 REVIDX_ISCENSORED
30 REVIDX_ELLIPSIS
30 REVIDX_ELLIPSIS
31 REVIDX_EXTSTORED
31 REVIDX_EXTSTORED
32 REVIDX_DEFAULT_FLAGS
32 REVIDX_DEFAULT_FLAGS
33 REVIDX_FLAGS_ORDER
33 REVIDX_FLAGS_ORDER
34 REVIDX_RAWTEXT_CHANGING_FLAGS
34 REVIDX_RAWTEXT_CHANGING_FLAGS
35
35
36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
37
37
38 # Store flag processors (cf. 'addflagprocessor()' to register)
38 # Store flag processors (cf. 'addflagprocessor()' to register)
39 flagprocessors = {
39 flagprocessors = {
40 REVIDX_ISCENSORED: None,
40 REVIDX_ISCENSORED: None,
41 }
41 }
42
42
43 def addflagprocessor(flag, processor):
43 def addflagprocessor(flag, processor):
44 """Register a flag processor on a revision data flag.
44 """Register a flag processor on a revision data flag.
45
45
46 Invariant:
46 Invariant:
47 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
47 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
48 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
48 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
49 - Only one flag processor can be registered on a specific flag.
49 - Only one flag processor can be registered on a specific flag.
50 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
50 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
51 following signatures:
51 following signatures:
52 - (read) f(revlog, rawtext) -> text, bool, sidedata
52 - (read) f(revlog, rawtext) -> text, bool, sidedata
53 - (write) f(revlog, text, sidedata) -> rawtext, bool
53 - (write) f(revlog, text, sidedata) -> rawtext, bool
54 - (raw) f(revlog, rawtext) -> bool
54 - (raw) f(revlog, rawtext) -> bool
55 "text" is presented to the user. "rawtext" is stored in revlog data, not
55 "text" is presented to the user. "rawtext" is stored in revlog data, not
56 directly visible to the user.
56 directly visible to the user.
57 The boolean returned by these transforms is used to determine whether
57 The boolean returned by these transforms is used to determine whether
58 the returned text can be used for hash integrity checking. For example,
58 the returned text can be used for hash integrity checking. For example,
59 if "write" returns False, then "text" is used to generate hash. If
59 if "write" returns False, then "text" is used to generate hash. If
60 "write" returns True, that basically means "rawtext" returned by "write"
60 "write" returns True, that basically means "rawtext" returned by "write"
61 should be used to generate hash. Usually, "write" and "read" return
61 should be used to generate hash. Usually, "write" and "read" return
62 different booleans. And "raw" returns the same boolean as "write".
62 different booleans. And "raw" returns the same boolean as "write".
63
63
64 Note: The 'raw' transform is used for changegroup generation and in some
64 Note: The 'raw' transform is used for changegroup generation and in some
65 debug commands. In this case the transform only indicates whether the
65 debug commands. In this case the transform only indicates whether the
66 contents can be used for hash integrity checks.
66 contents can be used for hash integrity checks.
67 """
67 """
68 insertflagprocessor(flag, processor, flagprocessors)
68 insertflagprocessor(flag, processor, flagprocessors)
69
69
70 def insertflagprocessor(flag, processor, flagprocessors):
70 def insertflagprocessor(flag, processor, flagprocessors):
71 if not flag & REVIDX_KNOWN_FLAGS:
71 if not flag & REVIDX_KNOWN_FLAGS:
72 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
72 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
73 raise error.ProgrammingError(msg)
73 raise error.ProgrammingError(msg)
74 if flag not in REVIDX_FLAGS_ORDER:
74 if flag not in REVIDX_FLAGS_ORDER:
75 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
75 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
76 raise error.ProgrammingError(msg)
76 raise error.ProgrammingError(msg)
77 if flag in flagprocessors:
77 if flag in flagprocessors:
78 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
78 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
79 raise error.Abort(msg)
79 raise error.Abort(msg)
80 flagprocessors[flag] = processor
80 flagprocessors[flag] = processor
81
81
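A minimal registration sketch following the invariants documented above.
The transforms are hypothetical and the flag is reused purely for
illustration (a real extension must own the flag it registers); each
signature matches what _processflagsfunc below actually calls:

    from mercurial.revlogutils import flagutil
    from mercurial.revlogutils.constants import REVIDX_EXTSTORED

    def _read(rl, rawtext):
        # stored form -> user-visible form; {} is the (empty) sidedata
        return rawtext[::-1], True, {}

    def _write(rl, text, sidedata):
        # user-visible form -> stored form; hash stays based on ``text``
        return text[::-1], False

    def _raw(rl, rawtext):
        # the stored form alone cannot be hash-checked for this flag
        return False

    flagutil.addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))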
82 class flagprocessorsmixin(object):
83 """basic mixin to support revlog flag processing
84
85 Make sure the `_flagprocessors` attribute is set at ``__init__`` time.
86
87 See the documentation of the ``_processflags`` method for details.
88 """
89
90 def processflagswrite(revlog, text, flags, sidedata):
82 def processflagswrite(revlog, text, flags, sidedata):
91 """Inspect revision data flags and applies write transformations defined
83 """Inspect revision data flags and applies write transformations defined
92 by registered flag processors.
84 by registered flag processors.
93
85
94 ``text`` - the revision data to process
86 ``text`` - the revision data to process
95 ``flags`` - the revision flags
87 ``flags`` - the revision flags
96
88
97 This method processes the flags in the order (or reverse order if
89 This method processes the flags in the order (or reverse order if
98 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
90 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
99 flag processors registered for present flags. The order of flags defined
91 flag processors registered for present flags. The order of flags defined
100 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
92 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
101
93
102 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
94 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
103 processed text and ``validatehash`` is a bool indicating whether the
95 processed text and ``validatehash`` is a bool indicating whether the
104 returned text should be checked for hash integrity.
96 returned text should be checked for hash integrity.
105 """
97 """
106 return _processflagsfunc(revlog, text, flags, 'write',
98 return _processflagsfunc(revlog, text, flags, 'write',
107 sidedata=sidedata)[:2]
99 sidedata=sidedata)[:2]
108
100
109 def processflagsread(revlog, text, flags):
101 def processflagsread(revlog, text, flags):
110 """Inspect revision data flags and applies read transformations defined
102 """Inspect revision data flags and applies read transformations defined
111 by registered flag processors.
103 by registered flag processors.
112
104
113 ``text`` - the revision data to process
105 ``text`` - the revision data to process
114 ``flags`` - the revision flags
106 ``flags`` - the revision flags
115 Note: the raw transform is not applied here; use ``processflagsraw``
107 Note: the raw transform is not applied here; use ``processflagsraw``
116 when only hash-check information is needed.
108 when only hash-check information is needed.
117
109
118 This method processes the flags in the order (or reverse order if
110 This method processes the flags in the order (or reverse order if
119 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
111 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
120 flag processors registered for present flags. The order of flags defined
112 flag processors registered for present flags. The order of flags defined
121 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
113 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
122
114
123 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
115 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
124 processed text and ``validatehash`` is a bool indicating whether the
116 processed text and ``validatehash`` is a bool indicating whether the
125 returned text should be checked for hash integrity.
117 returned text should be checked for hash integrity.
126 """
118 """
127 return _processflagsfunc(revlog, text, flags, 'read')
119 return _processflagsfunc(revlog, text, flags, 'read')
128
120
129 def processflagsraw(revlog, text, flags):
121 def processflagsraw(revlog, text, flags):
130 """Inspect revision data flags to check is the content hash should be
122 """Inspect revision data flags to check is the content hash should be
131 validated.
123 validated.
132
124
133 ``text`` - the revision data to process
125 ``text`` - the revision data to process
134 ``flags`` - the revision flags
126 ``flags`` - the revision flags
135
127
136 This method processes the flags in the order (or reverse order if
128 This method processes the flags in the order (or reverse order if
137 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
129 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
138 flag processors registered for present flags. The order of flags defined
130 flag processors registered for present flags. The order of flags defined
139 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
131 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
140
132
141 Returns a bool indicating whether the raw text should be checked
133 Returns a bool indicating whether the raw text should be checked
142 for hash integrity.
134 for hash integrity.
143
135
144 """
136 """
145 return _processflagsfunc(revlog, text, flags, 'raw')[1]
137 return _processflagsfunc(revlog, text, flags, 'raw')[1]
146
138
147 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
139 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
148 """internal function to process flag on a revlog
140 """internal function to process flag on a revlog
149
141
150 This function is private to this module; code should never need to call it
142 This function is private to this module; code should never need to call it
151 directly."""
143 directly."""
152 # fast path: no flag processors will run
144 # fast path: no flag processors will run
153 if flags == 0:
145 if flags == 0:
154 return text, True, {}
146 return text, True, {}
155 if operation not in ('read', 'write', 'raw'):
147 if operation not in ('read', 'write', 'raw'):
156 raise error.ProgrammingError(_("invalid '%s' operation") %
148 raise error.ProgrammingError(_("invalid '%s' operation") %
157 operation)
149 operation)
158 # Check all flags are known.
150 # Check all flags are known.
159 if flags & ~REVIDX_KNOWN_FLAGS:
151 if flags & ~REVIDX_KNOWN_FLAGS:
160 raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
152 raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
161 (flags & ~REVIDX_KNOWN_FLAGS))
153 (flags & ~REVIDX_KNOWN_FLAGS))
162 validatehash = True
154 validatehash = True
163 # Depending on the operation (read or write), the order might be
155 # Depending on the operation (read or write), the order might be
164 # reversed due to non-commutative transforms.
156 # reversed due to non-commutative transforms.
165 orderedflags = REVIDX_FLAGS_ORDER
157 orderedflags = REVIDX_FLAGS_ORDER
166 if operation == 'write':
158 if operation == 'write':
167 orderedflags = reversed(orderedflags)
159 orderedflags = reversed(orderedflags)
168
160
169 outsidedata = {}
161 outsidedata = {}
170 for flag in orderedflags:
162 for flag in orderedflags:
171 # If a flagprocessor has been registered for a known flag, apply the
163 # If a flagprocessor has been registered for a known flag, apply the
172 # related operation transform and update result tuple.
164 # related operation transform and update result tuple.
173 if flag & flags:
165 if flag & flags:
174 vhash = True
166 vhash = True
175
167
176 if flag not in revlog._flagprocessors:
168 if flag not in revlog._flagprocessors:
177 message = _("missing processor for flag '%#x'") % (flag)
169 message = _("missing processor for flag '%#x'") % (flag)
178 raise revlog._flagserrorclass(message)
170 raise revlog._flagserrorclass(message)
179
171
180 processor = revlog._flagprocessors[flag]
172 processor = revlog._flagprocessors[flag]
181 if processor is not None:
173 if processor is not None:
182 readtransform, writetransform, rawtransform = processor
174 readtransform, writetransform, rawtransform = processor
183
175
184 if operation == 'raw':
176 if operation == 'raw':
185 vhash = rawtransform(revlog, text)
177 vhash = rawtransform(revlog, text)
186 elif operation == 'read':
178 elif operation == 'read':
187 text, vhash, s = readtransform(revlog, text)
179 text, vhash, s = readtransform(revlog, text)
188 outsidedata.update(s)
180 outsidedata.update(s)
189 else: # write operation
181 else: # write operation
190 text, vhash = writetransform(revlog, text, sidedata)
182 text, vhash = writetransform(revlog, text, sidedata)
191 validatehash = validatehash and vhash
183 validatehash = validatehash and vhash
192
184
193 return text, validatehash, outsidedata
185 return text, validatehash, outsidedata
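With the mixin gone, these helpers only require two attributes on the
object they are handed: ``_flagprocessors`` and ``_flagserrorclass``. A
hypothetical minimal consumer, no inheritance involved:

    from mercurial import error
    from mercurial.revlogutils import flagutil

    class tinystore(object):
        _flagserrorclass = error.RevlogError

        def __init__(self):
            self._flagprocessors = dict(flagutil.flagprocessors)

    text, validatehash, sidedata = flagutil.processflagsread(
        tinystore(), b'data', 0)  # flags == 0 takes the fast path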
@@ -1,683 +1,683 b''
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullid,
21 nullid,
22 nullrev,
22 nullrev,
23 )
23 )
24 from mercurial.thirdparty import (
24 from mercurial.thirdparty import (
25 attr,
25 attr,
26 )
26 )
27 from mercurial import (
27 from mercurial import (
28 ancestor,
28 ancestor,
29 bundlerepo,
29 bundlerepo,
30 error,
30 error,
31 extensions,
31 extensions,
32 localrepo,
32 localrepo,
33 mdiff,
33 mdiff,
34 pycompat,
34 pycompat,
35 revlog,
35 revlog,
36 store,
36 store,
37 verify,
37 verify,
38 )
38 )
39 from mercurial.interfaces import (
39 from mercurial.interfaces import (
40 repository,
40 repository,
41 util as interfaceutil,
41 util as interfaceutil,
42 )
42 )
43 from mercurial.utils import (
43 from mercurial.utils import (
44 cborutil,
44 cborutil,
45 storageutil,
45 storageutil,
46 )
46 )
47 from mercurial.revlogutils import (
47 from mercurial.revlogutils import (
48 flagutil,
48 flagutil,
49 )
49 )
50
50
51 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
51 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
52 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
52 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
53 # be specifying the version(s) of Mercurial they are tested with, or
53 # be specifying the version(s) of Mercurial they are tested with, or
54 # leave the attribute unspecified.
54 # leave the attribute unspecified.
55 testedwith = 'ships-with-hg-core'
55 testedwith = 'ships-with-hg-core'
56
56
57 REQUIREMENT = 'testonly-simplestore'
57 REQUIREMENT = 'testonly-simplestore'
58
58
59 def validatenode(node):
59 def validatenode(node):
60 if isinstance(node, int):
60 if isinstance(node, int):
61 raise ValueError('expected node; got int')
61 raise ValueError('expected node; got int')
62
62
63 if len(node) != 20:
63 if len(node) != 20:
64 raise ValueError('expected 20 byte node')
64 raise ValueError('expected 20 byte node')
65
65
66 def validaterev(rev):
66 def validaterev(rev):
67 if not isinstance(rev, int):
67 if not isinstance(rev, int):
68 raise ValueError('expected int')
68 raise ValueError('expected int')
69
69
70 class simplestoreerror(error.StorageError):
70 class simplestoreerror(error.StorageError):
71 pass
71 pass
72
72
73 @interfaceutil.implementer(repository.irevisiondelta)
73 @interfaceutil.implementer(repository.irevisiondelta)
74 @attr.s(slots=True)
74 @attr.s(slots=True)
75 class simplestorerevisiondelta(object):
75 class simplestorerevisiondelta(object):
76 node = attr.ib()
76 node = attr.ib()
77 p1node = attr.ib()
77 p1node = attr.ib()
78 p2node = attr.ib()
78 p2node = attr.ib()
79 basenode = attr.ib()
79 basenode = attr.ib()
80 flags = attr.ib()
80 flags = attr.ib()
81 baserevisionsize = attr.ib()
81 baserevisionsize = attr.ib()
82 revision = attr.ib()
82 revision = attr.ib()
83 delta = attr.ib()
83 delta = attr.ib()
84 linknode = attr.ib(default=None)
84 linknode = attr.ib(default=None)
85
85
86 @interfaceutil.implementer(repository.iverifyproblem)
86 @interfaceutil.implementer(repository.iverifyproblem)
87 @attr.s(frozen=True)
87 @attr.s(frozen=True)
88 class simplefilestoreproblem(object):
88 class simplefilestoreproblem(object):
89 warning = attr.ib(default=None)
89 warning = attr.ib(default=None)
90 error = attr.ib(default=None)
90 error = attr.ib(default=None)
91 node = attr.ib(default=None)
91 node = attr.ib(default=None)
92
92
93 @interfaceutil.implementer(repository.ifilestorage)
93 @interfaceutil.implementer(repository.ifilestorage)
94 class filestorage(flagutil.flagprocessorsmixin):
94 class filestorage(object):
95 """Implements storage for a tracked path.
95 """Implements storage for a tracked path.
96
96
97 Data is stored in the VFS in a directory corresponding to the tracked
97 Data is stored in the VFS in a directory corresponding to the tracked
98 path.
98 path.
99
99
100 Index data is stored in an ``index`` file using CBOR.
100 Index data is stored in an ``index`` file using CBOR.
101
101
102 Fulltext data is stored in files having names of the node.
102 Fulltext data is stored in files having names of the node.
103 """
103 """
104
104
105 _flagserrorclass = simplestoreerror
105 _flagserrorclass = simplestoreerror
106
106
107 def __init__(self, svfs, path):
107 def __init__(self, svfs, path):
108 self._svfs = svfs
108 self._svfs = svfs
109 self._path = path
109 self._path = path
110
110
111 self._storepath = b'/'.join([b'data', path])
111 self._storepath = b'/'.join([b'data', path])
112 self._indexpath = b'/'.join([self._storepath, b'index'])
112 self._indexpath = b'/'.join([self._storepath, b'index'])
113
113
114 indexdata = self._svfs.tryread(self._indexpath)
114 indexdata = self._svfs.tryread(self._indexpath)
115 if indexdata:
115 if indexdata:
116 indexdata = cborutil.decodeall(indexdata)
116 indexdata = cborutil.decodeall(indexdata)
117
117
118 self._indexdata = indexdata or []
118 self._indexdata = indexdata or []
119 self._indexbynode = {}
119 self._indexbynode = {}
120 self._indexbyrev = {}
120 self._indexbyrev = {}
121 self._index = []
121 self._index = []
122 self._refreshindex()
122 self._refreshindex()
123
123
124 self._flagprocessors = dict(flagutil.flagprocessors)
124 self._flagprocessors = dict(flagutil.flagprocessors)
125
125
126 def _refreshindex(self):
126 def _refreshindex(self):
127 self._indexbynode.clear()
127 self._indexbynode.clear()
128 self._indexbyrev.clear()
128 self._indexbyrev.clear()
129 self._index = []
129 self._index = []
130
130
131 for i, entry in enumerate(self._indexdata):
131 for i, entry in enumerate(self._indexdata):
132 self._indexbynode[entry[b'node']] = entry
132 self._indexbynode[entry[b'node']] = entry
133 self._indexbyrev[i] = entry
133 self._indexbyrev[i] = entry
134
134
135 self._indexbynode[nullid] = {
135 self._indexbynode[nullid] = {
136 b'node': nullid,
136 b'node': nullid,
137 b'p1': nullid,
137 b'p1': nullid,
138 b'p2': nullid,
138 b'p2': nullid,
139 b'linkrev': nullrev,
139 b'linkrev': nullrev,
140 b'flags': 0,
140 b'flags': 0,
141 }
141 }
142
142
143 self._indexbyrev[nullrev] = {
143 self._indexbyrev[nullrev] = {
144 b'node': nullid,
144 b'node': nullid,
145 b'p1': nullid,
145 b'p1': nullid,
146 b'p2': nullid,
146 b'p2': nullid,
147 b'linkrev': nullrev,
147 b'linkrev': nullrev,
148 b'flags': 0,
148 b'flags': 0,
149 }
149 }
150
150
151 for i, entry in enumerate(self._indexdata):
151 for i, entry in enumerate(self._indexdata):
152 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
152 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
153
153
154 # start, length, rawsize, chainbase, linkrev, p1, p2, node
154 # start, length, rawsize, chainbase, linkrev, p1, p2, node
155 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
155 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
156 entry[b'node']))
156 entry[b'node']))
157
157
158 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
158 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
159
159
160 def __len__(self):
160 def __len__(self):
161 return len(self._indexdata)
161 return len(self._indexdata)
162
162
163 def __iter__(self):
163 def __iter__(self):
164 return iter(range(len(self)))
164 return iter(range(len(self)))
165
165
166 def revs(self, start=0, stop=None):
166 def revs(self, start=0, stop=None):
167 step = 1
167 step = 1
168 if stop is not None:
168 if stop is not None:
169 if start > stop:
169 if start > stop:
170 step = -1
170 step = -1
171
171
172 stop += step
172 stop += step
173 else:
173 else:
174 stop = len(self)
174 stop = len(self)
175
175
176 return range(start, stop, step)
176 return range(start, stop, step)
177
177
178 def parents(self, node):
178 def parents(self, node):
179 validatenode(node)
179 validatenode(node)
180
180
181 if node not in self._indexbynode:
181 if node not in self._indexbynode:
182 raise KeyError('unknown node')
182 raise KeyError('unknown node')
183
183
184 entry = self._indexbynode[node]
184 entry = self._indexbynode[node]
185
185
186 return entry[b'p1'], entry[b'p2']
186 return entry[b'p1'], entry[b'p2']
187
187
188 def parentrevs(self, rev):
188 def parentrevs(self, rev):
189 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
189 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
190 return self.rev(p1), self.rev(p2)
190 return self.rev(p1), self.rev(p2)
191
191
192 def rev(self, node):
192 def rev(self, node):
193 validatenode(node)
193 validatenode(node)
194
194
195 try:
195 try:
196 self._indexbynode[node]
196 self._indexbynode[node]
197 except KeyError:
197 except KeyError:
198 raise error.LookupError(node, self._indexpath, _('no node'))
198 raise error.LookupError(node, self._indexpath, _('no node'))
199
199
200 for rev, entry in self._indexbyrev.items():
200 for rev, entry in self._indexbyrev.items():
201 if entry[b'node'] == node:
201 if entry[b'node'] == node:
202 return rev
202 return rev
203
203
204 raise error.ProgrammingError('this should not occur')
204 raise error.ProgrammingError('this should not occur')
205
205
206 def node(self, rev):
206 def node(self, rev):
207 validaterev(rev)
207 validaterev(rev)
208
208
209 return self._indexbyrev[rev][b'node']
209 return self._indexbyrev[rev][b'node']
210
210
211 def hasnode(self, node):
211 def hasnode(self, node):
212 validatenode(node)
212 validatenode(node)
213 return node in self._indexbynode
213 return node in self._indexbynode
214
214
215 def censorrevision(self, tr, censornode, tombstone=b''):
215 def censorrevision(self, tr, censornode, tombstone=b''):
216 raise NotImplementedError('TODO')
216 raise NotImplementedError('TODO')
217
217
218 def lookup(self, node):
218 def lookup(self, node):
219 if isinstance(node, int):
219 if isinstance(node, int):
220 return self.node(node)
220 return self.node(node)
221
221
222 if len(node) == 20:
222 if len(node) == 20:
223 self.rev(node)
223 self.rev(node)
224 return node
224 return node
225
225
226 try:
226 try:
227 rev = int(node)
227 rev = int(node)
228 if '%d' % rev != node:
228 if '%d' % rev != node:
229 raise ValueError
229 raise ValueError
230
230
231 if rev < 0:
231 if rev < 0:
232 rev = len(self) + rev
232 rev = len(self) + rev
233 if rev < 0 or rev >= len(self):
233 if rev < 0 or rev >= len(self):
234 raise ValueError
234 raise ValueError
235
235
236 return self.node(rev)
236 return self.node(rev)
237 except (ValueError, OverflowError):
237 except (ValueError, OverflowError):
238 pass
238 pass
239
239
240 if len(node) == 40:
240 if len(node) == 40:
241 try:
241 try:
242 rawnode = bin(node)
242 rawnode = bin(node)
243 self.rev(rawnode)
243 self.rev(rawnode)
244 return rawnode
244 return rawnode
245 except TypeError:
245 except TypeError:
246 pass
246 pass
247
247
248 raise error.LookupError(node, self._path, _('invalid lookup input'))
248 raise error.LookupError(node, self._path, _('invalid lookup input'))
249
249
250 def linkrev(self, rev):
250 def linkrev(self, rev):
251 validaterev(rev)
251 validaterev(rev)
252
252
253 return self._indexbyrev[rev][b'linkrev']
253 return self._indexbyrev[rev][b'linkrev']
254
254
255 def _flags(self, rev):
255 def _flags(self, rev):
256 validaterev(rev)
256 validaterev(rev)
257
257
258 return self._indexbyrev[rev][b'flags']
258 return self._indexbyrev[rev][b'flags']
259
259
260 def _candelta(self, baserev, rev):
260 def _candelta(self, baserev, rev):
261 validaterev(baserev)
261 validaterev(baserev)
262 validaterev(rev)
262 validaterev(rev)
263
263
264 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
264 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
265 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
265 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
266 return False
266 return False
267
267
268 return True
268 return True
269
269
270 def checkhash(self, text, node, p1=None, p2=None, rev=None):
270 def checkhash(self, text, node, p1=None, p2=None, rev=None):
271 if p1 is None and p2 is None:
271 if p1 is None and p2 is None:
272 p1, p2 = self.parents(node)
272 p1, p2 = self.parents(node)
273 if node != storageutil.hashrevisionsha1(text, p1, p2):
273 if node != storageutil.hashrevisionsha1(text, p1, p2):
274 raise simplestoreerror(_("integrity check failed on %s") %
274 raise simplestoreerror(_("integrity check failed on %s") %
275 self._path)
275 self._path)
276
276
    def revision(self, nodeorrev, raw=False):
        if isinstance(nodeorrev, int):
            node = self.node(nodeorrev)
        else:
            node = nodeorrev
        validatenode(node)

        if node == nullid:
            return b''

        rev = self.rev(node)
        flags = self._flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def rawdata(self, nodeorrev):
        return self.revision(nodeorrev, raw=True)

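The raw argument separates on-disk bytes from consumer-visible text: rawdata() returns the stored bytes after only flagutil.processflagsraw() validation, while revision() runs the read-side flag processors and verifies the hash when asked to. A sketch of the relationship, assuming a hypothetical store fl and a node stored with no text-transforming flags:

raw = fl.rawdata(node)      # bytes exactly as written by _addrawrevision()
text = fl.revision(node)    # bytes after flagutil.processflagsread()
# With no transforming flag processors registered for this revision's
# flags, the two views coincide:
assert raw == text
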
    def read(self, node):
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]

    def renamed(self, node):
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = storageutil.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

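Copy (rename) information travels inside the fulltext as a \1\n-delimited metadata header, which is why read() strips the header and renamed() parses it. A small round-trip sketch using the storageutil helpers referenced above (the literal values are illustrative):

from mercurial.utils import storageutil

meta = {b'copy': b'old/name', b'copyrev': b'0' * 40}
packed = storageutil.packmeta(meta, b'file contents\n')
parsed, offset = storageutil.parsemeta(packed)
assert parsed[b'copy'] == b'old/name'
assert packed[offset:] == b'file contents\n'   # what read() would return
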
    def cmp(self, node, text):
        validatenode(node)

        t = text

        if text.startswith(b'\1\n'):
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if storageutil.hashrevisionsha1(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def size(self, rev):
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        validaterev(rev)

        return self._flags(rev) & repository.REVISION_FLAG_CENSORED

    def commonancestorsheads(self, a, b):
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

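descendants() performs a forward sweep: every revision after the smallest input is yielded as soon as one of its parents is already in the seen set. On a hypothetical linear history 0 -> 1 -> 2 held in a store fl, the behavior is:

assert list(fl.descendants([0])) == [1, 2]   # everything after rev 0
assert list(fl.descendants([1])) == [2]      # rev 0 is not a descendant
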
    # Required by verify.
    def files(self):
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        # TODO do a real implementation of this
        return {
            'exclusivefiles': [],
            'sharedfiles': [],
            'revisionscount': len(self),
            'trackedsize': 0,
            'storedsize': None,
        }

    def verifyintegrity(self, state):
        state['skipread'] = set()
        for rev in self:
            node = self.node(rev)
            try:
                self.revision(node)
            except Exception as e:
                yield simplefilestoreproblem(
                    error='unpacking %s: %s' % (node, e),
                    node=node)
                state['skipread'].add(node)

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        # TODO this will probably break on some ordering options.
        nodes = [n for n in nodes if n != nullid]
        if not nodes:
            return
        for delta in storageutil.emitrevisions(
                self, nodes, nodesorder, simplestorerevisiondelta,
                revisiondata=revisiondata,
                assumehaveparentrevisions=assumehaveparentrevisions,
                deltamode=deltamode):
            yield delta

    def add(self, text, meta, transaction, linkrev, p1, p2):
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or storageutil.hashrevisionsha1(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        node = node or storageutil.hashrevisionsha1(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
                                    flags)

    def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
        transaction.addbackup(self._indexpath)

        path = b'/'.join([self._storepath, hex(node)])

        self._svfs.write(path, rawtext)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': link,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        self._refreshindex()
        self._svfs.write(self._indexpath,
                         ''.join(cborutil.streamencode(self._indexdata)))

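The index persists as a CBOR stream of per-revision maps, so it can be rewritten wholesale and read back without any revlog machinery. A sketch of that symmetry using mercurial.utils.cborutil (the single entry shown is illustrative):

from mercurial.utils import cborutil

entry = {b'node': b'\x11' * 20, b'p1': b'\x00' * 20, b'p2': b'\x00' * 20,
         b'linkrev': 0, b'flags': 0}
encoded = b''.join(cborutil.streamencode([entry]))
assert cborutil.decodeall(encoded)[0] == [entry]
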
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
                 maybemissingparents=False):
        if maybemissingparents:
            raise error.Abort(_('simple store does not support missing parents '
                                'write mode'))

        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)
            flags = flags or revlog.REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            self._addrawrevision(node, text, transaction, linkrev, p1, p2,
                                 flags)

            if addrevisioncb:
                addrevisioncb(self, node)
        return nodes

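Each incoming delta is resolved against the fulltext of its base before storage; the heavy lifting is plain mdiff patching. A standalone sketch of that step (the texts are illustrative):

from mercurial import mdiff

base = b'line 1\nline 2\n'
new = b'line 1\nline 2 changed\n'
delta = mdiff.textdiff(base, new)        # binary delta from base to new
assert mdiff.patch(base, delta) == new   # the non-null deltabase case
# With a null delta base, the delta applies against the empty string:
assert mdiff.patch(b'', mdiff.textdiff(b'', new)) == new
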
    def _headrevs(self):
        # Assume all revisions are heads by default.
        revishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents.
            revishead[self.rev(entry[b'p1'])] = False
            revishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(revishead.items())
                if ishead]

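_headrevs() starts by presuming every revision is a head and then clears the flag for anything that appears as a parent. The same algorithm restated independently of the class, with a worked three-revision example (rev 2's parent is 1, rev 1's parent is 0, -1 standing in for null):

def headrevs(parents):
    # parents: {rev: (p1rev, p2rev)}
    ishead = {rev: True for rev in parents}
    for rev, (p1, p2) in parents.items():
        ishead[p1] = ishead[p2] = False   # a parent is never a head
    return [rev for rev, h in sorted(ishead.items()) if h]

assert headrevs({0: (-1, -1), 1: (0, -1), 2: (1, -1)}) == [2]
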
    def heads(self, start=None, stop=None):
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self._headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        return storageutil.resolvestripinfo(
            minlink, len(self) - 1, self._headrevs(), self.linkrev,
            self.parentrevs)

    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()

def issimplestorefile(f, kind, st):
    if kind != stat.S_IFREG:
        return False

    if store.isrevlog(f, kind, st):
        return False

    # Ignore transaction undo files.
    if f.startswith('undo.'):
        return False

    # Otherwise assume it belongs to the simple store.
    return True

class simplestore(store.encodedstore):
    def datafiles(self):
        for x in super(simplestore, self).datafiles():
            yield x

        # Supplement with non-revlog files.
        extrafiles = self._walk('data', True, filefilter=issimplestorefile)

        for unencoded, encoded, size in extrafiles:
            try:
                unencoded = store.decodefilename(unencoded)
            except KeyError:
                unencoded = None

            yield unencoded, encoded, size

def reposetup(ui, repo):
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo

def featuresetup(ui, supported):
    supported.add(REQUIREMENT)

def newreporequirements(orig, ui, createopts):
    """Modifies default requirements for new repos to use the simple store."""
    requirements = orig(ui, createopts)

    # These requirements are only used to affect creation of the store
    # object. We have our own store. So we can remove them.
    # TODO do this once we feel like taking the test hit.
    #if 'fncache' in requirements:
    #    requirements.remove('fncache')
    #if 'dotencode' in requirements:
    #    requirements.remove('dotencode')

    requirements.add(REQUIREMENT)

    return requirements

def makestore(orig, requirements, path, vfstype):
    if REQUIREMENT not in requirements:
        return orig(requirements, path, vfstype)

    return simplestore(path, vfstype)

def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False

def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)

    extensions.wrapfunction(localrepo, 'newreporequirements',
                            newreporequirements)
    extensions.wrapfunction(localrepo, 'makestore', makestore)
    extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
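
For context on the orig parameter threaded through newreporequirements, makestore, and verifierinit above: extensions.wrapfunction replaces an attribute with a wrapper that receives the original callable as its first argument. A toy illustration of that calling convention (mod and greet are made up, not Mercurial APIs):

import types
from mercurial import extensions

mod = types.SimpleNamespace(greet=lambda name: b'hello ' + name)

def loudgreet(orig, name):
    # orig is the previous mod.greet
    return orig(name).upper()

extensions.wrapfunction(mod, 'greet', loudgreet)
assert mod.greet(b'world') == b'HELLO WORLD'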