##// END OF EJS Templates
storageutil: make all callables optional...
Gregory Szorc -
r40045:631c6f50 default
parent child Browse files
Show More
@@ -1,410 +1,439 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import re
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from .. import (
20 20 error,
21 mdiff,
21 22 pycompat,
22 23 )
23 24
24 25 _nullhash = hashlib.sha1(nullid)
25 26
26 27 def hashrevisionsha1(text, p1, p2):
27 28 """Compute the SHA-1 for revision data and its parents.
28 29
29 30 This hash combines both the current file contents and its history
30 31 in a manner that makes it easy to distinguish nodes with the same
31 32 content in the revision graph.
32 33 """
33 34 # As of now, if one of the parent node is null, p2 is null
34 35 if p2 == nullid:
35 36 # deep copy of a hash is faster than creating one
36 37 s = _nullhash.copy()
37 38 s.update(p1)
38 39 else:
39 40 # none of the parent nodes are nullid
40 41 if p1 < p2:
41 42 a = p1
42 43 b = p2
43 44 else:
44 45 a = p2
45 46 b = p1
46 47 s = hashlib.sha1(a)
47 48 s.update(b)
48 49 s.update(text)
49 50 return s.digest()
50 51
51 52 METADATA_RE = re.compile(b'\x01\n')
52 53
53 54 def parsemeta(text):
54 55 """Parse metadata header from revision data.
55 56
56 57 Returns a 2-tuple of (metadata, offset), where both can be None if there
57 58 is no metadata.
58 59 """
59 60 # text can be buffer, so we can't use .startswith or .index
60 61 if text[:2] != b'\x01\n':
61 62 return None, None
62 63 s = METADATA_RE.search(text, 2).start()
63 64 mtext = text[2:s]
64 65 meta = {}
65 66 for l in mtext.splitlines():
66 67 k, v = l.split(b': ', 1)
67 68 meta[k] = v
68 69 return meta, s + 2
69 70
70 71 def packmeta(meta, text):
71 72 """Add metadata to fulltext to produce revision text."""
72 73 keys = sorted(meta)
73 74 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
74 75 return b'\x01\n%s\x01\n%s' % (metatext, text)
75 76
76 77 def iscensoredtext(text):
77 78 meta = parsemeta(text)[0]
78 79 return meta and b'censored' in meta
79 80
80 81 def filtermetadata(text):
81 82 """Extract just the revision data from source text.
82 83
83 84 Returns ``text`` unless it has a metadata header, in which case we return
84 85 a new buffer without hte metadata.
85 86 """
86 87 if not text.startswith(b'\x01\n'):
87 88 return text
88 89
89 90 offset = text.index(b'\x01\n', 2)
90 91 return text[offset + 2:]
91 92
92 93 def filerevisioncopied(store, node):
93 94 """Resolve file revision copy metadata.
94 95
95 96 Returns ``False`` if the file has no copy metadata. Otherwise a
96 97 2-tuple of the source filename and node.
97 98 """
98 99 if store.parents(node)[0] != nullid:
99 100 return False
100 101
101 102 meta = parsemeta(store.revision(node))[0]
102 103
103 104 # copy and copyrev occur in pairs. In rare cases due to old bugs,
104 105 # one can occur without the other. So ensure both are present to flag
105 106 # as a copy.
106 107 if meta and b'copy' in meta and b'copyrev' in meta:
107 108 return meta[b'copy'], bin(meta[b'copyrev'])
108 109
109 110 return False
110 111
111 112 def filedataequivalent(store, node, filedata):
112 113 """Determines whether file data is equivalent to a stored node.
113 114
114 115 Returns True if the passed file data would hash to the same value
115 116 as a stored revision and False otherwise.
116 117
117 118 When a stored revision is censored, filedata must be empty to have
118 119 equivalence.
119 120
120 121 When a stored revision has copy metadata, it is ignored as part
121 122 of the compare.
122 123 """
123 124
124 125 if filedata.startswith(b'\x01\n'):
125 126 revisiontext = b'\x01\n\x01\n' + filedata
126 127 else:
127 128 revisiontext = filedata
128 129
129 130 p1, p2 = store.parents(node)
130 131
131 132 computednode = hashrevisionsha1(revisiontext, p1, p2)
132 133
133 134 if computednode == node:
134 135 return True
135 136
136 137 # Censored files compare against the empty file.
137 138 if store.iscensored(store.rev(node)):
138 139 return filedata == b''
139 140
140 141 # Renaming a file produces a different hash, even if the data
141 142 # remains unchanged. Check if that's the case.
142 143 if store.renamed(node):
143 144 return store.read(node) == filedata
144 145
145 146 return False
146 147
147 148 def iterrevs(storelen, start=0, stop=None):
148 149 """Iterate over revision numbers in a store."""
149 150 step = 1
150 151
151 152 if stop is not None:
152 153 if start > stop:
153 154 step = -1
154 155 stop += step
155 156 if stop > storelen:
156 157 stop = storelen
157 158 else:
158 159 stop = storelen
159 160
160 161 return pycompat.xrange(start, stop, step)
161 162
162 163 def fileidlookup(store, fileid, identifier):
163 164 """Resolve the file node for a value.
164 165
165 166 ``store`` is an object implementing the ``ifileindex`` interface.
166 167
167 168 ``fileid`` can be:
168 169
169 170 * A 20 byte binary node.
170 171 * An integer revision number
171 172 * A 40 byte hex node.
172 173 * A bytes that can be parsed as an integer representing a revision number.
173 174
174 175 ``identifier`` is used to populate ``error.LookupError`` with an identifier
175 176 for the store.
176 177
177 178 Raises ``error.LookupError`` on failure.
178 179 """
179 180 if isinstance(fileid, int):
180 181 try:
181 182 return store.node(fileid)
182 183 except IndexError:
183 184 raise error.LookupError(fileid, identifier, _('no match found'))
184 185
185 186 if len(fileid) == 20:
186 187 try:
187 188 store.rev(fileid)
188 189 return fileid
189 190 except error.LookupError:
190 191 pass
191 192
192 193 if len(fileid) == 40:
193 194 try:
194 195 rawnode = bin(fileid)
195 196 store.rev(rawnode)
196 197 return rawnode
197 198 except TypeError:
198 199 pass
199 200
200 201 try:
201 202 rev = int(fileid)
202 203
203 204 if b'%d' % rev != fileid:
204 205 raise ValueError
205 206
206 207 try:
207 208 return store.node(rev)
208 209 except (IndexError, TypeError):
209 210 pass
210 211 except (ValueError, OverflowError):
211 212 pass
212 213
213 214 raise error.LookupError(fileid, identifier, _('no match found'))
214 215
215 216 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
216 217 """Resolve information needed to strip revisions.
217 218
218 219 Finds the minimum revision number that must be stripped in order to
219 220 strip ``minlinkrev``.
220 221
221 222 Returns a 2-tuple of the minimum revision number to do that and a set
222 223 of all revision numbers that have linkrevs that would be broken
223 224 by that strip.
224 225
225 226 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
226 227 ``headrevs`` is an iterable of head revisions.
227 228 ``linkrevfn`` is a callable that receives a revision and returns a linked
228 229 revision.
229 230 ``parentrevsfn`` is a callable that receives a revision number and returns
230 231 an iterable of its parent revision numbers.
231 232 """
232 233 brokenrevs = set()
233 234 strippoint = tiprev + 1
234 235
235 236 heads = {}
236 237 futurelargelinkrevs = set()
237 238 for head in headrevs:
238 239 headlinkrev = linkrevfn(head)
239 240 heads[head] = headlinkrev
240 241 if headlinkrev >= minlinkrev:
241 242 futurelargelinkrevs.add(headlinkrev)
242 243
243 244 # This algorithm involves walking down the rev graph, starting at the
244 245 # heads. Since the revs are topologically sorted according to linkrev,
245 246 # once all head linkrevs are below the minlink, we know there are
246 247 # no more revs that could have a linkrev greater than minlink.
247 248 # So we can stop walking.
248 249 while futurelargelinkrevs:
249 250 strippoint -= 1
250 251 linkrev = heads.pop(strippoint)
251 252
252 253 if linkrev < minlinkrev:
253 254 brokenrevs.add(strippoint)
254 255 else:
255 256 futurelargelinkrevs.remove(linkrev)
256 257
257 258 for p in parentrevsfn(strippoint):
258 259 if p != nullrev:
259 260 plinkrev = linkrevfn(p)
260 261 heads[p] = plinkrev
261 262 if plinkrev >= minlinkrev:
262 263 futurelargelinkrevs.add(plinkrev)
263 264
264 265 return strippoint, brokenrevs
265 266
266 def emitrevisions(store, revs, resultcls, deltaparentfn, candeltafn,
267 rawsizefn, revdifffn, flagsfn, sendfulltext=False,
267 def emitrevisions(store, revs, resultcls, deltaparentfn=None, candeltafn=None,
268 rawsizefn=None, revdifffn=None, flagsfn=None,
269 sendfulltext=False,
268 270 revisiondata=False, assumehaveparentrevisions=False,
269 271 deltaprevious=False):
270 272 """Generic implementation of ifiledata.emitrevisions().
271 273
272 274 Emitting revision data is subtly complex. This function attempts to
273 275 encapsulate all the logic for doing so in a backend-agnostic way.
274 276
275 277 ``store``
276 278 Object conforming to ``ifilestorage`` interface.
277 279
278 280 ``revs``
279 281 List of integer revision numbers whose data to emit.
280 282
281 283 ``resultcls``
282 284 A type implementing the ``irevisiondelta`` interface that will be
283 285 constructed and returned.
284 286
285 ``deltaparentfn``
287 ``deltaparentfn`` (optional)
286 288 Callable receiving a revision number and returning the revision number
287 289 of a revision that the internal delta is stored against. This delta
288 290 will be preferred over computing a new arbitrary delta.
289 291
290 ``candeltafn``
292 If not defined, a delta will always be computed from raw revision
293 data.
294
295 ``candeltafn`` (optional)
291 296 Callable receiving a pair of revision numbers that returns a bool
292 297 indicating whether a delta between them can be produced.
293 298
294 ``rawsizefn``
299 If not defined, it is assumed that any two revisions can delta with
300 each other.
301
302 ``rawsizefn`` (optional)
295 303 Callable receiving a revision number and returning the length of the
296 304 ``store.revision(rev, raw=True)``.
297 305
298 ``revdifffn``
306 If not defined, ``len(store.revision(rev, raw=True))`` will be called.
307
308 ``revdifffn`` (optional)
299 309 Callable receiving a pair of revision numbers that returns a delta
300 310 between them.
301 311
302 ``flagsfn``
312 If not defined, a delta will be computed by invoking mdiff code
313 on ``store.revision()`` results.
314
315 Defining this function allows a precomputed or stored delta to be
316 used without having to compute on.
317
318 ``flagsfn`` (optional)
303 319 Callable receiving a revision number and returns the integer flags
304 value for it.
320 value for it. If not defined, flags value will be 0.
305 321
306 322 ``sendfulltext``
307 323 Whether to send fulltext revisions instead of deltas, if allowed.
308 324
309 325 ``revisiondata``
310 326 ``assumehaveparentrevisions``
311 327 ``deltaprevious``
312 328 See ``ifiledata.emitrevisions()`` interface documentation.
313 329 """
314 330
315 331 fnode = store.node
316 332
317 333 prevrev = None
318 334
319 335 if deltaprevious or assumehaveparentrevisions:
320 336 prevrev = store.parentrevs(revs[0])[0]
321 337
322 338 # Set of revs available to delta against.
323 339 available = set()
324 340
325 341 for rev in revs:
326 342 if rev == nullrev:
327 343 continue
328 344
329 345 node = fnode(rev)
330 deltaparentrev = deltaparentfn(rev)
331 346 p1rev, p2rev = store.parentrevs(rev)
332 347
348 if deltaparentfn:
349 deltaparentrev = deltaparentfn(rev)
350 else:
351 deltaparentrev = nullrev
352
333 353 # Forced delta against previous mode.
334 354 if deltaprevious:
335 355 baserev = prevrev
336 356
337 357 # We're instructed to send fulltext. Honor that.
338 358 elif sendfulltext:
339 359 baserev = nullrev
340 360
341 361 # There is a delta in storage. We try to use that because it
342 362 # amounts to effectively copying data from storage and is
343 363 # therefore the fastest.
344 364 elif deltaparentrev != nullrev:
345 365 # Base revision was already emitted in this group. We can
346 366 # always safely use the delta.
347 367 if deltaparentrev in available:
348 368 baserev = deltaparentrev
349 369
350 370 # Base revision is a parent that hasn't been emitted already.
351 371 # Use it if we can assume the receiver has the parent revision.
352 372 elif (assumehaveparentrevisions
353 373 and deltaparentrev in (p1rev, p2rev)):
354 374 baserev = deltaparentrev
355 375
356 376 # No guarantee the receiver has the delta parent. Send delta
357 377 # against last revision (if possible), which in the common case
358 378 # should be similar enough to this revision that the delta is
359 379 # reasonable.
360 380 elif prevrev is not None:
361 381 baserev = prevrev
362 382 else:
363 383 baserev = nullrev
364 384
365 385 # Storage has a fulltext revision.
366 386
367 387 # Let's use the previous revision, which is as good a guess as any.
368 388 # There is definitely room to improve this logic.
369 389 elif prevrev is not None:
370 390 baserev = prevrev
371 391 else:
372 392 baserev = nullrev
373 393
374 394 # But we can't actually use our chosen delta base for whatever
375 395 # reason. Reset to fulltext.
376 if baserev != nullrev and not candeltafn(baserev, rev):
396 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
377 397 baserev = nullrev
378 398
379 399 revision = None
380 400 delta = None
381 401 baserevisionsize = None
382 402
383 403 if revisiondata:
384 404 if store.iscensored(baserev) or store.iscensored(rev):
385 405 try:
386 406 revision = store.revision(node, raw=True)
387 407 except error.CensoredNodeError as e:
388 408 revision = e.tombstone
389 409
390 410 if baserev != nullrev:
391 baserevisionsize = rawsizefn(baserev)
411 if rawsizefn:
412 baserevisionsize = rawsizefn(baserev)
413 else:
414 baserevisionsize = len(store.revision(baserev,
415 raw=True))
392 416
393 417 elif baserev == nullrev and not deltaprevious:
394 418 revision = store.revision(node, raw=True)
395 419 available.add(rev)
396 420 else:
397 delta = revdifffn(baserev, rev)
421 if revdifffn:
422 delta = revdifffn(baserev, rev)
423 else:
424 delta = mdiff.textdiff(store.revision(baserev, raw=True),
425 store.revision(rev, raw=True))
426
398 427 available.add(rev)
399 428
400 429 yield resultcls(
401 430 node=node,
402 431 p1node=fnode(p1rev),
403 432 p2node=fnode(p2rev),
404 433 basenode=fnode(baserev),
405 flags=flagsfn(rev),
434 flags=flagsfn(rev) if flagsfn else 0,
406 435 baserevisionsize=baserevisionsize,
407 436 revision=revision,
408 437 delta=delta)
409 438
410 439 prevrev = rev
General Comments 0
You need to be logged in to leave comments. Login now