##// END OF EJS Templates
rawdata: implement the method for `remotefilelog` too...
marmoute -
r42951:7492899c default
parent child Browse files
Show More
@@ -1,459 +1,462 b''
1 1 # remotefilelog.py - filelog implementation where filelog history is stored
2 2 # remotely
3 3 #
4 4 # Copyright 2013 Facebook, Inc.
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from mercurial.node import (
14 14 bin,
15 15 nullid,
16 16 wdirfilenodeids,
17 17 wdirid,
18 18 )
19 19 from mercurial.i18n import _
20 20 from mercurial import (
21 21 ancestor,
22 22 error,
23 23 mdiff,
24 24 revlog,
25 25 )
26 26 from mercurial.utils import storageutil
27 27
28 28 from . import (
29 29 constants,
30 30 fileserverclient,
31 31 shallowutil,
32 32 )
33 33
class remotefilelognodemap(object):
    """Minimal node-membership view backed by a remotefilelog content store.

    Supports only containment tests and identity lookups; the store is
    consulted lazily through its ``getmissing`` API.
    """

    def __init__(self, filename, store):
        self._filename = filename
        self._store = store

    def __contains__(self, node):
        # the store reports which (filename, node) pairs it does NOT have;
        # an empty "missing" answer means the node is present
        return not self._store.getmissing([(self._filename, node)])

    def __get__(self, node):
        # mapping-style lookup: a known node simply maps to itself
        if node in self:
            return node
        raise KeyError(node)
47 47
class remotefilelog(object):
    """filelog implementation where file history is stored remotely

    Revision text is read from and written to ``repo.contentstore`` and
    revision parentage/linknode information comes from ``repo.metadatastore``
    instead of a local revlog.  The interface mimics ``filelog`` closely
    enough for the rest of Mercurial, but revision *numbers* are generally
    unsupported: several methods accept or return nodes where a revlog would
    use revs (see ``rev``, ``node``, ``parentrevs``).
    """

    _generaldelta = True

    def __init__(self, opener, path, repo):
        self.opener = opener
        self.filename = path
        self.repo = repo
        # node-containment queries are answered by the content store
        self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)

        self.version = 1

    def read(self, node):
        """returns the file contents at this node"""
        t = self.revision(node)
        # '\1\n' introduces the optional filelog metadata header; strip it
        # (the second '\1\n' terminates the header) before returning content
        if not t.startswith('\1\n'):
            return t
        s = t.index('\1\n', 2)
        return t[s + 2:]

    def add(self, text, meta, transaction, linknode, p1=None, p2=None):
        """add a revision; the node is hashed over the metadata-bearing text

        Returns the node of the new revision.
        """
        # hash with the metadata, like in vanilla filelogs
        hashtext = shallowutil.createrevlogtext(text, meta.get('copy'),
                                                meta.get('copyrev'))
        node = storageutil.hashrevisionsha1(hashtext, p1, p2)
        return self.addrevision(hashtext, transaction, linknode, p1, p2,
                                node=node)

    def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
        """serialize a revision plus its full ancestry into one file blob

        Blob layout: header '\\0' text, then one '\\0'-terminated
        (node, p1, p2, linknode, copyfrom) record for this revision followed
        by records for every ancestor, emitted in topological (BFS) order.
        """
        # text passed to "_createfileblob" does not include filelog metadata
        header = shallowutil.buildfileblobheader(len(text), flags)
        data = "%s\0%s" % (header, text)

        # a copy records the copy source's node as the effective p1
        realp1 = p1
        copyfrom = ""
        if meta and 'copy' in meta:
            copyfrom = meta['copy']
            realp1 = bin(meta['copyrev'])

        data += "%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)

        visited = set()

        # collect the ancestor maps of both parents; a copied p1 lives in the
        # source file's history, so consult that filelog instead of self
        pancestors = {}
        queue = []
        if realp1 != nullid:
            p1flog = self
            if copyfrom:
                p1flog = remotefilelog(self.opener, copyfrom, self.repo)

            pancestors.update(p1flog.ancestormap(realp1))
            queue.append(realp1)
            visited.add(realp1)
        if p2 != nullid:
            pancestors.update(self.ancestormap(p2))
            queue.append(p2)
            visited.add(p2)

        ancestortext = ""

        # add the ancestors in topological order
        while queue:
            c = queue.pop(0)
            pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]

            pacopyfrom = pacopyfrom or ''
            ancestortext += "%s%s%s%s%s\0" % (
                c, pa1, pa2, ancestorlinknode, pacopyfrom)

            if pa1 != nullid and pa1 not in visited:
                queue.append(pa1)
                visited.add(pa1)
            if pa2 != nullid and pa2 not in visited:
                queue.append(pa2)
                visited.add(pa2)

        data += ancestortext

        return data

    def addrevision(self, text, transaction, linknode, p1, p2, cachedelta=None,
                    node=None, flags=revlog.REVIDX_DEFAULT_FLAGS):
        """add a revision whose text includes the hg filelog metadata header

        Parses the metadata confidently here, then defers to
        ``addrawrevision`` with the write-processed raw text.  Returns the
        node of the new revision.
        """
        # text passed to "addrevision" includes hg filelog metadata header
        if node is None:
            node = storageutil.hashrevisionsha1(text, p1, p2)

        meta, metaoffset = storageutil.parsemeta(text)
        rawtext, validatehash = self._processflags(text, flags, 'write')
        return self.addrawrevision(rawtext, transaction, linknode, p1, p2,
                                   node, flags, cachedelta,
                                   _metatuple=(meta, metaoffset))

    def addrawrevision(self, rawtext, transaction, linknode, p1, p2, node,
                       flags, cachedelta=None, _metatuple=None):
        """store raw revision text, stripping any filelog metadata into the
        blob's ancestortext; returns the node"""
        if _metatuple:
            # _metatuple: used by "addrevision" internally by remotefilelog
            # meta was parsed confidently
            meta, metaoffset = _metatuple
        else:
            # not from self.addrevision, but something else (repo._filecommit)
            # calls addrawrevision directly. remotefilelog needs to get and
            # strip filelog metadata.
            # we don't have confidence about whether rawtext contains filelog
            # metadata or not (flag processor could replace it), so we just
            # parse it as best-effort.
            # in LFS (flags != 0)'s case, the best way is to call LFS code to
            # get the meta information, instead of storageutil.parsemeta.
            meta, metaoffset = storageutil.parsemeta(rawtext)
        if flags != 0:
            # when flags != 0, be conservative and do not mangle rawtext, since
            # a read flag processor expects the text not being mangled at all.
            metaoffset = 0
        if metaoffset:
            # remotefilelog fileblob stores copy metadata in its ancestortext,
            # not its main blob. so we need to remove filelog metadata
            # (containing copy information) from text.
            blobtext = rawtext[metaoffset:]
        else:
            blobtext = rawtext
        data = self._createfileblob(blobtext, meta, flags, p1, p2, node,
                                    linknode)
        self.repo.contentstore.addremotefilelognode(self.filename, node, data)

        return node

    def renamed(self, node):
        """return (copysource, copynode) if this revision is a copy,
        else False"""
        ancestors = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestors[node]
        if copyfrom:
            return (copyfrom, p1)

        return False

    def size(self, node):
        """return the size of a given revision"""
        # computed by materializing the full text; there is no cheap
        # length-only query against the stores
        return len(self.read(node))

    rawsize = size

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """

        if node == nullid:
            return True

        nodetext = self.read(node)
        return nodetext != text

    def __nonzero__(self):
        return True

    __bool__ = __nonzero__

    def __len__(self):
        if self.filename == '.hgtags':
            # The length of .hgtags is used to fast path tag checking.
            # remotefilelog doesn't support .hgtags since the entire .hgtags
            # history is needed. Use the excludepattern setting to make
            # .hgtags a normal filelog.
            return 0

        raise RuntimeError("len not supported")

    def empty(self):
        return False

    def flags(self, node):
        """return the revlog flags stored for *node* (0 when none)"""
        if isinstance(node, int):
            raise error.ProgrammingError(
                'remotefilelog does not accept integer rev for flags')
        store = self.repo.contentstore
        return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)

    def parents(self, node):
        """return (p1, p2) nodes; for a copy, p1 is reported as nullid

        (the copy source node is exposed via ``renamed`` instead)
        """
        if node == nullid:
            return nullid, nullid

        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        if copyfrom:
            p1 = nullid

        return p1, p2

    def parentrevs(self, rev):
        # TODO(augie): this is a node and should be a rev, but for now
        # nothing in core seems to actually break.
        return self.parents(rev)

    def linknode(self, node):
        """return the changelog node this file revision is linked to"""
        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        return linknode

    def linkrev(self, node):
        # resolved against the unfiltered changelog so hidden-changeset
        # filtering cannot make the lookup fail
        return self.repo.unfiltered().changelog.rev(self.linknode(node))

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False,
                      deltamode=None):
        """yield revlogrevisiondelta objects for the requested nodes

        Most interface parameters are accepted for compatibility but ignored.
        The delta base is seeded from the first node's p1 and only reset to
        nullid when it would reference the node itself; a full revision is
        emitted whenever the base is nullid.
        """
        # we don't use any of these parameters here
        del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
        del deltamode
        prevnode = None
        for node in nodes:
            p1, p2 = self.parents(node)
            if prevnode is None:
                basenode = prevnode = p1
            if basenode == node:
                basenode = nullid
            if basenode != nullid:
                revision = None
                delta = self.revdiff(basenode, node)
            else:
                revision = self.revision(node, raw=True)
                delta = None
            yield revlog.revlogrevisiondelta(
                node=node,
                p1node=p1,
                p2node=p2,
                linknode=self.linknode(node),
                basenode=basenode,
                flags=self.flags(node),
                baserevisionsize=None,
                revision=revision,
                delta=delta,
            )

    def revdiff(self, node1, node2):
        """return a textual delta between two raw revisions"""
        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))

    def lookup(self, node):
        """coerce a 40-char hex string to a binary node; validate length"""
        if len(node) == 40:
            node = bin(node)
        if len(node) != 20:
            raise error.LookupError(node, self.filename,
                                    _('invalid lookup input'))

        return node

    def rev(self, node):
        # This is a hack to make TortoiseHG work.
        return node

    def node(self, rev):
        # This is a hack.
        if isinstance(rev, int):
            raise error.ProgrammingError(
                'remotefilelog does not convert integer rev to node')
        return rev

    def revision(self, node, raw=False):
        """returns the revlog contents at this node.
        this includes the meta data traditionally included in file revlogs.
        this is generally only used for bundling and communicating with vanilla
        hg clients.
        """
        if node == nullid:
            return ""
        if len(node) != 20:
            raise error.LookupError(node, self.filename,
                                    _('invalid revision input'))
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported

        store = self.repo.contentstore
        rawtext = store.get(self.filename, node)
        if raw:
            return rawtext
        # with raw=False, apply any registered read-side flag processors
        # (e.g. LFS) before handing the text back
        flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
        if flags == 0:
            return rawtext
        text, verifyhash = self._processflags(rawtext, flags, 'read')
        return text

    def rawdata(self, node):
        # NOTE(review): the storage interface defines rawdata() as the
        # flag-processor-free payload, i.e. revision(node, raw=True); with
        # raw=False this re-applies read transforms for flagged revisions.
        # Harmless while flags are 0, but confirm the intent before relying
        # on it for LFS-flagged content.
        return self.revision(node, raw=False)

    def _processflags(self, text, flags, operation, raw=False):
        """run registered flag processors over *text* for read/write/raw

        Returns (text, validatehash).  Write processors run in reversed flag
        order so that read and write transformations compose symmetrically.
        """
        # mostly copied from hg/mercurial/revlog.py
        validatehash = True
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)
        for flag in orderedflags:
            if flag & flags:
                vhash = True
                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise revlog.RevlogError(message)
                readfunc, writefunc, rawfunc = revlog._flagprocessors[flag]
                if raw:
                    vhash = rawfunc(self, text)
                elif operation == 'read':
                    text, vhash = readfunc(self, text)
                elif operation == 'write':
                    text, vhash = writefunc(self, text)
                validatehash = validatehash and vhash
        return text, validatehash

    def _read(self, id):
        """reads the raw file blob from disk, cache, or server"""
        fileservice = self.repo.fileservice
        localcache = fileservice.localcache
        cachekey = fileserverclient.getcachekey(self.repo.name, self.filename,
                                                id)
        # 1) shared local cache
        try:
            return localcache.read(cachekey)
        except KeyError:
            pass

        # 2) repo-local store
        # NOTE(review): self.localpath is never assigned in __init__, so this
        # branch would raise AttributeError if reached — confirm whether
        # _read still has callers.
        localkey = fileserverclient.getlocalkey(self.filename, id)
        localpath = os.path.join(self.localpath, localkey)
        try:
            return shallowutil.readfile(localpath)
        except IOError:
            pass

        # 3) fetch from the server, then retry the cache
        fileservice.prefetch([(self.filename, id)])
        try:
            return localcache.read(cachekey)
        except KeyError:
            pass

        raise error.LookupError(id, self.filename, _('no node'))

    def ancestormap(self, node):
        """return {node: (p1, p2, linknode, copyfrom)} for the full history
        of *node*"""
        return self.repo.metadatastore.getancestors(self.filename, node)

    def ancestor(self, a, b):
        """return the greatest common ancestor node of a and b (nullid if
        none)"""
        if a == nullid or b == nullid:
            return nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))

        ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(nodemap.__getitem__, ancs))
        return nullid

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""

        # NOTE(review): this degenerate case returns a bare node while the
        # normal path returns an iterable of nodes — callers must cope with
        # both shapes; confirm whether a list was intended here.
        if a == nullid or b == nullid:
            return nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))

        ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
        return map(nodemap.__getitem__, ancs)

    def _buildrevgraph(self, a, b):
        """Builds a numeric revision graph for the given two nodes.
        Returns a node->rev map and a rev->[revs] parent function.
        """
        amap = self.ancestormap(a)
        bmap = self.ancestormap(b)

        # Union the two maps
        parentsmap = collections.defaultdict(list)
        allparents = set()
        for mapping in (amap, bmap):
            for node, pdata in mapping.iteritems():
                parents = parentsmap[node]
                p1, p2, linknode, copyfrom = pdata
                # Don't follow renames (copyfrom).
                # remotefilectx.ancestor does that.
                if p1 != nullid and not copyfrom:
                    parents.append(p1)
                    allparents.add(p1)
                if p2 != nullid:
                    parents.append(p2)
                    allparents.add(p2)

        # Breadth first traversal to build linkrev graph
        parentrevs = collections.defaultdict(list)
        revmap = {}
        # start from the heads: nodes that are nobody's parent
        queue = collections.deque(((None, n) for n in parentsmap
                                   if n not in allparents))
        while queue:
            prevrev, current = queue.pop()
            if current in revmap:
                if prevrev:
                    parentrevs[prevrev].append(revmap[current])
                continue

            # Assign linkrevs in reverse order, so start at
            # len(parentsmap) and work backwards.
            currentrev = len(parentsmap) - len(revmap) - 1
            revmap[current] = currentrev

            if prevrev:
                parentrevs[prevrev].append(currentrev)

            for parent in parentsmap.get(current):
                queue.appendleft((currentrev, parent))

        return revmap, parentrevs.__getitem__

    def strip(self, minlink, transaction):
        # stripping is a no-op: remote stores are append-only from the
        # client's perspective
        pass

    # misc unused things
    def files(self):
        return []

    def checksize(self):
        return 0, 0
General Comments 0
You need to be logged in to leave comments. Login now