##// END OF EJS Templates
remotefilelog: correctly reject wdir filenodes...
Augie Fackler -
r42264:864f9f63 default
parent child Browse files
Show More
@@ -1,452 +1,459 b''
1 1 # remotefilelog.py - filelog implementation where filelog history is stored
2 2 # remotely
3 3 #
4 4 # Copyright 2013 Facebook, Inc.
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 from mercurial.node import bin, nullid
13 from mercurial.node import (
14 bin,
15 nullid,
16 wdirfilenodeids,
17 wdirid,
18 )
14 19 from mercurial.i18n import _
15 20 from mercurial import (
16 21 ancestor,
17 22 error,
18 23 mdiff,
19 24 revlog,
20 25 )
21 26 from mercurial.utils import storageutil
22 27
23 28 from . import (
24 29 constants,
25 30 fileserverclient,
26 31 shallowutil,
27 32 )
28 33
class remotefilelognodemap(object):
    """Minimal nodemap facade backed by a remotefilelog content store.

    Membership is answered by asking the store which (filename, node)
    pairs it is missing: a node is considered known exactly when the
    store reports nothing missing for it.
    """

    def __init__(self, filename, store):
        self._filename = filename
        self._store = store

    def __contains__(self, node):
        # Known iff the store is not missing this (filename, node) pair.
        return not self._store.getmissing([(self._filename, node)])

    def __get__(self, node):
        # Identity lookup: validate membership, then hand the node back.
        if node in self:
            return node
        raise KeyError(node)
class remotefilelog(object):
    """A filelog-like storage class whose revision data lives in shallow
    repo stores (repo.contentstore / repo.metadatastore) rather than a
    local revlog.

    Only node-addressed access is supported: integer revision numbers are
    rejected (see flags/node) or passed through as if they were nodes
    (see rev/parentrevs), which is sufficient for the callers this class
    serves.
    """

    # Mirror the revlog attribute; remotefilelog always advertises
    # generaldelta-style storage to callers that inspect it.
    _generaldelta = True

    def __init__(self, opener, path, repo):
        self.opener = opener
        self.filename = path
        self.repo = repo
        # Nodemap facade answering "is this node available?" via the
        # shared content store.
        self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)

        self.version = 1

    def read(self, node):
        """returns the file contents at this node"""
        t = self.revision(node)
        if not t.startswith('\1\n'):
            return t
        # Strip the filelog copy-metadata header ("\1\n...\1\n") so the
        # caller gets only the file contents.
        s = t.index('\1\n', 2)
        return t[s + 2:]

    def add(self, text, meta, transaction, linknode, p1=None, p2=None):
        """Add a revision, hashing text together with any copy metadata.

        Returns the node of the new revision.
        """
        # hash with the metadata, like in vanilla filelogs
        hashtext = shallowutil.createrevlogtext(text, meta.get('copy'),
                                                meta.get('copyrev'))
        node = storageutil.hashrevisionsha1(hashtext, p1, p2)
        return self.addrevision(hashtext, transaction, linknode, p1, p2,
                                node=node)

    def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
        """Serialize a revision into the remotefilelog blob format.

        The blob is: a size/flags header, NUL, the raw text, then a
        NUL-separated ancestor list (node, p1, p2, linknode, copyfrom)
        starting with this revision and followed by its ancestors in
        topological order.
        """
        # text passed to "_createfileblob" does not include filelog metadata
        header = shallowutil.buildfileblobheader(len(text), flags)
        data = "%s\0%s" % (header, text)

        realp1 = p1
        copyfrom = ""
        if meta and 'copy' in meta:
            # For a copy/rename, the ancestor entry records the source
            # file's revision as p1 and the source path as copyfrom.
            copyfrom = meta['copy']
            realp1 = bin(meta['copyrev'])

        data += "%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)

        visited = set()

        pancestors = {}
        queue = []
        if realp1 != nullid:
            p1flog = self
            if copyfrom:
                # Ancestors of a copy source live in the source file's log.
                p1flog = remotefilelog(self.opener, copyfrom, self.repo)

            pancestors.update(p1flog.ancestormap(realp1))
            queue.append(realp1)
            visited.add(realp1)
        if p2 != nullid:
            pancestors.update(self.ancestormap(p2))
            queue.append(p2)
            visited.add(p2)

        ancestortext = ""

        # add the ancestors in topological order
        while queue:
            c = queue.pop(0)
            pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]

            pacopyfrom = pacopyfrom or ''
            ancestortext += "%s%s%s%s%s\0" % (
                c, pa1, pa2, ancestorlinknode, pacopyfrom)

            if pa1 != nullid and pa1 not in visited:
                queue.append(pa1)
                visited.add(pa1)
            if pa2 != nullid and pa2 not in visited:
                queue.append(pa2)
                visited.add(pa2)

        data += ancestortext

        return data

    def addrevision(self, text, transaction, linknode, p1, p2, cachedelta=None,
                    node=None, flags=revlog.REVIDX_DEFAULT_FLAGS):
        """Add a revision whose text includes the hg filelog metadata header.

        Computes the node when not supplied, runs write-side flag
        processors, and delegates to addrawrevision.
        """
        # text passed to "addrevision" includes hg filelog metadata header
        if node is None:
            node = storageutil.hashrevisionsha1(text, p1, p2)

        meta, metaoffset = storageutil.parsemeta(text)
        rawtext, validatehash = self._processflags(text, flags, 'write')
        return self.addrawrevision(rawtext, transaction, linknode, p1, p2,
                                   node, flags, cachedelta,
                                   _metatuple=(meta, metaoffset))

    def addrawrevision(self, rawtext, transaction, linknode, p1, p2, node,
                       flags, cachedelta=None, _metatuple=None):
        """Store an already-flag-processed revision in the content store.

        Returns the node.  _metatuple is a private fast path used by
        addrevision when the metadata has already been parsed.
        """
        if _metatuple:
            # _metatuple: used by "addrevision" internally by remotefilelog
            # meta was parsed confidently
            meta, metaoffset = _metatuple
        else:
            # not from self.addrevision, but something else (repo._filecommit)
            # calls addrawrevision directly. remotefilelog needs to get and
            # strip filelog metadata.
            # we don't have confidence about whether rawtext contains filelog
            # metadata or not (flag processor could replace it), so we just
            # parse it as best-effort.
            # in LFS (flags != 0)'s case, the best way is to call LFS code to
            # get the meta information, instead of storageutil.parsemeta.
            meta, metaoffset = storageutil.parsemeta(rawtext)
        if flags != 0:
            # when flags != 0, be conservative and do not mangle rawtext, since
            # a read flag processor expects the text not being mangled at all.
            metaoffset = 0
        if metaoffset:
            # remotefilelog fileblob stores copy metadata in its ancestortext,
            # not its main blob. so we need to remove filelog metadata
            # (containing copy information) from text.
            blobtext = rawtext[metaoffset:]
        else:
            blobtext = rawtext
        data = self._createfileblob(blobtext, meta, flags, p1, p2, node,
                                    linknode)
        self.repo.contentstore.addremotefilelognode(self.filename, node, data)

        return node

    def renamed(self, node):
        """Return (copysource, copynode) if node is a copy, else False."""
        ancestors = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestors[node]
        if copyfrom:
            return (copyfrom, p1)

        return False

    def size(self, node):
        """return the size of a given revision"""
        return len(self.read(node))

    # remotefilelog has no storage-level transforms, so raw size equals
    # rendered size.
    rawsize = size

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """

        if node == nullid:
            return True

        nodetext = self.read(node)
        return nodetext != text

    def __nonzero__(self):
        # A remotefilelog is always considered non-empty/truthy.
        return True

    __bool__ = __nonzero__

    def __len__(self):
        if self.filename == '.hgtags':
            # The length of .hgtags is used to fast path tag checking.
            # remotefilelog doesn't support .hgtags since the entire .hgtags
            # history is needed. Use the excludepattern setting to make
            # .hgtags a normal filelog.
            return 0

        # Full history length is unknowable in a shallow repo.
        raise RuntimeError("len not supported")

    def empty(self):
        return False

    def flags(self, node):
        """Return the revlog flags stored for node (0 when none)."""
        if isinstance(node, int):
            raise error.ProgrammingError(
                'remotefilelog does not accept integer rev for flags')
        store = self.repo.contentstore
        return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)

    def parents(self, node):
        """Return (p1, p2) for node; a copy's p1 is reported as nullid,
        matching filelog semantics where the copy source is exposed via
        renamed() rather than parents()."""
        if node == nullid:
            return nullid, nullid

        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        if copyfrom:
            p1 = nullid

        return p1, p2

    def parentrevs(self, rev):
        # TODO(augie): this is a node and should be a rev, but for now
        # nothing in core seems to actually break.
        return self.parents(rev)

    def linknode(self, node):
        """Return the changelog node this file revision is linked to."""
        ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
        p1, p2, linknode, copyfrom = ancestormap[node]
        return linknode

    def linkrev(self, node):
        # Resolve through the unfiltered changelog so hidden changesets
        # still resolve.
        return self.repo.unfiltered().changelog.rev(self.linknode(node))

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False,
                      deltamode=None):
        """Yield revlogrevisiondelta objects for the requested nodes,
        delta'd against p1 where possible, full text otherwise."""
        # we don't use any of these parameters here
        del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
        del deltamode
        prevnode = None
        for node in nodes:
            p1, p2 = self.parents(node)
            if prevnode is None:
                basenode = prevnode = p1
            # NOTE(review): prevnode is never updated after the first
            # iteration, so basenode retains its value (possibly nullid)
            # for subsequent nodes — presumably intentional, but verify
            # against upstream.
            if basenode == node:
                basenode = nullid
            if basenode != nullid:
                revision = None
                delta = self.revdiff(basenode, node)
            else:
                revision = self.revision(node, raw=True)
                delta = None
            yield revlog.revlogrevisiondelta(
                node=node,
                p1node=p1,
                p2node=p2,
                linknode=self.linknode(node),
                basenode=basenode,
                flags=self.flags(node),
                baserevisionsize=None,
                revision=revision,
                delta=delta,
                )

    def revdiff(self, node1, node2):
        """Return a binary diff transforming node1's raw text into node2's."""
        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))

    def lookup(self, node):
        """Resolve a 40-char hex string or 20-byte node to a binary node."""
        if len(node) == 40:
            node = bin(node)
        if len(node) != 20:
            raise error.LookupError(node, self.filename,
                                    _('invalid lookup input'))

        return node

    def rev(self, node):
        # This is a hack to make TortoiseHG work.
        return node

    def node(self, rev):
        # This is a hack.
        if isinstance(rev, int):
            raise error.ProgrammingError(
                'remotefilelog does not convert integer rev to node')
        return rev

    def revision(self, node, raw=False):
        """returns the revlog contents at this node.
        this includes the meta data traditionally included in file revlogs.
        this is generally only used for bundling and communicating with vanilla
        hg clients.
        """
        if node == nullid:
            return ""
        if len(node) != 20:
            raise error.LookupError(node, self.filename,
                                    _('invalid revision input'))
        # Working-directory pseudo-nodes have no stored revision; reject
        # them explicitly instead of asking the server for them.
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported

        store = self.repo.contentstore
        rawtext = store.get(self.filename, node)
        if raw:
            return rawtext
        flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
        if flags == 0:
            return rawtext
        # Apply read-side flag processors (e.g. LFS) to the raw text.
        text, verifyhash = self._processflags(rawtext, flags, 'read')
        return text

    def _processflags(self, text, flags, operation, raw=False):
        """Run the registered flag processors over text for the given
        operation ('read' or 'write'); returns (text, validatehash)."""
        # mostly copied from hg/mercurial/revlog.py
        validatehash = True
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            # Write processors must run in reverse of read order so the
            # transforms compose correctly.
            orderedflags = reversed(orderedflags)
        for flag in orderedflags:
            if flag & flags:
                vhash = True
                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise revlog.RevlogError(message)
                readfunc, writefunc, rawfunc = revlog._flagprocessors[flag]
                if raw:
                    vhash = rawfunc(self, text)
                elif operation == 'read':
                    text, vhash = readfunc(self, text)
                elif operation == 'write':
                    text, vhash = writefunc(self, text)
                validatehash = validatehash and vhash
        return text, validatehash

    def _read(self, id):
        """reads the raw file blob from disk, cache, or server"""
        fileservice = self.repo.fileservice
        localcache = fileservice.localcache
        cachekey = fileserverclient.getcachekey(self.repo.name, self.filename,
                                                id)
        # 1) shared local cache
        try:
            return localcache.read(cachekey)
        except KeyError:
            pass

        # 2) repo-local store on disk
        localkey = fileserverclient.getlocalkey(self.filename, id)
        localpath = os.path.join(self.localpath, localkey)
        try:
            return shallowutil.readfile(localpath)
        except IOError:
            pass

        # 3) fetch from the server, then re-check the cache
        fileservice.prefetch([(self.filename, id)])
        try:
            return localcache.read(cachekey)
        except KeyError:
            pass

        raise error.LookupError(id, self.filename, _('no node'))

    def ancestormap(self, node):
        """Return {node: (p1, p2, linknode, copyfrom)} for node and all
        of its ancestors, as recorded in the metadata store."""
        return self.repo.metadatastore.getancestors(self.filename, node)

    def ancestor(self, a, b):
        """Return the common ancestor node of a and b (nullid if none)."""
        if a == nullid or b == nullid:
            return nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))

        ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(nodemap.__getitem__, ancs))
        return nullid

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""

        # NOTE(review): returns a bare nullid here rather than a list,
        # unlike the normal return below — matches the historical
        # behavior; callers appear to tolerate it.
        if a == nullid or b == nullid:
            return nullid

        revmap, parentfunc = self._buildrevgraph(a, b)
        nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))

        ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
        return map(nodemap.__getitem__, ancs)

    def _buildrevgraph(self, a, b):
        """Builds a numeric revision graph for the given two nodes.
        Returns a node->rev map and a rev->[revs] parent function.
        """
        amap = self.ancestormap(a)
        bmap = self.ancestormap(b)

        # Union the two maps
        parentsmap = collections.defaultdict(list)
        allparents = set()
        for mapping in (amap, bmap):
            for node, pdata in mapping.iteritems():
                parents = parentsmap[node]
                p1, p2, linknode, copyfrom = pdata
                # Don't follow renames (copyfrom).
                # remotefilectx.ancestor does that.
                if p1 != nullid and not copyfrom:
                    parents.append(p1)
                    allparents.add(p1)
                if p2 != nullid:
                    parents.append(p2)
                    allparents.add(p2)

        # Breadth first traversal to build linkrev graph
        parentrevs = collections.defaultdict(list)
        revmap = {}
        # Start from the heads: nodes that are nobody's parent.
        queue = collections.deque(((None, n) for n in parentsmap
                 if n not in allparents))
        while queue:
            prevrev, current = queue.pop()
            if current in revmap:
                if prevrev:
                    parentrevs[prevrev].append(revmap[current])
                continue

            # Assign linkrevs in reverse order, so start at
            # len(parentsmap) and work backwards.
            currentrev = len(parentsmap) - len(revmap) - 1
            revmap[current] = currentrev

            if prevrev:
                parentrevs[prevrev].append(currentrev)

            for parent in parentsmap.get(current):
                queue.appendleft((currentrev, parent))

        return revmap, parentrevs.__getitem__

    def strip(self, minlink, transaction):
        # Shallow storage has nothing to strip.
        pass

    # misc unused things
    def files(self):
        return []

    def checksize(self):
        return 0, 0
@@ -1,45 +1,40 b''
1 1 #require no-windows
2 2
3 3 $ . "$TESTDIR/remotefilelog-library.sh"
4 4
5 5 $ hg init master
6 6 $ cd master
7 7 $ cat >> .hg/hgrc <<EOF
8 8 > [remotefilelog]
9 9 > server=True
10 10 > EOF
11 11 $ echo x > x
12 12 $ hg commit -qAm x
13 13 $ echo y >> x
14 14 $ hg commit -qAm y
15 15 $ echo z >> x
16 16 $ hg commit -qAm z
17 17 $ echo a > a
18 18 $ hg commit -qAm a
19 19
20 20 $ cd ..
21 21
22 22 $ hgcloneshallow ssh://user@dummy/master shallow -q
23 23 2 files fetched over 1 fetches - (2 misses, 0.00% hit ratio) over *s (glob)
24 24 $ cd shallow
25 25
26 26 Test blame
27 27
28 28 $ hg blame x
29 29 0: x
30 30 1: y
31 31 2: z
32 32 2 files fetched over 1 fetches - (2 misses, 0.00% hit ratio) over *s (glob)
33 33
34 34 Test grepping the working directory.
35 35
36 36 $ hg grep --all-files x
37 37 x:x
38 BROKEN: modifications in the wdir tries to fetch from the server.
39 38 $ echo foo >> x
40 39 $ hg grep --all-files x
41 remote: abort: working directory revision cannot be specified
42 1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob)
43 abort: error downloading file contents:
44 'connection closed early'
45 [255]
40 x:x
General Comments 0
You need to be logged in to leave comments. Login now