remotefilelog: byteify the message for a few StorageErrors...
Matt Harbison
r50754:8b369bcb default
@@ -1,540 +1,540 @@
1 1 # shallowutil.py -- remotefilelog utilities
2 2 #
3 3 # Copyright 2014 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import os
10 10 import stat
11 11 import struct
12 12 import tempfile
13 13
14 14 from mercurial.i18n import _
15 15 from mercurial.pycompat import open
16 16 from mercurial.node import hex
17 17 from mercurial import (
18 18 error,
19 19 pycompat,
20 20 revlog,
21 21 util,
22 22 )
23 23 from mercurial.utils import (
24 24 hashutil,
25 25 storageutil,
26 26 stringutil,
27 27 )
28 28 from . import constants
29 29
30 30 if not pycompat.iswindows:
31 31 import grp
32 32
33 33
34 34 def isenabled(repo):
35 35 """returns whether the repository is remotefilelog enabled or not"""
36 36 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
37 37
38 38
39 39 def getcachekey(reponame, file, id):
40 40 pathhash = hex(hashutil.sha1(file).digest())
41 41 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
42 42
43 43
44 44 def getlocalkey(file, id):
45 45 pathhash = hex(hashutil.sha1(file).digest())
46 46 return os.path.join(pathhash, id)
47 47
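# A minimal usage sketch of the two key helpers above (illustrative;
# the path and id values are arbitrary): the cache key fans entries out
# under reponame/<2 hex>/<38 hex>/<id>, while the local key is a flat
# <pathhash>/<id>.
#
#     pathhash = hex(hashutil.sha1(b'foo/bar.py').digest())
#     assert getcachekey(b'repo', b'foo/bar.py', b'deadbeef') == \
#         os.path.join(b'repo', pathhash[:2], pathhash[2:], b'deadbeef')
#     assert getlocalkey(b'foo/bar.py', b'deadbeef') == \
#         os.path.join(pathhash, b'deadbeef')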
48 48
49 49 def getcachepath(ui, allowempty=False):
50 50 cachepath = ui.config(b"remotefilelog", b"cachepath")
51 51 if not cachepath:
52 52 if allowempty:
53 53 return None
54 54 else:
55 55 raise error.Abort(
56 56 _(b"could not find config option remotefilelog.cachepath")
57 57 )
58 58 return util.expandpath(cachepath)
59 59
60 60
61 61 def getcachepackpath(repo, category):
62 62 cachepath = getcachepath(repo.ui)
63 63 if category != constants.FILEPACK_CATEGORY:
64 64 return os.path.join(cachepath, repo.name, b'packs', category)
65 65 else:
66 66 return os.path.join(cachepath, repo.name, b'packs')
67 67
68 68
69 69 def getlocalpackpath(base, category):
70 70 return os.path.join(base, b'packs', category)
71 71
72 72
73 73 def createrevlogtext(text, copyfrom=None, copyrev=None):
74 74 """returns a string that matches the revlog contents in a
75 75 traditional revlog
76 76 """
77 77 meta = {}
78 78 if copyfrom or text.startswith(b'\1\n'):
79 79 if copyfrom:
80 80 meta[b'copy'] = copyfrom
81 81 meta[b'copyrev'] = copyrev
82 82 text = storageutil.packmeta(meta, text)
83 83
84 84 return text
85 85
86 86
87 87 def parsemeta(text):
88 88 """parse mercurial filelog metadata"""
89 89 meta, size = storageutil.parsemeta(text)
90 90 if text.startswith(b'\1\n'):
91 91 s = text.index(b'\1\n', 2)
92 92 text = text[s + 2 :]
93 93 return meta or {}, text
94 94
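# A hedged round-trip sketch for the two helpers above (assumes the
# usual "\1\n...\1\n" metadata framing from storageutil.packmeta; the
# copy source and revision below are made up):
#
#     blob = createrevlogtext(b'data\n', copyfrom=b'old/name', copyrev=b'0' * 40)
#     meta, text = parsemeta(blob)
#     assert text == b'data\n' and meta[b'copy'] == b'old/name'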
95 95
96 96 def sumdicts(*dicts):
97 97 """Adds all the values of *dicts together into one dictionary. This assumes
98 98 the values in *dicts are all summable.
99 99
100 100 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
101 101 """
102 102 result = collections.defaultdict(lambda: 0)
103 103 for dict in dicts:
104 104 for k, v in dict.items():
105 105 result[k] += v
106 106 return result
107 107
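# An illustrative check of the docstring example above:
#
#     assert sumdicts({b'a': 4, b'b': 2}, {b'b': 3, b'c': 1}) == \
#         {b'a': 4, b'b': 5, b'c': 1}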
108 108
109 109 def prefixkeys(dict, prefix):
110 110 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
111 111 result = {}
112 112 for k, v in dict.items():
113 113 result[prefix + k] = v
114 114 return result
115 115
116 116
117 117 def reportpackmetrics(ui, prefix, *stores):
118 118 dicts = [s.getmetrics() for s in stores]
119 119 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
120 120 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
121 121
122 122
123 123 def _parsepackmeta(metabuf):
124 124 """parse datapack meta, bytes (<metadata-list>) -> dict
125 125
126 126 The dict contains raw content - both keys and values are strings.
127 127 Upper-level business may want to convert some of them to other types like
128 128 integers, on their own.
129 129
130 130 raise ValueError if the data is corrupted
131 131 """
132 132 metadict = {}
133 133 offset = 0
134 134 buflen = len(metabuf)
135 135 while buflen - offset >= 3:
136 136 key = metabuf[offset : offset + 1]
137 137 offset += 1
138 138 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
139 139 offset += 2
140 140 if offset + metalen > buflen:
141 141 raise ValueError(b'corrupted metadata: incomplete buffer')
142 142 value = metabuf[offset : offset + metalen]
143 143 metadict[key] = value
144 144 offset += metalen
145 145 if offset != buflen:
146 146 raise ValueError(b'corrupted metadata: redundant data')
147 147 return metadict
148 148
149 149
150 150 def _buildpackmeta(metadict):
151 151 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
152 152
153 153 The dict contains raw content - both keys and values are strings.
154 154 Upper-level business may want to serialize some other types (like
155 155 integers) to strings before calling this function.
156 156
157 157 raise ProgrammingError when metadata key is illegal, or ValueError if
158 158 length limit is exceeded
159 159 """
160 160 metabuf = b''
161 161 for k, v in sorted((metadict or {}).items()):
162 162 if len(k) != 1:
163 163 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
164 164 if len(v) > 0xFFFE:
165 165 raise ValueError(
166 166 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
167 167 )
168 168 metabuf += k
169 169 metabuf += struct.pack(b'!H', len(v))
170 170 metabuf += v
171 171 # len(metabuf) is guaranteed representable in 4 bytes, because there are
172 172 # only 256 keys, and for each value, len(value) <= 0xfffe.
173 173 return metabuf
174 174
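# A small sketch (the one-byte keys here are arbitrary examples): each
# entry serializes as <1-byte key><2-byte big-endian length><value>, and
# _parsepackmeta inverts _buildpackmeta for well-formed input.
#
#     buf = _buildpackmeta({b'f': b'\x01', b's': b'\x00\x10'})
#     assert buf == b'f\x00\x01\x01s\x00\x02\x00\x10'
#     assert _parsepackmeta(buf) == {b'f': b'\x01', b's': b'\x00\x10'}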
175 175
176 176 _metaitemtypes = {
177 177 constants.METAKEYFLAG: (int, int),
178 178 constants.METAKEYSIZE: (int, int),
179 179 }
180 180
181 181
182 182 def buildpackmeta(metadict):
183 183 """like _buildpackmeta, but typechecks metadict and normalize it.
184 184
185 185 This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
186 186 and METAKEYFLAG will be dropped if its value is 0.
187 187 """
188 188 newmeta = {}
189 189 for k, v in (metadict or {}).items():
190 190 expectedtype = _metaitemtypes.get(k, (bytes,))
191 191 if not isinstance(v, expectedtype):
192 192 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
193 193 # normalize int to binary buffer
194 194 if int in expectedtype:
195 195 # optimization: remove flag if it's 0 to save space
196 196 if k == constants.METAKEYFLAG and v == 0:
197 197 continue
198 198 v = int2bin(v)
199 199 newmeta[k] = v
200 200 return _buildpackmeta(newmeta)
201 201
202 202
203 203 def parsepackmeta(metabuf):
204 204 """like _parsepackmeta, but convert fields to desired types automatically.
205 205
206 206 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
207 207 integers.
208 208 """
209 209 metadict = _parsepackmeta(metabuf)
210 210 for k, v in metadict.items():
211 211 if k in _metaitemtypes and int in _metaitemtypes[k]:
212 212 metadict[k] = bin2int(v)
213 213 return metadict
214 214
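# An illustrative round trip through the typed wrappers above: integer
# values are binary-encoded on the way out and decoded on the way back,
# and a zero METAKEYFLAG is dropped entirely.
#
#     buf = buildpackmeta({constants.METAKEYFLAG: 0, constants.METAKEYSIZE: 10})
#     assert parsepackmeta(buf) == {constants.METAKEYSIZE: 10}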
215 215
216 216 def int2bin(n):
217 217 """convert a non-negative integer to raw binary buffer"""
218 218 buf = bytearray()
219 219 while n > 0:
220 220 buf.insert(0, n & 0xFF)
221 221 n >>= 8
222 222 return bytes(buf)
223 223
224 224
225 225 def bin2int(buf):
226 226 """the reverse of int2bin, convert a binary buffer to an integer"""
227 227 x = 0
228 228 for b in bytearray(buf):
229 229 x <<= 8
230 230 x |= b
231 231 return x
232 232
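# Sketch: the encoding above is big-endian and minimal-width, so zero
# encodes to the empty buffer.
#
#     assert int2bin(0x1234) == b'\x12\x34'
#     assert bin2int(b'\x12\x34') == 0x1234
#     assert int2bin(0) == b'' and bin2int(b'') == 0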
233 233
234 234 class BadRemotefilelogHeader(error.StorageError):
235 235 """Exception raised when parsing a remotefilelog blob header fails."""
236 236
237 237
238 238 def parsesizeflags(raw):
239 239 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
240 240
241 241 see remotefilelogserver.createfileblob for the format.
242 242 raise BadRemotefilelogHeader if the content is ill-formed.
243 243 """
244 244 flags = revlog.REVIDX_DEFAULT_FLAGS
245 245 size = None
246 246 try:
247 247 index = raw.index(b'\0')
248 248 except ValueError:
249 249 raise BadRemotefilelogHeader(
250 "unexpected remotefilelog header: illegal format"
250 b"unexpected remotefilelog header: illegal format"
251 251 )
252 252 header = raw[:index]
253 253 if header.startswith(b'v'):
254 254 # v1 and above, header starts with 'v'
255 255 if header.startswith(b'v1\n'):
256 256 for s in header.split(b'\n'):
257 257 if s.startswith(constants.METAKEYSIZE):
258 258 size = int(s[len(constants.METAKEYSIZE) :])
259 259 elif s.startswith(constants.METAKEYFLAG):
260 260 flags = int(s[len(constants.METAKEYFLAG) :])
261 261 else:
262 262 raise BadRemotefilelogHeader(
263 263 b'unsupported remotefilelog header: %s' % header
264 264 )
265 265 else:
266 266 # v0, str(int(size)) is the header
267 267 size = int(header)
268 268 if size is None:
269 269 raise BadRemotefilelogHeader(
270 "unexpected remotefilelog header: no size found"
270 b"unexpected remotefilelog header: no size found"
271 271 )
272 272 return index + 1, size, flags
273 273
274 274
275 275 def buildfileblobheader(size, flags, version=None):
276 276 """return the header of a remotefilelog blob.
277 277
278 278 see remotefilelogserver.createfileblob for the format.
279 279 approximately the reverse of parsesizeflags.
280 280
281 281 version could be 0 or 1, or None (auto decide).
282 282 """
283 283 # choose v0 if flags is empty, otherwise v1
284 284 if version is None:
285 285 version = int(bool(flags))
286 286 if version == 1:
287 287 header = b'v1\n%s%d\n%s%d' % (
288 288 constants.METAKEYSIZE,
289 289 size,
290 290 constants.METAKEYFLAG,
291 291 flags,
292 292 )
293 293 elif version == 0:
294 294 if flags:
295 295 raise error.ProgrammingError(b'fileblob v0 does not support flag')
296 296 header = b'%d' % size
297 297 else:
298 298 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
299 299 return header
300 300
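# A hedged round-trip sketch: a v1 header built here parses back with
# parsesizeflags, whose returned offset also counts the NUL separator
# that remotefilelogserver.createfileblob writes after the header.
#
#     header = buildfileblobheader(100, 0, version=1)
#     offset, size, flags = parsesizeflags(header + b'\0' + b'x' * 100)
#     assert (offset, size, flags) == (len(header) + 1, 100, 0)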
301 301
302 302 def ancestormap(raw):
303 303 offset, size, flags = parsesizeflags(raw)
304 304 start = offset + size
305 305
306 306 mapping = {}
307 307 while start < len(raw):
308 308 divider = raw.index(b'\0', start + 80)
309 309
310 310 currentnode = raw[start : (start + 20)]
311 311 p1 = raw[(start + 20) : (start + 40)]
312 312 p2 = raw[(start + 40) : (start + 60)]
313 313 linknode = raw[(start + 60) : (start + 80)]
314 314 copyfrom = raw[(start + 80) : divider]
315 315
316 316 mapping[currentnode] = (p1, p2, linknode, copyfrom)
317 317 start = divider + 1
318 318
319 319 return mapping
320 320
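# An illustrative blob (node values are arbitrary): a v0 header, two
# bytes of text, then one ancestor entry of four 20-byte hashes plus a
# NUL-terminated copyfrom path (empty here).
#
#     node, p1, p2, link = b'\x11' * 20, b'\x22' * 20, b'\x33' * 20, b'\x44' * 20
#     raw = b'2\0' + b'x\n' + node + p1 + p2 + link + b'\0'
#     assert ancestormap(raw) == {node: (p1, p2, link, b'')}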
321 321
322 322 def readfile(path):
323 323 f = open(path, b'rb')
324 324 try:
325 325 result = f.read()
326 326
327 327 # we should never have empty files
328 328 if not result:
329 329 os.remove(path)
330 330 raise IOError(b"empty file: %s" % path)
331 331
332 332 return result
333 333 finally:
334 334 f.close()
335 335
336 336
337 337 def unlinkfile(filepath):
338 338 if pycompat.iswindows:
339 339 # On Windows, os.unlink cannot delete readonly files
340 340 os.chmod(filepath, stat.S_IWUSR)
341 341 os.unlink(filepath)
342 342
343 343
344 344 def renamefile(source, destination):
345 345 if pycompat.iswindows:
346 346 # On Windows, os.rename cannot rename readonly files
347 347 # and cannot overwrite destination if it exists
348 348 os.chmod(source, stat.S_IWUSR)
349 349 if os.path.isfile(destination):
350 350 os.chmod(destination, stat.S_IWUSR)
351 351 os.unlink(destination)
352 352
353 353 os.rename(source, destination)
354 354
355 355
356 356 def writefile(path, content, readonly=False):
357 357 dirname, filename = os.path.split(path)
358 358 if not os.path.exists(dirname):
359 359 try:
360 360 os.makedirs(dirname)
361 361 except FileExistsError:
362 362 pass
363 363
364 364 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
365 365 os.close(fd)
366 366
367 367 try:
368 368 f = util.posixfile(temp, b'wb')
369 369 f.write(content)
370 370 f.close()
371 371
372 372 if readonly:
373 373 mode = 0o444
374 374 else:
375 375 # tempfiles are created with 0o600, so we need to manually set the
376 376 # mode.
377 377 oldumask = os.umask(0)
378 378 # there's no way to get the umask without modifying it, so set it
379 379 # back
380 380 os.umask(oldumask)
381 381 mode = ~oldumask
382 382
383 383 renamefile(temp, path)
384 384 os.chmod(path, mode)
385 385 except Exception:
386 386 try:
387 387 unlinkfile(temp)
388 388 except OSError:
389 389 pass
390 390 raise
391 391
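# A hedged sketch of the set-and-restore trick used above: os.umask
# returns the previous mask while installing a new one, so this is the
# portable way to read it. Deriving a file mode from it commonly also
# masks with 0o666 (an assumption; the code above applies the raw
# complement).
#
#     old = os.umask(0)
#     os.umask(old)  # restore immediately
#     mode = 0o666 & ~old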
392 392
393 393 def sortnodes(nodes, parentfunc):
394 394 """Topologically sorts the nodes, using the parentfunc to find
395 395 the parents of nodes."""
396 396 nodes = set(nodes)
397 397 childmap = {}
398 398 parentmap = {}
399 399 roots = []
400 400
401 401 # Build a child and parent map
402 402 for n in nodes:
403 403 parents = [p for p in parentfunc(n) if p in nodes]
404 404 parentmap[n] = set(parents)
405 405 for p in parents:
406 406 childmap.setdefault(p, set()).add(n)
407 407 if not parents:
408 408 roots.append(n)
409 409
410 410 roots.sort()
411 411 # Process roots, adding children to the queue as they become roots
412 412 results = []
413 413 while roots:
414 414 n = roots.pop(0)
415 415 results.append(n)
416 416 if n in childmap:
417 417 children = childmap[n]
418 418 for c in children:
419 419 childparents = parentmap[c]
420 420 childparents.remove(n)
421 421 if len(childparents) == 0:
422 422 # insert at the beginning, that way child nodes
423 423 # are likely to be output immediately after their
424 424 # parents. This gives better compression results.
425 425 roots.insert(0, c)
426 426
427 427 return results
428 428
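# A quick sketch on a three-node chain (node names are arbitrary):
# parents always come out before their children.
#
#     parents = {b'a': [], b'b': [b'a'], b'c': [b'b']}
#     assert sortnodes([b'a', b'b', b'c'], parents.get) == [b'a', b'b', b'c']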
429 429
430 430 def readexactly(stream, n):
431 431 '''read n bytes from stream.read and abort if fewer were available'''
432 432 s = stream.read(n)
433 433 if len(s) < n:
434 434 raise error.Abort(
435 435 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
436 436 % (len(s), n)
437 437 )
438 438 return s
439 439
440 440
441 441 def readunpack(stream, fmt):
442 442 data = readexactly(stream, struct.calcsize(fmt))
443 443 return struct.unpack(fmt, data)
444 444
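# A minimal sketch: the readers above work on any object with a read()
# method, e.g. an in-memory stream.
#
#     import io
#     stream = io.BytesIO(struct.pack(b'!H', 5) + b'hello')
#     (length,) = readunpack(stream, b'!H')
#     assert (length, readexactly(stream, length)) == (5, b'hello')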
445 445
446 446 def readpath(stream):
447 447 rawlen = readexactly(stream, constants.FILENAMESIZE)
448 448 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
449 449 return readexactly(stream, pathlen)
450 450
451 451
452 452 def readnodelist(stream):
453 453 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
454 454 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
455 455 for i in range(nodecount):
456 456 yield readexactly(stream, constants.NODESIZE)
457 457
458 458
459 459 def readpathlist(stream):
460 460 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
461 461 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
462 462 for i in range(pathcount):
463 463 yield readpath(stream)
464 464
465 465
466 466 def getgid(groupname):
467 467 try:
468 468 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
469 469 return gid
470 470 except KeyError:
471 471 return None
472 472
473 473
474 474 def setstickygroupdir(path, gid, warn=None):
475 475 if gid is None:
476 476 return
477 477 try:
478 478 os.chown(path, -1, gid)
479 479 os.chmod(path, 0o2775)
480 480 except (IOError, OSError) as ex:
481 481 if warn:
482 482 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
483 483
484 484
485 485 def mkstickygroupdir(ui, path):
486 486 """Creates the given directory (if it doesn't exist) and give it a
487 487 particular group with setgid enabled."""
488 488 gid = None
489 489 groupname = ui.config(b"remotefilelog", b"cachegroup")
490 490 if groupname:
491 491 gid = getgid(groupname)
492 492 if gid is None:
493 493 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
494 494
495 495 # we use a single stat syscall to test the existence and mode / group bit
496 496 st = None
497 497 try:
498 498 st = os.stat(path)
499 499 except OSError:
500 500 pass
501 501
502 502 if st:
503 503 # exists
504 504 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
505 505 # permission needs to be fixed
506 506 setstickygroupdir(path, gid, ui.warn)
507 507 return
508 508
509 509 oldumask = os.umask(0o002)
510 510 try:
511 511 missingdirs = [path]
512 512 path = os.path.dirname(path)
513 513 while path and not os.path.exists(path):
514 514 missingdirs.append(path)
515 515 path = os.path.dirname(path)
516 516
517 517 for path in reversed(missingdirs):
518 518 try:
519 519 os.mkdir(path)
520 520 except FileExistsError:
521 521 pass
522 522
523 523 for path in missingdirs:
524 524 setstickygroupdir(path, gid, ui.warn)
525 525 finally:
526 526 os.umask(oldumask)
527 527
528 528
529 529 def getusername(ui):
530 530 try:
531 531 return stringutil.shortuser(ui.username())
532 532 except Exception:
533 533 return b'unknown'
534 534
535 535
536 536 def getreponame(ui):
537 537 reponame = ui.config(b'paths', b'default')
538 538 if reponame:
539 539 return os.path.basename(reponame)
540 540 return b"unknown"
@@ -1,72 +1,72 @@
1 1 #require no-windows
2 2
3 3 $ . "$TESTDIR/remotefilelog-library.sh"
4 4
5 5 $ hg init master
6 6 $ cd master
7 7 $ cat >> .hg/hgrc <<EOF
8 8 > [remotefilelog]
9 9 > server=True
10 10 > EOF
11 11 $ echo x > x
12 12 $ echo y > y
13 13 $ echo z > z
14 14 $ hg commit -qAm xy
15 15
16 16 $ cd ..
17 17
18 18 $ hgcloneshallow ssh://user@dummy/master shallow -q
19 19 3 files fetched over 1 fetches - (3 misses, 0.00% hit ratio) over *s (glob)
20 20 $ cd shallow
21 21
22 22 Verify corrupt cache handling repairs by default
23 23
24 24 $ hg up -q null
25 25 $ chmod u+w $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
26 26 $ echo x > $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
27 27 $ hg up tip
28 28 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
29 29 1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob)
30 30
31 31 Verify corrupt cache error message
32 32
33 33 $ hg up -q null
34 34 $ cat >> .hg/hgrc <<EOF
35 35 > [remotefilelog]
36 36 > validatecache=off
37 37 > EOF
38 38 $ chmod u+w $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
39 39 $ echo x > $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
40 40 $ hg up tip 2>&1 | egrep "^[^ ].*unexpected remotefilelog"
41 hgext.remotefilelog.shallowutil.BadRemotefilelogHeader: unexpected remotefilelog header: illegal format (py3 !)
41 abort: unexpected remotefilelog header: illegal format
42 42
43 43 Verify detection and remediation when remotefilelog.validatecachelog is set
44 44
45 45 $ cat >> .hg/hgrc <<EOF
46 46 > [remotefilelog]
47 47 > validatecachelog=$PWD/.hg/remotefilelog_cache.log
48 48 > validatecache=strict
49 49 > EOF
50 50 $ chmod u+w $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
51 51 $ echo x > $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
52 52 $ hg up tip
53 53 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
54 54 1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob)
55 55 $ cat .hg/remotefilelog_cache.log
56 56 corrupt $TESTTMP/hgcache/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0 during contains
57 57
58 58 Verify handling of corrupt server cache
59 59
60 60 $ rm -f ../master/.hg/remotefilelogcache/y/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
61 61 $ touch ../master/.hg/remotefilelogcache/y/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
62 62 $ clearcache
63 63 $ hg prefetch -r .
64 64 3 files fetched over 1 fetches - (3 misses, 0.00% hit ratio) over *s (glob)
65 65 $ test -s ../master/.hg/remotefilelogcache/y/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
66 66 $ hg debugremotefilelog $CACHEDIR/master/95/cb0bfd2977c761298d9624e4b4d4c72a39974a/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
67 67 size: 2 bytes
68 68 path: $TESTTMP/hgcache/master/95/cb0bfd2977c761298d9624e4b4d4c72a39974a/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
69 69 key: 076f5e2225b3
70 70
71 71 node => p1 p2 linknode copyfrom
72 72 076f5e2225b3 => 000000000000 000000000000 f3d0bb0d1e48