remotefilelog: stop using RuntimeError for control flow...
Augie Fackler
r48344:e9720776 default
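The change introduces a dedicated exception for malformed remotefilelog blob headers: shallowutil gains BadRemotefilelogHeader (a subclass of error.StorageError), parsesizeflags raises it, and basestore._validatedata catches it instead of RuntimeError, so a genuine programming error can no longer be swallowed by the cache-validation path. A minimal, self-contained sketch of the pattern follows; BadHeader, parse_header_sketch and is_valid_blob_sketch are simplified stand-ins for the real functions shown in the hunks below.

    class BadHeader(Exception):
        """Stand-in for shallowutil.BadRemotefilelogHeader."""

    def parse_header_sketch(raw):
        # v0 header format: the decimal size of the raw text, terminated by NUL
        header, sep, _rest = raw.partition(b'\0')
        try:
            if not sep:
                raise ValueError(header)
            return int(header)
        except ValueError:
            raise BadHeader('unexpected remotefilelog header: illegal format')

    def is_valid_blob_sketch(raw):
        # control flow: a bad header just means "invalid cache entry", while
        # any unrelated exception still propagates as a real bug
        try:
            parse_header_sketch(raw)
            return True
        except (ValueError, BadHeader):
            return False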
@@ -1,461 +1,461 b''
1 1 from __future__ import absolute_import
2 2
3 3 import errno
4 4 import os
5 5 import shutil
6 6 import stat
7 7 import time
8 8
9 9 from mercurial.i18n import _
10 10 from mercurial.node import bin, hex
11 11 from mercurial.pycompat import open
12 12 from mercurial import (
13 13 error,
14 14 pycompat,
15 15 util,
16 16 )
17 17 from mercurial.utils import hashutil
18 18 from . import (
19 19 constants,
20 20 shallowutil,
21 21 )
22 22
23 23
24 24 class basestore(object):
25 25 def __init__(self, repo, path, reponame, shared=False):
26 26 """Creates a remotefilelog store object for the given repo name.
27 27
28 28 `path` - The file path where this store keeps its data
29 29 `reponame` - The name of the repo. This is used to partition data from
30 30 many repos.
31 31 `shared` - True if this store is a shared cache of data from the central
32 32 server, for many repos on this machine. False means this store is for
33 33 the local data for one repo.
34 34 """
35 35 self.repo = repo
36 36 self.ui = repo.ui
37 37 self._path = path
38 38 self._reponame = reponame
39 39 self._shared = shared
40 40 self._uid = os.getuid() if not pycompat.iswindows else None
41 41
42 42 self._validatecachelog = self.ui.config(
43 43 b"remotefilelog", b"validatecachelog"
44 44 )
45 45 self._validatecache = self.ui.config(
46 46 b"remotefilelog", b"validatecache", b'on'
47 47 )
48 48 if self._validatecache not in (b'on', b'strict', b'off'):
49 49 self._validatecache = b'on'
50 50 if self._validatecache == b'off':
51 51 self._validatecache = False
52 52
53 53 if shared:
54 54 shallowutil.mkstickygroupdir(self.ui, path)
55 55
56 56 def getmissing(self, keys):
57 57 missing = []
58 58 for name, node in keys:
59 59 filepath = self._getfilepath(name, node)
60 60 exists = os.path.exists(filepath)
61 61 if (
62 62 exists
63 63 and self._validatecache == b'strict'
64 64 and not self._validatekey(filepath, b'contains')
65 65 ):
66 66 exists = False
67 67 if not exists:
68 68 missing.append((name, node))
69 69
70 70 return missing
71 71
72 72 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
73 73
74 74 def markledger(self, ledger, options=None):
75 75 if options and options.get(constants.OPTION_PACKSONLY):
76 76 return
77 77 if self._shared:
78 78 for filename, nodes in self._getfiles():
79 79 for node in nodes:
80 80 ledger.markdataentry(self, filename, node)
81 81 ledger.markhistoryentry(self, filename, node)
82 82
83 83 def cleanup(self, ledger):
84 84 ui = self.ui
85 85 entries = ledger.sources.get(self, [])
86 86 count = 0
87 87 progress = ui.makeprogress(
88 88 _(b"cleaning up"), unit=b"files", total=len(entries)
89 89 )
90 90 for entry in entries:
91 91 if entry.gced or (entry.datarepacked and entry.historyrepacked):
92 92 progress.update(count)
93 93 path = self._getfilepath(entry.filename, entry.node)
94 94 util.tryunlink(path)
95 95 count += 1
96 96 progress.complete()
97 97
98 98 # Clean up the repo cache directory.
99 99 self._cleanupdirectory(self._getrepocachepath())
100 100
101 101 # BELOW THIS ARE NON-STANDARD APIS
102 102
103 103 def _cleanupdirectory(self, rootdir):
104 104 """Removes the empty directories and unnecessary files within the root
105 105 directory recursively. Note that this method does not remove the root
106 106 directory itself."""
107 107
108 108 oldfiles = set()
109 109 otherfiles = set()
110 110 # osutil.listdir returns stat information which saves some rmdir/listdir
111 111 # syscalls.
112 112 for name, mode in util.osutil.listdir(rootdir):
113 113 if stat.S_ISDIR(mode):
114 114 dirpath = os.path.join(rootdir, name)
115 115 self._cleanupdirectory(dirpath)
116 116
117 117 # Now that the directory specified by dirpath is potentially
118 118 # empty, try and remove it.
119 119 try:
120 120 os.rmdir(dirpath)
121 121 except OSError:
122 122 pass
123 123
124 124 elif stat.S_ISREG(mode):
125 125 if name.endswith(b'_old'):
126 126 oldfiles.add(name[:-4])
127 127 else:
128 128 otherfiles.add(name)
129 129
130 130 # Remove the files which end with suffix '_old' and have no
131 131 # corresponding file without the suffix '_old'. See addremotefilelognode
132 132 # method for the generation/purpose of files with '_old' suffix.
133 133 for filename in oldfiles - otherfiles:
134 134 filepath = os.path.join(rootdir, filename + b'_old')
135 135 util.tryunlink(filepath)
136 136
137 137 def _getfiles(self):
138 138 """Return a list of (filename, [node,...]) for all the revisions that
139 139 exist in the store.
140 140
141 141 This is useful for obtaining a list of all the contents of the store
142 142 when performing a repack to another store, since the store API requires
143 143 name+node keys and not namehash+node keys.
144 144 """
145 145 existing = {}
146 146 for filenamehash, node in self._listkeys():
147 147 existing.setdefault(filenamehash, []).append(node)
148 148
149 149 filenamemap = self._resolvefilenames(existing.keys())
150 150
151 151 for filename, sha in pycompat.iteritems(filenamemap):
152 152 yield (filename, existing[sha])
153 153
154 154 def _resolvefilenames(self, hashes):
155 155 """Given a list of filename hashes that are present in the
156 156 remotefilelog store, return a mapping from filename->hash.
157 157
158 158 This is useful when converting remotefilelog blobs into other storage
159 159 formats.
160 160 """
161 161 if not hashes:
162 162 return {}
163 163
164 164 filenames = {}
165 165 missingfilename = set(hashes)
166 166
167 167 # Start with a full manifest, since it'll cover the majority of files
168 168 for filename in self.repo[b'tip'].manifest():
169 169 sha = hashutil.sha1(filename).digest()
170 170 if sha in missingfilename:
171 171 filenames[filename] = sha
172 172 missingfilename.discard(sha)
173 173
174 174 # Scan the changelog until we've found every file name
175 175 cl = self.repo.unfiltered().changelog
176 176 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
177 177 if not missingfilename:
178 178 break
179 179 files = cl.readfiles(cl.node(rev))
180 180 for filename in files:
181 181 sha = hashutil.sha1(filename).digest()
182 182 if sha in missingfilename:
183 183 filenames[filename] = sha
184 184 missingfilename.discard(sha)
185 185
186 186 return filenames
187 187
188 188 def _getrepocachepath(self):
189 189 return (
190 190 os.path.join(self._path, self._reponame)
191 191 if self._shared
192 192 else self._path
193 193 )
194 194
195 195 def _listkeys(self):
196 196 """List all the remotefilelog keys that exist in the store.
197 197
198 198 Returns an iterator of (filename hash, filecontent hash) tuples.
199 199 """
200 200
201 201 for root, dirs, files in os.walk(self._getrepocachepath()):
202 202 for filename in files:
203 203 if len(filename) != 40:
204 204 continue
205 205 node = filename
206 206 if self._shared:
207 207 # .../1a/85ffda..be21
208 208 filenamehash = root[-41:-39] + root[-38:]
209 209 else:
210 210 filenamehash = root[-40:]
211 211 yield (bin(filenamehash), bin(node))
212 212
213 213 def _getfilepath(self, name, node):
214 214 node = hex(node)
215 215 if self._shared:
216 216 key = shallowutil.getcachekey(self._reponame, name, node)
217 217 else:
218 218 key = shallowutil.getlocalkey(name, node)
219 219
220 220 return os.path.join(self._path, key)
221 221
222 222 def _getdata(self, name, node):
223 223 filepath = self._getfilepath(name, node)
224 224 try:
225 225 data = shallowutil.readfile(filepath)
226 226 if self._validatecache and not self._validatedata(data, filepath):
227 227 if self._validatecachelog:
228 228 with open(self._validatecachelog, b'ab+') as f:
229 229 f.write(b"corrupt %s during read\n" % filepath)
230 230 os.rename(filepath, filepath + b".corrupt")
231 231 raise KeyError(b"corrupt local cache file %s" % filepath)
232 232 except IOError:
233 233 raise KeyError(
234 234 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
235 235 )
236 236
237 237 return data
238 238
239 239 def addremotefilelognode(self, name, node, data):
240 240 filepath = self._getfilepath(name, node)
241 241
242 242 oldumask = os.umask(0o002)
243 243 try:
244 244 # if this node already exists, save the old version for
245 245 # recovery/debugging purposes.
246 246 if os.path.exists(filepath):
247 247 newfilename = filepath + b'_old'
248 248 # newfilename can be read-only and shutil.copy will fail.
249 249 # Delete newfilename first to avoid that.
250 250 if os.path.exists(newfilename):
251 251 shallowutil.unlinkfile(newfilename)
252 252 shutil.copy(filepath, newfilename)
253 253
254 254 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
255 255 shallowutil.writefile(filepath, data, readonly=True)
256 256
257 257 if self._validatecache:
258 258 if not self._validatekey(filepath, b'write'):
259 259 raise error.Abort(
260 260 _(b"local cache write was corrupted %s") % filepath
261 261 )
262 262 finally:
263 263 os.umask(oldumask)
264 264
265 265 def markrepo(self, path):
266 266 """Call this to add the given repo path to the store's list of
267 267 repositories that are using it. This is useful later when doing garbage
268 268 collection, since it allows us to inspect the repos to see what nodes
269 269 they want to be kept alive in the store.
270 270 """
271 271 repospath = os.path.join(self._path, b"repos")
272 272 with open(repospath, b'ab') as reposfile:
273 273 reposfile.write(os.path.dirname(path) + b"\n")
274 274
275 275 repospathstat = os.stat(repospath)
276 276 if repospathstat.st_uid == self._uid:
277 277 os.chmod(repospath, 0o0664)
278 278
279 279 def _validatekey(self, path, action):
280 280 with open(path, b'rb') as f:
281 281 data = f.read()
282 282
283 283 if self._validatedata(data, path):
284 284 return True
285 285
286 286 if self._validatecachelog:
287 287 with open(self._validatecachelog, b'ab+') as f:
288 288 f.write(b"corrupt %s during %s\n" % (path, action))
289 289
290 290 os.rename(path, path + b".corrupt")
291 291 return False
292 292
293 293 def _validatedata(self, data, path):
294 294 try:
295 295 if len(data) > 0:
296 296 # see remotefilelogserver.createfileblob for the format
297 297 offset, size, flags = shallowutil.parsesizeflags(data)
298 298 if len(data) <= size:
299 299 # it is truncated
300 300 return False
301 301
302 302 # extract the node from the metadata
303 303 offset += size
304 304 datanode = data[offset : offset + 20]
305 305
306 306 # and compare against the path
307 307 if os.path.basename(path) == hex(datanode):
308 308 # Content matches the intended path
309 309 return True
310 310 return False
311 except (ValueError, RuntimeError):
311 except (ValueError, shallowutil.BadRemotefilelogHeader):
312 312 pass
313 313
314 314 return False
315 315
316 316 def gc(self, keepkeys):
317 317 ui = self.ui
318 318 cachepath = self._path
319 319
320 320 # prune cache
321 321 queue = pycompat.queue.PriorityQueue()
322 322 originalsize = 0
323 323 size = 0
324 324 count = 0
325 325 removed = 0
326 326
327 327 # keep files newer than a day even if they aren't needed
328 328 limit = time.time() - (60 * 60 * 24)
329 329
330 330 progress = ui.makeprogress(
331 331 _(b"removing unnecessary files"), unit=b"files"
332 332 )
333 333 progress.update(0)
334 334 for root, dirs, files in os.walk(cachepath):
335 335 for file in files:
336 336 if file == b'repos':
337 337 continue
338 338
339 339 # Don't delete pack files
340 340 if b'/packs/' in root:
341 341 continue
342 342
343 343 progress.update(count)
344 344 path = os.path.join(root, file)
345 345 key = os.path.relpath(path, cachepath)
346 346 count += 1
347 347 try:
348 348 pathstat = os.stat(path)
349 349 except OSError as e:
350 350 # errno.ENOENT = no such file or directory
351 351 if e.errno != errno.ENOENT:
352 352 raise
353 353 msg = _(
354 354 b"warning: file %s was removed by another process\n"
355 355 )
356 356 ui.warn(msg % path)
357 357 continue
358 358
359 359 originalsize += pathstat.st_size
360 360
361 361 if key in keepkeys or pathstat.st_atime > limit:
362 362 queue.put((pathstat.st_atime, path, pathstat))
363 363 size += pathstat.st_size
364 364 else:
365 365 try:
366 366 shallowutil.unlinkfile(path)
367 367 except OSError as e:
368 368 # errno.ENOENT = no such file or directory
369 369 if e.errno != errno.ENOENT:
370 370 raise
371 371 msg = _(
372 372 b"warning: file %s was removed by another "
373 373 b"process\n"
374 374 )
375 375 ui.warn(msg % path)
376 376 continue
377 377 removed += 1
378 378 progress.complete()
379 379
380 380 # remove oldest files until under limit
381 381 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
382 382 if size > limit:
383 383 excess = size - limit
384 384 progress = ui.makeprogress(
385 385 _(b"enforcing cache limit"), unit=b"bytes", total=excess
386 386 )
387 387 removedexcess = 0
388 388 while queue and size > limit and size > 0:
389 389 progress.update(removedexcess)
390 390 atime, oldpath, oldpathstat = queue.get()
391 391 try:
392 392 shallowutil.unlinkfile(oldpath)
393 393 except OSError as e:
394 394 # errno.ENOENT = no such file or directory
395 395 if e.errno != errno.ENOENT:
396 396 raise
397 397 msg = _(
398 398 b"warning: file %s was removed by another process\n"
399 399 )
400 400 ui.warn(msg % oldpath)
401 401 size -= oldpathstat.st_size
402 402 removed += 1
403 403 removedexcess += oldpathstat.st_size
404 404 progress.complete()
405 405
406 406 ui.status(
407 407 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
408 408 % (
409 409 removed,
410 410 count,
411 411 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
412 412 float(size) / 1024.0 / 1024.0 / 1024.0,
413 413 )
414 414 )
415 415
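The gc method above prunes the cache in two passes: files that are no longer needed and older than a day are removed outright, then, if the cache is still over remotefilelog.cachelimit, the oldest files by access time are evicted until it fits. A self-contained sketch of that second pass, assuming a plain list of (atime, path, size) tuples instead of the os.stat results used above:

    import heapq

    def enforce_limit_sketch(entries, limit):
        """entries: iterable of (atime, path, size) tuples; evict the files
        with the oldest atime until the total size is at or below limit."""
        entries = list(entries)
        total = sum(size for _atime, _path, size in entries)
        heapq.heapify(entries)                 # smallest (oldest) atime first
        evicted = []
        while entries and total > limit:
            _atime, path, size = heapq.heappop(entries)
            evicted.append(path)               # the real code unlinks the file
            total -= size
        return evicted, total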
416 416
417 417 class baseunionstore(object):
418 418 def __init__(self, *args, **kwargs):
419 419 # If one of the functions that iterates all of the stores is about to
420 420 # throw a KeyError, try this many times with a full refresh between
421 421 # attempts. A repack operation may have moved data from one store to
422 422 # another while we were running.
423 423 self.numattempts = kwargs.get('numretries', 0) + 1
424 424 # If not None, call this function on every retry and when the attempts are
425 425 # exhausted.
426 426 self.retrylog = kwargs.get('retrylog', None)
427 427
428 428 def markforrefresh(self):
429 429 for store in self.stores:
430 430 if util.safehasattr(store, b'markforrefresh'):
431 431 store.markforrefresh()
432 432
433 433 @staticmethod
434 434 def retriable(fn):
435 435 def noop(*args):
436 436 pass
437 437
438 438 def wrapped(self, *args, **kwargs):
439 439 retrylog = self.retrylog or noop
440 440 funcname = fn.__name__
441 441 i = 0
442 442 while i < self.numattempts:
443 443 if i > 0:
444 444 retrylog(
445 445 b're-attempting (n=%d) %s\n'
446 446 % (i, pycompat.sysbytes(funcname))
447 447 )
448 448 self.markforrefresh()
449 449 i += 1
450 450 try:
451 451 return fn(self, *args, **kwargs)
452 452 except KeyError:
453 453 if i == self.numattempts:
454 454 # retries exhausted
455 455 retrylog(
456 456 b'retries exhausted in %s, raising KeyError\n'
457 457 % pycompat.sysbytes(funcname)
458 458 )
459 459 raise
460 460
461 461 return wrapped
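baseunionstore.retriable above wraps store lookups in a refresh-and-retry loop because a concurrent repack may move data between stores while an operation is running. Below is a condensed, self-contained sketch of the same pattern, minus the retrylog bookkeeping; retriable_sketch is a made-up name for illustration.

    def retriable_sketch(fn):
        """Retry fn on KeyError after asking the object to refresh its stores."""
        def wrapped(self, *args, **kwargs):
            for attempt in range(self.numattempts):
                if attempt > 0:
                    # a repack may have moved the data to another store; rescan
                    self.markforrefresh()
                try:
                    return fn(self, *args, **kwargs)
                except KeyError:
                    if attempt == self.numattempts - 1:
                        raise  # retries exhausted, let the caller handle it
        return wrapped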
@@ -1,536 +1,544 b''
1 1 # shallowutil.py -- remotefilelog utilities
2 2 #
3 3 # Copyright 2014 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import errno
11 11 import os
12 12 import stat
13 13 import struct
14 14 import tempfile
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.pycompat import open
18 18 from mercurial.node import hex
19 19 from mercurial import (
20 20 error,
21 21 pycompat,
22 22 revlog,
23 23 util,
24 24 )
25 25 from mercurial.utils import (
26 26 hashutil,
27 27 storageutil,
28 28 stringutil,
29 29 )
30 30 from . import constants
31 31
32 32 if not pycompat.iswindows:
33 33 import grp
34 34
35 35
36 36 def isenabled(repo):
37 37 """returns whether the repository is remotefilelog enabled or not"""
38 38 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
39 39
40 40
41 41 def getcachekey(reponame, file, id):
42 42 pathhash = hex(hashutil.sha1(file).digest())
43 43 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
44 44
45 45
46 46 def getlocalkey(file, id):
47 47 pathhash = hex(hashutil.sha1(file).digest())
48 48 return os.path.join(pathhash, id)
49 49
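getcachekey and getlocalkey above define the on-disk layout that basestore walks: a shared-cache blob lives at <reponame>/<first 2 hex digits of sha1(filename)>/<remaining 38 digits>/<node>. A tiny illustration, using hashlib directly instead of Mercurial's hashutil wrapper:

    import hashlib, os

    def cachekey_sketch(reponame, filename, node_hex):
        pathhash = hashlib.sha1(filename).hexdigest()   # filename is bytes
        return os.path.join(reponame, pathhash[:2], pathhash[2:], node_hex)

    # cachekey_sketch('master', b'x', '1406e7...') gives
    # 'master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e7...'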
50 50
51 51 def getcachepath(ui, allowempty=False):
52 52 cachepath = ui.config(b"remotefilelog", b"cachepath")
53 53 if not cachepath:
54 54 if allowempty:
55 55 return None
56 56 else:
57 57 raise error.Abort(
58 58 _(b"could not find config option remotefilelog.cachepath")
59 59 )
60 60 return util.expandpath(cachepath)
61 61
62 62
63 63 def getcachepackpath(repo, category):
64 64 cachepath = getcachepath(repo.ui)
65 65 if category != constants.FILEPACK_CATEGORY:
66 66 return os.path.join(cachepath, repo.name, b'packs', category)
67 67 else:
68 68 return os.path.join(cachepath, repo.name, b'packs')
69 69
70 70
71 71 def getlocalpackpath(base, category):
72 72 return os.path.join(base, b'packs', category)
73 73
74 74
75 75 def createrevlogtext(text, copyfrom=None, copyrev=None):
76 76 """returns a string that matches the revlog contents in a
77 77 traditional revlog
78 78 """
79 79 meta = {}
80 80 if copyfrom or text.startswith(b'\1\n'):
81 81 if copyfrom:
82 82 meta[b'copy'] = copyfrom
83 83 meta[b'copyrev'] = copyrev
84 84 text = storageutil.packmeta(meta, text)
85 85
86 86 return text
87 87
88 88
89 89 def parsemeta(text):
90 90 """parse mercurial filelog metadata"""
91 91 meta, size = storageutil.parsemeta(text)
92 92 if text.startswith(b'\1\n'):
93 93 s = text.index(b'\1\n', 2)
94 94 text = text[s + 2 :]
95 95 return meta or {}, text
96 96
97 97
98 98 def sumdicts(*dicts):
99 99 """Adds all the values of *dicts together into one dictionary. This assumes
100 100 the values in *dicts are all summable.
101 101
102 102 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
103 103 """
104 104 result = collections.defaultdict(lambda: 0)
105 105 for dict in dicts:
106 106 for k, v in pycompat.iteritems(dict):
107 107 result[k] += v
108 108 return result
109 109
110 110
111 111 def prefixkeys(dict, prefix):
112 112 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
113 113 result = {}
114 114 for k, v in pycompat.iteritems(dict):
115 115 result[prefix + k] = v
116 116 return result
117 117
118 118
119 119 def reportpackmetrics(ui, prefix, *stores):
120 120 dicts = [s.getmetrics() for s in stores]
121 121 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
122 122 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
123 123
124 124
125 125 def _parsepackmeta(metabuf):
126 126 """parse datapack meta, bytes (<metadata-list>) -> dict
127 127
128 128 The dict contains raw content - both keys and values are strings.
129 129 Upper-level business may want to convert some of them to other types like
130 130 integers, on their own.
131 131
132 132 raise ValueError if the data is corrupted
133 133 """
134 134 metadict = {}
135 135 offset = 0
136 136 buflen = len(metabuf)
137 137 while buflen - offset >= 3:
138 138 key = metabuf[offset : offset + 1]
139 139 offset += 1
140 140 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
141 141 offset += 2
142 142 if offset + metalen > buflen:
143 143 raise ValueError(b'corrupted metadata: incomplete buffer')
144 144 value = metabuf[offset : offset + metalen]
145 145 metadict[key] = value
146 146 offset += metalen
147 147 if offset != buflen:
148 148 raise ValueError(b'corrupted metadata: redundant data')
149 149 return metadict
150 150
151 151
152 152 def _buildpackmeta(metadict):
153 153 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
154 154
155 155 The dict contains raw content - both keys and values are strings.
156 156 Upper-level business may want to serialize some other types (like
157 157 integers) to strings before calling this function.
158 158
159 159 raise ProgrammingError when metadata key is illegal, or ValueError if
160 160 length limit is exceeded
161 161 """
162 162 metabuf = b''
163 163 for k, v in sorted(pycompat.iteritems((metadict or {}))):
164 164 if len(k) != 1:
165 165 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
166 166 if len(v) > 0xFFFE:
167 167 raise ValueError(
168 168 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
169 169 )
170 170 metabuf += k
171 171 metabuf += struct.pack(b'!H', len(v))
172 172 metabuf += v
173 173 # len(metabuf) is guaranteed representable in 4 bytes, because there are
174 174 # only 256 keys, and for each value, len(value) <= 0xfffe.
175 175 return metabuf
176 176
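A quick worked example of the <metadata-list> layout handled by _parsepackmeta and _buildpackmeta above: each entry is a one-byte key, a two-byte big-endian length, then the value. The snippet is illustrative only and builds a single entry by hand rather than calling the real helpers.

    import struct

    key, value = b'x', b'hello'        # hypothetical one-byte key and value
    entry = key + struct.pack(b'!H', len(value)) + value
    assert entry == b'x\x00\x05hello'

    # parsing walks the buffer the same way _parsepackmeta does
    parsed_key = entry[0:1]
    (parsed_len,) = struct.unpack_from(b'!H', entry, 1)
    parsed_value = entry[3:3 + parsed_len]
    assert (parsed_key, parsed_value) == (key, value)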
177 177
178 178 _metaitemtypes = {
179 179 constants.METAKEYFLAG: (int, pycompat.long),
180 180 constants.METAKEYSIZE: (int, pycompat.long),
181 181 }
182 182
183 183
184 184 def buildpackmeta(metadict):
185 185 """like _buildpackmeta, but typechecks metadict and normalizes it.
186 186
187 187 This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
188 188 and METAKEYFLAG will be dropped if its value is 0.
189 189 """
190 190 newmeta = {}
191 191 for k, v in pycompat.iteritems(metadict or {}):
192 192 expectedtype = _metaitemtypes.get(k, (bytes,))
193 193 if not isinstance(v, expectedtype):
194 194 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
195 195 # normalize int to binary buffer
196 196 if int in expectedtype:
197 197 # optimization: remove flag if it's 0 to save space
198 198 if k == constants.METAKEYFLAG and v == 0:
199 199 continue
200 200 v = int2bin(v)
201 201 newmeta[k] = v
202 202 return _buildpackmeta(newmeta)
203 203
204 204
205 205 def parsepackmeta(metabuf):
206 206 """like _parsepackmeta, but convert fields to desired types automatically.
207 207
208 208 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
209 209 integers.
210 210 """
211 211 metadict = _parsepackmeta(metabuf)
212 212 for k, v in pycompat.iteritems(metadict):
213 213 if k in _metaitemtypes and int in _metaitemtypes[k]:
214 214 metadict[k] = bin2int(v)
215 215 return metadict
216 216
217 217
218 218 def int2bin(n):
219 219 """convert a non-negative integer to raw binary buffer"""
220 220 buf = bytearray()
221 221 while n > 0:
222 222 buf.insert(0, n & 0xFF)
223 223 n >>= 8
224 224 return bytes(buf)
225 225
226 226
227 227 def bin2int(buf):
228 228 """the reverse of int2bin, convert a binary buffer to an integer"""
229 229 x = 0
230 230 for b in bytearray(buf):
231 231 x <<= 8
232 232 x |= b
233 233 return x
234 234
235 235
236 class BadRemotefilelogHeader(error.StorageError):
237 """Exception raised when parsing a remotefilelog blob header fails."""
238
239
236 240 def parsesizeflags(raw):
237 241 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
238 242
239 243 see remotefilelogserver.createfileblob for the format.
240 244 raise BadRemotefilelogHeader if the content is ill-formed.
241 245 """
242 246 flags = revlog.REVIDX_DEFAULT_FLAGS
243 247 size = None
244 248 try:
245 249 index = raw.index(b'\0')
246 250 header = raw[:index]
247 251 if header.startswith(b'v'):
248 252 # v1 and above, header starts with 'v'
249 253 if header.startswith(b'v1\n'):
250 254 for s in header.split(b'\n'):
251 255 if s.startswith(constants.METAKEYSIZE):
252 256 size = int(s[len(constants.METAKEYSIZE) :])
253 257 elif s.startswith(constants.METAKEYFLAG):
254 258 flags = int(s[len(constants.METAKEYFLAG) :])
255 259 else:
256 raise RuntimeError(
260 raise BadRemotefilelogHeader(
257 261 b'unsupported remotefilelog header: %s' % header
258 262 )
259 263 else:
260 264 # v0, str(int(size)) is the header
261 265 size = int(header)
262 266 except ValueError:
263 raise RuntimeError("unexpected remotefilelog header: illegal format")
267 raise BadRemotefilelogHeader(
268 "unexpected remotefilelog header: illegal format"
269 )
264 270 if size is None:
265 raise RuntimeError("unexpected remotefilelog header: no size found")
271 raise BadRemotefilelogHeader(
272 "unexpected remotefilelog header: no size found"
273 )
266 274 return index + 1, size, flags
267 275
268 276
269 277 def buildfileblobheader(size, flags, version=None):
270 278 """return the header of a remotefilelog blob.
271 279
272 280 see remotefilelogserver.createfileblob for the format.
273 281 approximately the reverse of parsesizeflags.
274 282
275 283 version could be 0 or 1, or None (auto decide).
276 284 """
277 285 # choose v0 if flags is empty, otherwise v1
278 286 if version is None:
279 287 version = int(bool(flags))
280 288 if version == 1:
281 289 header = b'v1\n%s%d\n%s%d' % (
282 290 constants.METAKEYSIZE,
283 291 size,
284 292 constants.METAKEYFLAG,
285 293 flags,
286 294 )
287 295 elif version == 0:
288 296 if flags:
289 297 raise error.ProgrammingError(b'fileblob v0 does not support flag')
290 298 header = b'%d' % size
291 299 else:
292 300 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
293 301 return header
294 302
295 303
296 304 def ancestormap(raw):
297 305 offset, size, flags = parsesizeflags(raw)
298 306 start = offset + size
299 307
300 308 mapping = {}
301 309 while start < len(raw):
302 310 divider = raw.index(b'\0', start + 80)
303 311
304 312 currentnode = raw[start : (start + 20)]
305 313 p1 = raw[(start + 20) : (start + 40)]
306 314 p2 = raw[(start + 40) : (start + 60)]
307 315 linknode = raw[(start + 60) : (start + 80)]
308 316 copyfrom = raw[(start + 80) : divider]
309 317
310 318 mapping[currentnode] = (p1, p2, linknode, copyfrom)
311 319 start = divider + 1
312 320
313 321 return mapping
314 322
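parsesizeflags and ancestormap together describe the v0 file blob layout: a decimal size terminated by NUL, the raw text, then one ancestry record per node consisting of node, p1, p2 and linknode (20 bytes each), an optional copyfrom path, and a NUL terminator. A hand-built toy blob, illustrative only, parsed the same way ancestormap does:

    text = b'hello\n'
    node = b'\x11' * 20
    nullid = b'\x00' * 20
    # header + raw text + node|p1|p2|linknode|copyfrom|NUL
    blob = b'%d\0' % len(text) + text + node + nullid + nullid + node + b'\0'

    headerend = blob.index(b'\0')
    size = int(blob[:headerend])
    start = headerend + 1 + size
    divider = blob.index(b'\0', start + 80)
    p1 = blob[start + 20:start + 40]
    copyfrom = blob[start + 80:divider]
    assert (size, p1, copyfrom) == (len(text), nullid, b'')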
315 323
316 324 def readfile(path):
317 325 f = open(path, b'rb')
318 326 try:
319 327 result = f.read()
320 328
321 329 # we should never have empty files
322 330 if not result:
323 331 os.remove(path)
324 332 raise IOError(b"empty file: %s" % path)
325 333
326 334 return result
327 335 finally:
328 336 f.close()
329 337
330 338
331 339 def unlinkfile(filepath):
332 340 if pycompat.iswindows:
333 341 # On Windows, os.unlink cannot delete readonly files
334 342 os.chmod(filepath, stat.S_IWUSR)
335 343 os.unlink(filepath)
336 344
337 345
338 346 def renamefile(source, destination):
339 347 if pycompat.iswindows:
340 348 # On Windows, os.rename cannot rename readonly files
341 349 # and cannot overwrite destination if it exists
342 350 os.chmod(source, stat.S_IWUSR)
343 351 if os.path.isfile(destination):
344 352 os.chmod(destination, stat.S_IWUSR)
345 353 os.unlink(destination)
346 354
347 355 os.rename(source, destination)
348 356
349 357
350 358 def writefile(path, content, readonly=False):
351 359 dirname, filename = os.path.split(path)
352 360 if not os.path.exists(dirname):
353 361 try:
354 362 os.makedirs(dirname)
355 363 except OSError as ex:
356 364 if ex.errno != errno.EEXIST:
357 365 raise
358 366
359 367 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
360 368 os.close(fd)
361 369
362 370 try:
363 371 f = util.posixfile(temp, b'wb')
364 372 f.write(content)
365 373 f.close()
366 374
367 375 if readonly:
368 376 mode = 0o444
369 377 else:
370 378 # tempfiles are created with 0o600, so we need to manually set the
371 379 # mode.
372 380 oldumask = os.umask(0)
373 381 # there's no way to get the umask without modifying it, so set it
374 382 # back
375 383 os.umask(oldumask)
376 384 mode = ~oldumask
377 385
378 386 renamefile(temp, path)
379 387 os.chmod(path, mode)
380 388 except Exception:
381 389 try:
382 390 unlinkfile(temp)
383 391 except OSError:
384 392 pass
385 393 raise
386 394
387 395
388 396 def sortnodes(nodes, parentfunc):
389 397 """Topologically sorts the nodes, using the parentfunc to find
390 398 the parents of nodes."""
391 399 nodes = set(nodes)
392 400 childmap = {}
393 401 parentmap = {}
394 402 roots = []
395 403
396 404 # Build a child and parent map
397 405 for n in nodes:
398 406 parents = [p for p in parentfunc(n) if p in nodes]
399 407 parentmap[n] = set(parents)
400 408 for p in parents:
401 409 childmap.setdefault(p, set()).add(n)
402 410 if not parents:
403 411 roots.append(n)
404 412
405 413 roots.sort()
406 414 # Process roots, adding children to the queue as they become roots
407 415 results = []
408 416 while roots:
409 417 n = roots.pop(0)
410 418 results.append(n)
411 419 if n in childmap:
412 420 children = childmap[n]
413 421 for c in children:
414 422 childparents = parentmap[c]
415 423 childparents.remove(n)
416 424 if len(childparents) == 0:
417 425 # insert at the beginning, that way child nodes
418 426 # are likely to be output immediately after their
419 427 # parents. This gives better compression results.
420 428 roots.insert(0, c)
421 429
422 430 return results
423 431
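sortnodes above is a Kahn-style topological sort: parentfunc supplies each node's parents, roots are emitted in sorted order, and children are queued at the front so they land right after their parents. An illustrative call, assuming a Mercurial installation where hgext.remotefilelog.shallowutil is importable:

    from hgext.remotefilelog.shallowutil import sortnodes

    # tiny DAG: b'b' descends from b'a', b'c' from b'b'
    parents = {b'a': [], b'b': [b'a'], b'c': [b'b']}
    assert sortnodes([b'c', b'a', b'b'], parents.get) == [b'a', b'b', b'c']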
424 432
425 433 def readexactly(stream, n):
426 434 '''read n bytes from stream.read and abort if less was available'''
427 435 s = stream.read(n)
428 436 if len(s) < n:
429 437 raise error.Abort(
430 438 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
431 439 % (len(s), n)
432 440 )
433 441 return s
434 442
435 443
436 444 def readunpack(stream, fmt):
437 445 data = readexactly(stream, struct.calcsize(fmt))
438 446 return struct.unpack(fmt, data)
439 447
440 448
441 449 def readpath(stream):
442 450 rawlen = readexactly(stream, constants.FILENAMESIZE)
443 451 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
444 452 return readexactly(stream, pathlen)
445 453
446 454
447 455 def readnodelist(stream):
448 456 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
449 457 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
450 458 for i in pycompat.xrange(nodecount):
451 459 yield readexactly(stream, constants.NODESIZE)
452 460
453 461
454 462 def readpathlist(stream):
455 463 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
456 464 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
457 465 for i in pycompat.xrange(pathcount):
458 466 yield readpath(stream)
459 467
460 468
461 469 def getgid(groupname):
462 470 try:
463 471 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
464 472 return gid
465 473 except KeyError:
466 474 return None
467 475
468 476
469 477 def setstickygroupdir(path, gid, warn=None):
470 478 if gid is None:
471 479 return
472 480 try:
473 481 os.chown(path, -1, gid)
474 482 os.chmod(path, 0o2775)
475 483 except (IOError, OSError) as ex:
476 484 if warn:
477 485 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
478 486
479 487
480 488 def mkstickygroupdir(ui, path):
481 489 """Creates the given directory (if it doesn't exist) and gives it a
482 490 particular group with setgid enabled."""
483 491 gid = None
484 492 groupname = ui.config(b"remotefilelog", b"cachegroup")
485 493 if groupname:
486 494 gid = getgid(groupname)
487 495 if gid is None:
488 496 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
489 497
490 498 # we use a single stat syscall to test the existence and mode / group bit
491 499 st = None
492 500 try:
493 501 st = os.stat(path)
494 502 except OSError:
495 503 pass
496 504
497 505 if st:
498 506 # exists
499 507 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
500 508 # permission needs to be fixed
501 509 setstickygroupdir(path, gid, ui.warn)
502 510 return
503 511
504 512 oldumask = os.umask(0o002)
505 513 try:
506 514 missingdirs = [path]
507 515 path = os.path.dirname(path)
508 516 while path and not os.path.exists(path):
509 517 missingdirs.append(path)
510 518 path = os.path.dirname(path)
511 519
512 520 for path in reversed(missingdirs):
513 521 try:
514 522 os.mkdir(path)
515 523 except OSError as ex:
516 524 if ex.errno != errno.EEXIST:
517 525 raise
518 526
519 527 for path in missingdirs:
520 528 setstickygroupdir(path, gid, ui.warn)
521 529 finally:
522 530 os.umask(oldumask)
523 531
524 532
525 533 def getusername(ui):
526 534 try:
527 535 return stringutil.shortuser(ui.username())
528 536 except Exception:
529 537 return b'unknown'
530 538
531 539
532 540 def getreponame(ui):
533 541 reponame = ui.config(b'paths', b'default')
534 542 if reponame:
535 543 return os.path.basename(reponame)
536 544 return b"unknown"
@@ -1,72 +1,73 b''
1 1 #require no-windows
2 2
3 3 $ . "$TESTDIR/remotefilelog-library.sh"
4 4
5 5 $ hg init master
6 6 $ cd master
7 7 $ cat >> .hg/hgrc <<EOF
8 8 > [remotefilelog]
9 9 > server=True
10 10 > EOF
11 11 $ echo x > x
12 12 $ echo y > y
13 13 $ echo z > z
14 14 $ hg commit -qAm xy
15 15
16 16 $ cd ..
17 17
18 18 $ hgcloneshallow ssh://user@dummy/master shallow -q
19 19 3 files fetched over 1 fetches - (3 misses, 0.00% hit ratio) over *s (glob)
20 20 $ cd shallow
21 21
22 22 Verify corrupt cache handling repairs by default
23 23
24 24 $ hg up -q null
25 25 $ chmod u+w $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
26 26 $ echo x > $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
27 27 $ hg up tip
28 28 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
29 29 1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob)
30 30
31 31 Verify corrupt cache error message
32 32
33 33 $ hg up -q null
34 34 $ cat >> .hg/hgrc <<EOF
35 35 > [remotefilelog]
36 36 > validatecache=off
37 37 > EOF
38 38 $ chmod u+w $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
39 39 $ echo x > $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
40 40 $ hg up tip 2>&1 | egrep "^[^ ].*unexpected remotefilelog"
41 RuntimeError: unexpected remotefilelog header: illegal format
41 abort: unexpected remotefilelog header: illegal format (no-py3 !)
42 hgext.remotefilelog.shallowutil.BadRemotefilelogHeader: unexpected remotefilelog header: illegal format (py3 !)
42 43
43 44 Verify detection and remediation when remotefilelog.validatecachelog is set
44 45
45 46 $ cat >> .hg/hgrc <<EOF
46 47 > [remotefilelog]
47 48 > validatecachelog=$PWD/.hg/remotefilelog_cache.log
48 49 > validatecache=strict
49 50 > EOF
50 51 $ chmod u+w $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
51 52 $ echo x > $CACHEDIR/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0
52 53 $ hg up tip
53 54 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
54 55 1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob)
55 56 $ cat .hg/remotefilelog_cache.log
56 57 corrupt $TESTTMP/hgcache/master/11/f6ad8ec52a2984abaafd7c3b516503785c2072/1406e74118627694268417491f018a4a883152f0 during contains
57 58
58 59 Verify handling of corrupt server cache
59 60
60 61 $ rm -f ../master/.hg/remotefilelogcache/y/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
61 62 $ touch ../master/.hg/remotefilelogcache/y/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
62 63 $ clearcache
63 64 $ hg prefetch -r .
64 65 3 files fetched over 1 fetches - (3 misses, 0.00% hit ratio) over *s (glob)
65 66 $ test -s ../master/.hg/remotefilelogcache/y/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
66 67 $ hg debugremotefilelog $CACHEDIR/master/95/cb0bfd2977c761298d9624e4b4d4c72a39974a/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
67 68 size: 2 bytes
68 69 path: $TESTTMP/hgcache/master/95/cb0bfd2977c761298d9624e4b4d4c72a39974a/076f5e2225b3ff0400b98c92aa6cdf403ee24cca
69 70 key: 076f5e2225b3
70 71
71 72 node => p1 p2 linknode copyfrom
72 73 076f5e2225b3 => 000000000000 000000000000 f3d0bb0d1e48