##// END OF EJS Templates
typing: simplify archive.gz writing and drop a few pytype suppressions...
Matt Harbison -
r52754:f81e0ce5 default
parent child Browse files
Show More
@@ -1,401 +1,392
1 # archival.py - revision archival for mercurial
1 # archival.py - revision archival for mercurial
2 #
2 #
3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import gzip
9 import gzip
10 import os
10 import os
11 import struct
11 import struct
12 import tarfile
12 import tarfile
13 import time
13 import time
14 import typing
14 import typing
15 import zipfile
15 import zipfile
16 import zlib
16 import zlib
17
17
18 from typing import (
18 from typing import (
19 Optional,
19 Optional,
20 )
20 )
21
21
22 from .i18n import _
22 from .i18n import _
23 from .node import nullrev
23 from .node import nullrev
24 from .pycompat import open
24 from .pycompat import open
25
25
26 from . import (
26 from . import (
27 error,
27 error,
28 formatter,
28 formatter,
29 match as matchmod,
29 match as matchmod,
30 pycompat,
30 pycompat,
31 scmutil,
31 scmutil,
32 util,
32 util,
33 vfs as vfsmod,
33 vfs as vfsmod,
34 )
34 )
35
35
36 from .utils import stringutil
36 from .utils import stringutil
37
37
38 if typing.TYPE_CHECKING:
38 if typing.TYPE_CHECKING:
39 from . import (
39 from . import (
40 localrepo,
40 localrepo,
41 )
41 )
42
42
43 stringio = util.stringio
43 stringio = util.stringio
44
44
45 # from unzip source code:
45 # from unzip source code:
46 _UNX_IFREG = 0x8000
46 _UNX_IFREG = 0x8000
47 _UNX_IFLNK = 0xA000
47 _UNX_IFLNK = 0xA000
48
48
49
49
def tidyprefix(dest, kind, prefix):
    """choose prefix to use for names in archive. make sure prefix is
    safe for consumers."""

    if not prefix:
        if not isinstance(dest, bytes):
            raise ValueError(b'dest must be string if no prefix')
        # No explicit prefix: derive one from the destination file name,
        # minus any well-known archive suffix for this kind.
        prefix = os.path.basename(dest)
        lowered = prefix.lower()
        for suffix in exts.get(kind, []):
            if lowered.endswith(suffix):
                prefix = prefix[: -len(suffix)]
                break
    else:
        prefix = util.normpath(prefix)

    localpfx = os.path.normpath(util.localpath(prefix))
    prefix = util.pconvert(localpfx)
    if not prefix.endswith(b'/'):
        prefix += b'/'
    # Drop the leading '.' path component if present, so Windows can read the
    # zip files (issue4634)
    if prefix.startswith(b'./'):
        prefix = prefix[2:]
    escapes_root = (
        prefix.startswith(b'../')
        or os.path.isabs(localpfx)
        or b'/../' in prefix
    )
    if escapes_root:
        raise error.Abort(_(b'archive prefix contains illegal components'))
    return prefix
76
76
77
77
# Recognized file-name suffixes for each archive kind; used both to guess a
# kind from a destination name and to strip the suffix when building prefixes.
exts = {
    kind: suffixes
    for kind, suffixes in [
        (b'tar', [b'.tar']),
        (b'tbz2', [b'.tbz2', b'.tar.bz2']),
        (b'tgz', [b'.tgz', b'.tar.gz']),
        (b'zip', [b'.zip']),
        (b'txz', [b'.txz', b'.tar.xz']),
    ]
}
85
85
86
86
def guesskind(dest):
    """Return the archive kind whose suffix matches ``dest``, else None."""
    matches = (
        kind
        for kind, extensions in exts.items()
        if any(dest.endswith(ext) for ext in extensions)
    )
    return next(matches, None)
92
92
93
93
94 def _rootctx(repo):
94 def _rootctx(repo):
95 # repo[0] may be hidden
95 # repo[0] may be hidden
96 for rev in repo:
96 for rev in repo:
97 return repo[rev]
97 return repo[rev]
98 return repo[nullrev]
98 return repo[nullrev]
99
99
100
100
101 # {tags} on ctx includes local tags and 'tip', with no current way to limit
101 # {tags} on ctx includes local tags and 'tip', with no current way to limit
102 # that to global tags. Therefore, use {latesttag} as a substitute when
102 # that to global tags. Therefore, use {latesttag} as a substitute when
103 # the distance is 0, since that will be the list of global tags on ctx.
103 # the distance is 0, since that will be the list of global tags on ctx.
104 _defaultmetatemplate = br'''
104 _defaultmetatemplate = br'''
105 repo: {root}
105 repo: {root}
106 node: {ifcontains(rev, revset("wdir()"), "{p1node}{dirty}", "{node}")}
106 node: {ifcontains(rev, revset("wdir()"), "{p1node}{dirty}", "{node}")}
107 branch: {branch|utf8}
107 branch: {branch|utf8}
108 {ifeq(latesttagdistance, 0, join(latesttag % "tag: {tag}", "\n"),
108 {ifeq(latesttagdistance, 0, join(latesttag % "tag: {tag}", "\n"),
109 separate("\n",
109 separate("\n",
110 join(latesttag % "latesttag: {tag}", "\n"),
110 join(latesttag % "latesttag: {tag}", "\n"),
111 "latesttagdistance: {latesttagdistance}",
111 "latesttagdistance: {latesttagdistance}",
112 "changessincelatesttag: {changessincelatesttag}"))}
112 "changessincelatesttag: {changessincelatesttag}"))}
113 '''[
113 '''[
114 1:
114 1:
115 ] # drop leading '\n'
115 ] # drop leading '\n'
116
116
117
117
def buildmetadata(ctx):
    '''build content of .hg_archival.txt'''
    repo = ctx.repo()

    # The template is user-overridable via experimental.archivemetatemplate.
    template = repo.ui.config(
        b'experimental', b'archivemetatemplate', _defaultmetatemplate
    )

    buf = util.stringio()

    fm = formatter.formatter(repo.ui, buf, b'archive', {b'template': template})
    fm.startitem()
    fm.context(ctx=ctx)
    fm.data(root=_rootctx(repo).hex())

    if ctx.rev() is None:
        # Working-directory archive: flag a dirty checkout with '+'.
        fm.data(dirty=b'+' if ctx.dirty(missing=True) else b'')
    fm.end()

    return buf.getvalue()
143
143
144
144
class tarit:
    """write archive to tar file or stream. can write uncompressed,
    or compress with gzip or bzip2."""

    def __init__(self, dest, mtime, kind=b''):
        # mtime is stamped on every member; fileobj holds the GzipFile
        # wrapper (if any) so done() can close it after the tar stream.
        self.mtime = mtime
        self.fileobj = None

        def taropen(mode, name=b'', fileobj=None):
            # Open a tar writer over a file name or an existing stream,
            # closing over `kind` and `mtime` from __init__.
            if kind == b'gz':
                # Drive gzip ourselves instead of tarfile's 'w:gz' so we
                # control the gzip header mtime and use best compression;
                # the TarFile is then opened *uncompressed* on top of the
                # gzip stream via taropen().
                mode = mode[0:1]
                if not fileobj:
                    fileobj = open(name, mode + b'b')
                gzfileobj = gzip.GzipFile(
                    name,
                    pycompat.sysstr(mode + b'b'),
                    zlib.Z_BEST_COMPRESSION,
                    fileobj,
                    mtime=mtime,
                )
                self.fileobj = gzfileobj
                return tarfile.TarFile.taropen(name, "w", gzfileobj)
            else:
                try:
                    return tarfile.open(
                        name, pycompat.sysstr(mode + kind), fileobj
                    )
                except tarfile.CompressionError as e:
                    raise error.Abort(stringutil.forcebytestr(e))

        if isinstance(dest, bytes):
            # Named destination: seekable file mode ('w:').
            self.z = taropen(b'w:', name=dest)
        else:
            # File-like destination: non-seekable stream mode ('w|').
            self.z = taropen(b'w|', fileobj=dest)

    def addfile(self, name, mode, islink, data):
        """Add one member; for symlinks, *data* is the link target."""
        name = pycompat.fsdecode(name)
        i = tarfile.TarInfo(name)
        i.mtime = self.mtime
        i.size = len(data)
        if islink:
            i.type = tarfile.SYMTYPE
            i.mode = 0o777
            i.linkname = pycompat.fsdecode(data)
            data = None  # symlink members carry no payload
            i.size = 0
        else:
            i.mode = mode
            data = stringio(data)
        self.z.addfile(i, data)

    def done(self):
        """Finish the tar stream, then close the gzip wrapper if present."""
        self.z.close()
        if self.fileobj:
            self.fileobj.close()
209
200
210
201
class zipit:
    """write archive to zip file or stream. can write uncompressed,
    or compressed with deflate."""

    def __init__(self, dest, mtime, compress=True):
        if isinstance(dest, bytes):
            dest = pycompat.fsdecode(dest)
        self.z = zipfile.ZipFile(
            dest, 'w', compress and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED
        )

        # Python's zipfile module emits deprecation warnings if we try
        # to store files with a date before 1980.
        epoch = 315532800  # calendar.timegm((1980, 1, 1, 0, 0, 0, 1, 1, 0))
        if mtime < epoch:
            mtime = epoch

        self.mtime = mtime
        self.date_time = time.gmtime(mtime)[:6]

    def addfile(self, name, mode, islink, data):
        """Add one member, preserving unix mode bits and the exact mtime."""
        i = zipfile.ZipInfo(pycompat.fsdecode(name), self.date_time)
        i.compress_type = self.z.compression  # pytype: disable=attribute-error
        # unzip will not honor unix file modes unless file creator is
        # set to unix (id 3).
        i.create_system = 3
        ftype = _UNX_IFREG
        if islink:
            mode = 0o777
            ftype = _UNX_IFLNK
        # High 16 bits of external_attr carry the unix mode + file type.
        i.external_attr = (mode | ftype) << 16
        # add "extended-timestamp" extra block, because zip archives
        # without this will be extracted with unexpected timestamp,
        # if TZ is not configured as GMT
        i.extra += struct.pack(
            b'<hhBl',
            0x5455,  # block type: "extended-timestamp"
            1 + 4,  # size of this block
            1,  # "modification time is present"
            int(self.mtime),
        )  # last modification (UTC)
        self.z.writestr(i, data)

    def done(self):
        """Close the ZipFile, which writes the central directory."""
        self.z.close()
247
257
248
class fileit:
    '''write archive as files in directory.'''

    def __init__(self, name, mtime):
        # A vfs rooted at the destination directory creates parent
        # directories for us as members are written.
        self.basedir = name
        self.opener = vfsmod.vfs(self.basedir)
        self.mtime = mtime

    def addfile(self, name, mode, islink, data):
        """Materialize one member as a real file or symlink on disk."""
        if islink:
            self.opener.symlink(data, name)
            return
        fp = self.opener(name, b"w", atomictemp=False)
        fp.write(data)
        fp.close()
        target = os.path.join(self.basedir, name)
        os.chmod(target, mode)
        if self.mtime is not None:
            os.utime(target, (self.mtime, self.mtime))

    def done(self):
        """Nothing to finalize for a plain directory tree."""
        pass
280
271
281
272
# Map archive kind (bytes) to a factory taking (dest, mtime) and returning
# a writer object exposing addfile()/done().
archivers = {
    b'files': fileit,
    b'tar': tarit,
    b'tbz2': lambda name, mtime: tarit(name, mtime, b'bz2'),
    b'tgz': lambda name, mtime: tarit(name, mtime, b'gz'),
    b'txz': lambda name, mtime: tarit(name, mtime, b'xz'),
    b'uzip': lambda name, mtime: zipit(name, mtime, False),
    b'zip': zipit,
}
291
282
292
283
def archive(
    repo: "localrepo.localrepository",
    dest,  # TODO: should be bytes, but could be Callable
    node,
    kind: bytes,
    decode: bool = True,
    match=None,
    prefix: bytes = b'',
    mtime: Optional[float] = None,
    subrepos: bool = False,
) -> int:
    """create archive of repo as it was at node.

    dest can be name of directory, name of archive file, a callable, or file
    object to write archive to. If it is a callable, it will be called to
    open the actual file object before the first archive member is written.

    kind is type of archive to create.

    decode tells whether to put files through decode filters from
    hgrc.

    match is a matcher to filter names of files to write to archive.

    prefix is name of path to put before every archive member.

    mtime is the modified time, in seconds, or None to use the changeset time.

    subrepos tells whether to include subrepos.
    """

    if kind == b'files':
        if prefix:
            raise error.Abort(_(b'cannot give prefix when archiving to files'))
    else:
        prefix = tidyprefix(dest, kind, prefix)

    # The archiver is created lazily (see opencallback below) so that no
    # output is produced before the first real entry exists.
    archiver = None
    ctx = repo[node]

    def opencallback():
        """Return the archiver instance, creating it if necessary.

        This function is called when the first actual entry is created.
        It may be called multiple times from different layers.
        When serving the archive via hgweb, no errors should happen after
        this point.
        """
        nonlocal archiver
        if archiver is None:
            if callable(dest):
                output = dest()
            else:
                output = dest
            archiver = archivers[kind](output, mtime or ctx.date()[0])
            assert archiver is not None

            if repo.ui.configbool(b"ui", b"archivemeta"):
                metaname = b'.hg_archival.txt'
                if match(metaname):
                    write(metaname, 0o644, False, lambda: buildmetadata(ctx))
        return archiver

    def write(name, mode, islink, getdata):
        # Emit a single member, opening the archive on first use.
        if archiver is None:
            opencallback()
        assert archiver is not None, "archive should be opened by now"

        data = getdata()
        if decode:
            data = repo.wwritedata(name, data)
        archiver.addfile(prefix + name, mode, islink, data)

    if kind not in archivers:
        raise error.Abort(_(b"unknown archive type '%s'") % kind)

    if not match:
        match = scmutil.matchall(repo)

    files = list(ctx.manifest().walk(match))
    total = len(files)
    if total:
        files.sort()
        # Warm caches for all files up front (helps remote filelog setups).
        scmutil.prefetchfiles(
            repo, [(ctx.rev(), scmutil.matchfiles(repo, files))]
        )
        progress = repo.ui.makeprogress(
            _(b'archiving'), unit=_(b'files'), total=total
        )
        progress.update(0)
        for f in files:
            ff = ctx.flags(f)
            # 'x' flag -> executable mode; 'l' flag -> symlink member
            write(f, b'x' in ff and 0o755 or 0o644, b'l' in ff, ctx[f].data)
            progress.increment(item=f)
        progress.complete()

    if subrepos:
        for subpath in sorted(ctx.substate):
            sub = ctx.workingsub(subpath)
            submatch = matchmod.subdirmatcher(subpath, match)
            subprefix = prefix + subpath + b'/'
            total += sub.archive(opencallback, subprefix, submatch, decode)

    if total == 0:
        raise error.Abort(_(b'no files match the archive pattern'))

    assert archiver is not None, "archive should have been opened before"
    archiver.done()
    return total
General Comments 0
You need to be logged in to leave comments. Login now