##// END OF EJS Templates
archival: fsdecode paths before passing to tar or zip objects...
Augie Fackler -
r36724:bfe23afe default
parent child Browse files
Show More
@@ -1,360 +1,362 b''
1 # archival.py - revision archival for mercurial
1 # archival.py - revision archival for mercurial
2 #
2 #
3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import gzip
10 import gzip
11 import os
11 import os
12 import struct
12 import struct
13 import tarfile
13 import tarfile
14 import time
14 import time
15 import zipfile
15 import zipfile
16 import zlib
16 import zlib
17
17
18 from .i18n import _
18 from .i18n import _
19
19
20 from . import (
20 from . import (
21 error,
21 error,
22 formatter,
22 formatter,
23 match as matchmod,
23 match as matchmod,
24 pycompat,
24 scmutil,
25 scmutil,
25 util,
26 util,
26 vfs as vfsmod,
27 vfs as vfsmod,
27 )
28 )
# convenient local alias for the BytesIO-compatible buffer type
stringio = util.stringio

# unix file-type bits as stored in a zip member's external attributes
# (values taken from the unzip source code)
_UNX_IFREG = 0x8000
_UNX_IFLNK = 0xa000
33
34
def tidyprefix(dest, kind, prefix):
    '''choose prefix to use for names in archive. make sure prefix is
    safe for consumers.'''

    if not prefix:
        # no explicit prefix: derive one from the destination file name,
        # stripping any archive suffix that matches the archive kind
        if not isinstance(dest, bytes):
            raise ValueError('dest must be string if no prefix')
        prefix = os.path.basename(dest)
        lowered = prefix.lower()
        for sfx in exts.get(kind, []):
            if lowered.endswith(sfx):
                prefix = prefix[:-len(sfx)]
                break
    else:
        prefix = util.normpath(prefix)
    lpfx = os.path.normpath(util.localpath(prefix))
    prefix = util.pconvert(lpfx)
    if not prefix.endswith('/'):
        prefix += '/'
    # Drop the leading '.' path component if present, so Windows can read the
    # zip files (issue4634)
    if prefix.startswith('./'):
        prefix = prefix[2:]
    # refuse prefixes that would escape the archive root
    if prefix.startswith('../') or os.path.isabs(lpfx) or '/../' in prefix:
        raise error.Abort(_('archive prefix contains illegal components'))
    return prefix
60
61
# archive kind -> list of file suffixes it is conventionally stored under;
# used both to strip suffixes in tidyprefix() and to guess a kind from a
# destination name in guesskind()
exts = {
    'tar': ['.tar'],
    'tbz2': ['.tbz2', '.tar.bz2'],
    'tgz': ['.tgz', '.tar.gz'],
    'zip': ['.zip'],
    }
67
68
def guesskind(dest):
    '''guess the archive kind from dest's file suffix, or None if unknown.'''
    # use items() instead of the Python-2-only iteritems() so this also
    # runs under Python 3, in line with this module's pycompat-based port;
    # the dict is tiny, so materializing the items on py2 is free
    for kind, extensions in exts.items():
        if any(dest.endswith(ext) for ext in extensions):
            return kind
    return None
73
74
74 def _rootctx(repo):
75 def _rootctx(repo):
75 # repo[0] may be hidden
76 # repo[0] may be hidden
76 for rev in repo:
77 for rev in repo:
77 return repo[rev]
78 return repo[rev]
78 return repo['null']
79 return repo['null']
79
80
# {tags} on ctx includes local tags and 'tip', with no current way to limit
# that to global tags. Therefore, use {latesttag} as a substitute when
# the distance is 0, since that will be the list of global tags on ctx.
# NOTE: this is a template string evaluated by the mercurial templater;
# its text is runtime data and must not be reflowed.
_defaultmetatemplate = br'''
repo: {root}
node: {ifcontains(rev, revset("wdir()"), "{p1node}{dirty}", "{node}")}
branch: {branch|utf8}
{ifeq(latesttagdistance, 0, join(latesttag % "tag: {tag}", "\n"),
 separate("\n",
 join(latesttag % "latesttag: {tag}", "\n"),
 "latesttagdistance: {latesttagdistance}",
 "changessincelatesttag: {changessincelatesttag}"))}
'''[1:] # drop leading '\n'
93
94
def buildmetadata(ctx):
    '''build content of .hg_archival.txt

    Renders the (configurable) archive metadata template against ctx and
    returns the resulting bytes.
    '''
    repo = ctx.repo()

    # the template is user-overridable via experimental.archivemetatemplate
    opts = {
        'template': repo.ui.config('experimental', 'archivemetatemplate',
                                   _defaultmetatemplate)
    }

    out = util.stringio()

    fm = formatter.formatter(repo.ui, out, 'archive', opts)
    fm.startitem()
    fm.context(ctx=ctx)
    fm.data(root=_rootctx(repo).hex())

    if ctx.rev() is None:
        # working-directory archive: flag uncommitted (including missing)
        # changes by appending '+' to the node via the {dirty} keyword
        dirty = ''
        if ctx.dirty(missing=True):
            dirty = '+'
        fm.data(dirty=dirty)
    fm.end()

    return out.getvalue()
118
119
class tarit(object):
    '''write archive to tar file or stream. can write uncompressed,
    or compress with gzip or bzip2.'''

    class GzipFileWithTime(gzip.GzipFile):
        # GzipFile variant that stamps the gzip header with a caller-supplied
        # mtime instead of the current time, so archives of the same
        # changeset are byte-for-byte reproducible.

        def __init__(self, *args, **kw):
            timestamp = None
            if 'timestamp' in kw:
                timestamp = kw.pop(r'timestamp')
            if timestamp is None:
                self.timestamp = time.time()
            else:
                self.timestamp = timestamp
            gzip.GzipFile.__init__(self, *args, **kw)

        def _write_gzip_header(self):
            # re-implements the private gzip.GzipFile header writer so the
            # MTIME field comes from self.timestamp rather than time.time().
            # NOTE(review): relies on py2-era internals (gzip.write32u,
            # long, str writes to self.fileobj) -- confirm before assuming
            # this path works on Python 3.
            self.fileobj.write('\037\213')             # magic header
            self.fileobj.write('\010')                 # compression method
            fname = self.name
            if fname and fname.endswith('.gz'):
                fname = fname[:-3]
            flags = 0
            if fname:
                flags = gzip.FNAME
            self.fileobj.write(chr(flags))
            gzip.write32u(self.fileobj, long(self.timestamp))
            self.fileobj.write('\002')
            self.fileobj.write('\377')
            if fname:
                self.fileobj.write(fname + '\000')

    def __init__(self, dest, mtime, kind=''):
        # dest: file name (str) or writable file object
        # mtime: timestamp stored on every member and in the gzip header
        # kind: '' (plain tar), 'gz' or 'bz2'
        self.mtime = mtime
        self.fileobj = None

        def taropen(mode, name='', fileobj=None):
            if kind == 'gz':
                # build the gzip layer ourselves (via GzipFileWithTime) so
                # the header timestamp is reproducible, then hand the
                # compressed stream to TarFile
                mode = mode[0:1]
                if not fileobj:
                    fileobj = open(name, mode + 'b')
                gzfileobj = self.GzipFileWithTime(name, mode + 'b',
                                                  zlib.Z_BEST_COMPRESSION,
                                                  fileobj, timestamp=mtime)
                self.fileobj = gzfileobj
                return tarfile.TarFile.taropen(name, mode, gzfileobj)
            else:
                # tarfile handles '' and 'bz2' natively
                return tarfile.open(name, mode + kind, fileobj)

        if isinstance(dest, str):
            self.z = taropen('w:', name=dest)
        else:
            # streaming destination: use tarfile's pipe mode
            self.z = taropen('w|', fileobj=dest)

    def addfile(self, name, mode, islink, data):
        # tarfile wants native (unicode on py3) member names
        name = pycompat.fsdecode(name)
        i = tarfile.TarInfo(name)
        i.mtime = self.mtime
        i.size = len(data)
        if islink:
            # symlink: the file content is the link target
            i.type = tarfile.SYMTYPE
            i.mode = 0o777
            i.linkname = pycompat.fsdecode(data)
            data = None
            i.size = 0
        else:
            i.mode = mode
            data = stringio(data)
        self.z.addfile(i, data)

    def done(self):
        self.z.close()
        if self.fileobj:
            # close the gzip wrapper we created in taropen(), if any
            self.fileobj.close()
192
194
class tellable(object):
    '''provide tell method for zipfile.ZipFile when writing to http
    response file object.

    Wraps a write-only stream and tracks the number of bytes written,
    delegating every other attribute to the wrapped object.
    '''

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def __getattr__(self, key):
        # anything we don't implement is served by the wrapped stream
        return getattr(self.fp, key)

    def write(self, s):
        self.fp.write(s)
        self.offset = self.offset + len(s)

    def tell(self):
        # report our own byte counter; the wrapped stream may not seek
        return self.offset
210
212
class zipit(object):
    '''write archive to zip file or stream. can write uncompressed,
    or compressed with deflate.'''

    def __init__(self, dest, mtime, compress=True):
        # dest: file name (bytes or str) or writable file object
        if isinstance(dest, bytes):
            # zipfile on Python 3 wants a native (unicode) path; fsdecode
            # is a no-op on Python 2 where bytes is str
            dest = pycompat.fsdecode(dest)
        elif not isinstance(dest, str):
            # file-like destination: ZipFile requires tell(), so wrap
            # streams (e.g. http responses) that cannot provide it.
            # Note: fsdecode must NOT be applied here -- os.fsdecode raises
            # TypeError for non-path objects on Python 3.
            try:
                dest.tell()
            except (AttributeError, IOError):
                dest = tellable(dest)
        self.z = zipfile.ZipFile(dest, r'w',
                                 compress and zipfile.ZIP_DEFLATED or
                                 zipfile.ZIP_STORED)

        # Python's zipfile module emits deprecation warnings if we try
        # to store files with a date before 1980.
        epoch = 315532800 # calendar.timegm((1980, 1, 1, 0, 0, 0, 1, 1, 0))
        if mtime < epoch:
            mtime = epoch

        self.mtime = mtime
        self.date_time = time.gmtime(mtime)[:6]

    def addfile(self, name, mode, islink, data):
        # member names must be native str for the zipfile module
        i = zipfile.ZipInfo(pycompat.fsdecode(name), self.date_time)
        i.compress_type = self.z.compression
        # unzip will not honor unix file modes unless file creator is
        # set to unix (id 3).
        i.create_system = 3
        ftype = _UNX_IFREG
        if islink:
            # symlinks are stored as regular members whose content is the
            # target; the unix mode bits mark them as links
            mode = 0o777
            ftype = _UNX_IFLNK
        i.external_attr = (mode | ftype) << 16
        # add "extended-timestamp" extra block, because zip archives
        # without this will be extracted with unexpected timestamp,
        # if TZ is not configured as GMT
        i.extra += struct.pack('<hhBl',
                               0x5455,     # block type: "extended-timestamp"
                               1 + 4,      # size of this block
                               1,          # "modification time is present"
                               int(self.mtime)) # last modification (UTC)
        self.z.writestr(i, data)

    def done(self):
        self.z.close()
257
259
class fileit(object):
    '''write archive as files in directory.'''

    def __init__(self, name, mtime):
        self.basedir = name
        self.opener = vfsmod.vfs(self.basedir)
        self.mtime = mtime

    def addfile(self, name, mode, islink, data):
        # symlinks carry their target in 'data'; no mode/mtime fixup applies
        if islink:
            self.opener.symlink(data, name)
            return
        fp = self.opener(name, "w", atomictemp=True)
        fp.write(data)
        fp.close()
        # fix up permissions and (optionally) timestamps after the atomic
        # rename performed on close
        abspath = os.path.join(self.basedir, name)
        os.chmod(abspath, mode)
        if self.mtime is not None:
            os.utime(abspath, (self.mtime, self.mtime))

    def done(self):
        # nothing to finalize: every file was written eagerly
        pass
280
282
# archive kind -> factory callable taking (dest, mtime) and returning an
# archiver object with addfile()/done() methods
archivers = {
    'files': fileit,
    'tar': tarit,
    'tbz2': lambda name, mtime: tarit(name, mtime, 'bz2'),
    'tgz': lambda name, mtime: tarit(name, mtime, 'gz'),
    'uzip': lambda name, mtime: zipit(name, mtime, False),
    'zip': zipit,
    }
289
291
def archive(repo, dest, node, kind, decode=True, matchfn=None,
            prefix='', mtime=None, subrepos=False):
    '''create archive of repo as it was at node.

    dest can be name of directory, name of archive file, or file
    object to write archive to.

    kind is type of archive to create.

    decode tells whether to put files through decode filters from
    hgrc.

    matchfn is function to filter names of files to write to archive.

    prefix is name of path to put before every archive member.

    mtime is the modified time, in seconds, or None to use the changeset time.

    subrepos tells whether to include subrepos.

    Returns the number of files archived; raises error.Abort if nothing
    matched or the kind/prefix combination is invalid.
    '''

    if kind == 'files':
        # a directory archive has no notion of a member prefix
        if prefix:
            raise error.Abort(_('cannot give prefix when archiving to files'))
    else:
        prefix = tidyprefix(dest, kind, prefix)

    def write(name, mode, islink, getdata):
        # fetch content lazily via getdata so decode filters only run on
        # files that are actually written
        data = getdata()
        if decode:
            data = repo.wwritedata(name, data)
        archiver.addfile(prefix + name, mode, islink, data)

    if kind not in archivers:
        raise error.Abort(_("unknown archive type '%s'") % kind)

    ctx = repo[node]
    # default member timestamps to the changeset's commit time
    archiver = archivers[kind](dest, mtime or ctx.date()[0])

    if repo.ui.configbool("ui", "archivemeta"):
        # emit .hg_archival.txt first, subject to the same match filter
        name = '.hg_archival.txt'
        if not matchfn or matchfn(name):
            write(name, 0o644, False, lambda: buildmetadata(ctx))

    if matchfn:
        files = [f for f in ctx.manifest().keys() if matchfn(f)]
    else:
        files = ctx.manifest().keys()
    total = len(files)
    if total:
        files.sort()
        # let extensions (e.g. largefiles/lfs) prefetch contents in bulk
        scmutil.fileprefetchhooks(repo, ctx, files)
        repo.ui.progress(_('archiving'), 0, unit=_('files'), total=total)
        for i, f in enumerate(files):
            ff = ctx.flags(f)
            # 'x' flag -> executable mode, 'l' flag -> symlink
            write(f, 'x' in ff and 0o755 or 0o644, 'l' in ff, ctx[f].data)
            repo.ui.progress(_('archiving'), i + 1, item=f,
                             unit=_('files'), total=total)
        repo.ui.progress(_('archiving'), None)

    if subrepos:
        for subpath in sorted(ctx.substate):
            sub = ctx.workingsub(subpath)
            submatch = matchmod.subdirmatcher(subpath, matchfn)
            # each subrepo appends its own members into the same archiver
            total += sub.archive(archiver, prefix, submatch, decode)

    if total == 0:
        raise error.Abort(_('no files match the archive pattern'))

    archiver.done()
    return total
General Comments 0
You need to be logged in to leave comments. Login now