retar.py
87 lines
| 2.0 KiB
| text/x-python
|
PythonLexer
/ tools / retar.py
Matthias Bussonnier
|
r25769 | """ | ||
Un-targz and retargz a targz file to ensure reproducible build. | ||||
usage: | ||||
$ export SOURCE_DATE_EPOCH=$(date +%s) | ||||
Matthias Bussonnier
|
r27713 | # or | ||
$ export SOURCE_DATE_EPOCH=$(git show -s --format=%ct HEAD) | ||||
Matthias Bussonnier
|
r25769 | ... | ||
$ python retar.py <tarfile.gz> | ||||
The process of creating an sdist can be non-reproducible: | ||||
- directories created during the process get an mtime of the creation date; | ||||
Dimitri Papadopoulos
|
r26875 | - gzipping files embeds the timestamp of gzip creation. | ||
Matthias Bussonnier
|
r25769 | |||
This will untar-retar; ensuring that all mtime > SOURCE_DATE_EPOCH will be set | ||||
equal to SOURCE_DATE_EPOCH. | ||||
""" | ||||
import tarfile | ||||
import sys | ||||
import os | ||||
import gzip | ||||
import io | ||||
rushabh-v
|
r25998 | from pathlib import Path | ||
Matthias Bussonnier
|
def _read_mode(path):
    """Return the ``tarfile`` read mode matching the suffix of *path*.

    Raises:
        ValueError: if the name ends with ``xz`` (explicitly rejected) or
            with anything other than ``gz`` / ``bz2``.
    """
    name = path.name
    if name.endswith("xz"):
        raise ValueError("XZ is deprecated but it's written nowhere")
    if name.endswith("bz2"):
        return "r:bz2"
    if name.endswith("gz"):
        return "r:gz"
    # Fail with a clear message instead of leaving the mode undefined.
    raise ValueError(
        "Unsupported archive extension: {!r} (expected gz or bz2)".format(name)
    )


def _clamped_copy(member, timestamp):
    """Build a fresh ``TarInfo`` for *member* with its mtime capped at *timestamp*.

    Only name, mtime, size, type, linkname and mode are carried over; every
    other field keeps the ``TarInfo`` default.
    """
    # Mutating the member read from the source archive does not work, so a
    # new TarInfo is populated instead.
    clone = tarfile.TarInfo(member.name)
    clone.mtime = min(timestamp, member.mtime)
    # NOTE(review): kept from the original script; presumably ignored when the
    # archive is written with GNU_FORMAT -- confirm before removing.
    clone.pax_headers["mtime"] = clone.mtime
    clone.size = member.size
    clone.type = member.type
    clone.linkname = member.linkname
    clone.mode = member.mode
    return clone


def retar(path, timestamp):
    """Rewrite the compressed tar archive at *path* in place, reproducibly.

    Every member is copied into a new GNU-format archive with its mtime set
    to ``min(timestamp, original mtime)``; members whose name ends with
    ``.DS_Store`` are dropped. The result is recompressed with the same
    compression (gzip or bz2) and written over *path*. For gzip, *timestamp*
    is also used as the mtime stored in the gzip header.

    Args:
        path: location of the ``.gz`` / ``.bz2`` tarball (str or ``Path``).
        timestamp: integer upper bound for member mtimes, typically
            ``SOURCE_DATE_EPOCH``.

    Returns:
        The member names of the rewritten archive, obtained by re-opening it
        as a final validity check.

    Raises:
        ValueError: if the file extension is not supported.
    """
    path = Path(path)
    r_mode = _read_mode(path)

    # Load the whole archive in memory first: the file is overwritten below.
    source = io.BytesIO(path.read_bytes())
    rebuilt = io.BytesIO()
    with tarfile.open(fileobj=source, mode=r_mode) as old:
        with tarfile.open(
            fileobj=rebuilt, mode="w", format=tarfile.GNU_FORMAT
        ) as new:
            for member in old:
                if member.name.endswith(".DS_Store"):
                    continue
                clone = _clamped_copy(member, timestamp)
                if member.isdir():
                    new.addfile(clone)
                else:
                    new.addfile(clone, old.extractfile(member))
    payload = rebuilt.getvalue()

    if r_mode == "r:gz":
        with open(path, "wb") as f:
            # Empty filename and fixed mtime keep the gzip header stable.
            with gzip.GzipFile("", "wb", fileobj=f, mtime=timestamp) as gzf:
                gzf.write(payload)
    else:
        # _read_mode only ever returns "r:gz" or "r:bz2".
        import bz2

        with bz2.open(path, "wb") as f:
            f.write(payload)

    # Checks the archive is valid.
    with tarfile.open(path) as archive:
        return archive.getnames()


def main(argv=None):
    """Command-line entry point: ``python retar.py <tarfile>``.

    Reads the mtime bound from the ``SOURCE_DATE_EPOCH`` environment variable.

    Raises:
        ValueError: on too many or missing arguments.
        KeyError: if ``SOURCE_DATE_EPOCH`` is not set.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) > 2:
        raise ValueError("Too many arguments")
    timestamp = int(os.environ["SOURCE_DATE_EPOCH"])
    if len(argv) < 2:
        raise ValueError("Missing argument: path to the tar archive")
    retar(Path(argv[1]), timestamp)


if __name__ == "__main__":
    main()