Skip to content

Commit bb911a2

Browse files
ampleyfly, blurb-it[bot], ethanfurman
authored
gh-75707: tarfile: Add optional open() argument "mtime" (GH-138117)
This makes it possible to set the gzip header mtime field without overriding time.time(), making it useful when creating reproducible archives. * 📜🤖 Added by blurb_it. --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Ethan Furman <ethan@stoneleaf.us>
1 parent 91e871a commit bb911a2

4 files changed

Lines changed: 43 additions & 7 deletions

File tree

Doc/library/tarfile.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ Some facts and figures:
142142
a Zstandard dictionary used to improve compression of smaller amounts of
143143
data.
144144

145+
For modes ``'w:gz'`` and ``'w|gz'``, :func:`tarfile.open` accepts the
146+
keyword argument *mtime* to create a gzip archive header with that mtime. By
147+
default, the mtime is set to the time of creation of the archive.
148+
145149
For special purposes, there is a second format for *mode*:
146150
``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile`
147151
object that processes its data as a stream of blocks. No random seeking will

Lib/tarfile.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ class _Stream:
337337
"""
338338

339339
def __init__(self, name, mode, comptype, fileobj, bufsize,
340-
compresslevel, preset):
340+
compresslevel, preset, mtime):
341341
"""Construct a _Stream object.
342342
"""
343343
self._extfileobj = True
@@ -372,7 +372,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize,
372372
self.exception = zlib.error
373373
self._init_read_gz()
374374
else:
375-
self._init_write_gz(compresslevel)
375+
self._init_write_gz(compresslevel, mtime)
376376

377377
elif comptype == "bz2":
378378
try:
@@ -421,15 +421,17 @@ def __del__(self):
421421
if hasattr(self, "closed") and not self.closed:
422422
self.close()
423423

424-
def _init_write_gz(self, compresslevel):
424+
def _init_write_gz(self, compresslevel, mtime):
425425
"""Initialize for writing with gzip compression.
426426
"""
427427
self.cmp = self.zlib.compressobj(compresslevel,
428428
self.zlib.DEFLATED,
429429
-self.zlib.MAX_WBITS,
430430
self.zlib.DEF_MEM_LEVEL,
431431
0)
432-
timestamp = struct.pack("<L", int(time.time()))
432+
if mtime is None:
433+
mtime = int(time.time())
434+
timestamp = struct.pack("<L", mtime)
433435
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
434436
if self.name.endswith(".gz"):
435437
self.name = self.name[:-3]
@@ -1745,7 +1747,7 @@ class TarFile(object):
17451747
def __init__(self, name=None, mode="r", fileobj=None, format=None,
17461748
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
17471749
errors="surrogateescape", pax_headers=None, debug=None,
1748-
errorlevel=None, copybufsize=None, stream=False):
1750+
errorlevel=None, copybufsize=None, stream=False, mtime=None):
17491751
"""Open an (uncompressed) tar archive 'name'. 'mode' is either 'r' to
17501752
read from an existing archive, 'a' to append data to an existing
17511753
file or 'w' to create a new file overwriting an existing one. 'mode'
@@ -1951,8 +1953,9 @@ def not_compressed(comptype):
19511953

19521954
compresslevel = kwargs.pop("compresslevel", 6)
19531955
preset = kwargs.pop("preset", None)
1956+
mtime = kwargs.pop("mtime", None)
19541957
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1955-
compresslevel, preset)
1958+
compresslevel, preset, mtime)
19561959
try:
19571960
t = cls(name, filemode, stream, **kwargs)
19581961
except:
@@ -1988,7 +1991,8 @@ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=6, **kwargs):
19881991
raise CompressionError("gzip module is not available") from None
19891992

19901993
try:
1991-
fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1994+
mtime = kwargs.pop("mtime", None)
1995+
fileobj = GzipFile(name, mode + "b", compresslevel, fileobj, mtime=mtime)
19921996
except OSError as e:
19931997
if fileobj is not None and mode == 'r':
19941998
raise ReadError("not a gzip file") from e

Lib/test/test_tarfile.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import re
1111
import warnings
1212
import stat
13+
import time
1314

1415
import unittest
1516
import unittest.mock
@@ -1828,6 +1829,19 @@ def test_source_directory_not_leaked(self):
18281829
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
18291830
assert os.path.dirname(tmpname) not in payload
18301831

1832+
def test_create_with_mtime(self):
1833+
tarfile.open(tmpname, self.mode, mtime=0).close()
1834+
with self.open(tmpname, 'r') as fobj:
1835+
fobj.read()
1836+
self.assertEqual(fobj.mtime, 0)
1837+
1838+
def test_create_without_mtime(self):
1839+
before = int(time.time())
1840+
tarfile.open(tmpname, self.mode).close()
1841+
after = int(time.time())
1842+
with self.open(tmpname, 'r') as fobj:
1843+
fobj.read()
1844+
self.assertTrue(before <= fobj.mtime <= after)
18311845

18321846
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
18331847
decompressor = bz2.BZ2Decompressor if bz2 else None
@@ -2134,6 +2148,19 @@ def test_create_with_compresslevel(self):
21342148
with tarfile.open(tmpname, 'r:gz', compresslevel=1) as tobj:
21352149
pass
21362150

2151+
def test_create_with_mtime(self):
2152+
tarfile.open(tmpname, self.mode, mtime=0).close()
2153+
with self.open(tmpname, 'rb') as fobj:
2154+
fobj.read()
2155+
self.assertEqual(fobj.mtime, 0)
2156+
2157+
def test_create_without_mtime(self):
2158+
before = int(time.time())
2159+
tarfile.open(tmpname, self.mode).close()
2160+
after = int(time.time())
2161+
with self.open(tmpname, 'r') as fobj:
2162+
fobj.read()
2163+
self.assertTrue(before <= fobj.mtime <= after)
21372164

21382165
class Bz2CreateTest(Bz2Test, CreateTest):
21392166

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add optional ``mtime`` argument to :func:`tarfile.open`, for setting the ``mtime`` header field in ``.tar.gz`` archives.

0 commit comments

Comments
(0)