summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZachary Vance <za3k@za3k.com>2019-03-10 19:12:19 -0700
committerSergey Poznyakoff <gray@gnu.org>2019-03-28 13:37:07 +0200
commitef0f882382f6faac51aa343193f8d740dff91512 (patch)
tree033ce1f884495740c6050a4d07d9b4125253865f
parent66162927ebdfe9dd4ef570a132663fd76217952f (diff)
downloadtar-ef0f882382f6faac51aa343193f8d740dff91512.tar.gz
tar-ef0f882382f6faac51aa343193f8d740dff91512.tar.bz2
POSIX extended format headers do not include PID by default
The intent is to make binary-equivalent PAX archives easy to create. If POSIXLY_CORRECT is set, the POSIX standard default is used, which embeds the pid. * src/common.h (posixly_correct): New global. * src/tar.c (decode_options): Detect the POSIXLY_CORRECT environment variable. * src/buffer.c (add_chunk_header): Change filenames of multipart files to omit the pid. * src/xheader.c (HEADER_TEMPLATE): New macro. (xheader_xhdr_name, xheader_ghdr_name): Use HEADER_TEMPLATE to select the template for the POSIX extended header name. * doc/tar.texi: Document the change. Signed-off-by: Zachary Vance <za3k@za3k.com>
-rw-r--r--doc/tar.texi66
-rw-r--r--src/buffer.c2
-rw-r--r--src/common.h3
-rw-r--r--src/tar.c2
-rw-r--r--src/xheader.c27
5 files changed, 73 insertions, 27 deletions
diff --git a/doc/tar.texi b/doc/tar.texi
index 71318f3e..0296c429 100644
--- a/doc/tar.texi
+++ b/doc/tar.texi
@@ -10458,9 +10458,16 @@ If no option @samp{exthdr.name=string} is specified, @command{tar}
will use the following default value:
@smallexample
-%d/PaxHeaders.%p/%f
+%d/PaxHeaders/%f
@end smallexample
+This default is selected to ensure the reproducibility of the
+archive. The @acronym{POSIX} standard recommends using
+@samp{%d/PaxHeaders.%p/%f} instead, which means that two archives
+created with the same set of options and containing the same set
+of files will differ byte for byte. The @acronym{POSIX} template is
+used if the environment variable @env{POSIXLY_CORRECT} is set.
+
@item exthdr.mtime=@var{value}
This keyword defines the value of the @samp{mtime} field that
@@ -10490,11 +10497,17 @@ If no option @samp{globexthdr.name=string} is specified, @command{tar}
will use the following default value:
@smallexample
+$TMPDIR/GlobalHead.%n
+@end smallexample
+
+If the environment variable @env{POSIXLY_CORRECT} is set, the
+following value is used instead:
+
+@smallexample
$TMPDIR/GlobalHead.%p.%n
@end smallexample
-@noindent
-where @samp{$TMPDIR} represents the value of the @var{TMPDIR}
+In both cases, @samp{$TMPDIR} stands for the value of the @var{TMPDIR}
environment variable. If @var{TMPDIR} is not set, @command{tar}
uses @samp{/tmp}.
@@ -10557,7 +10570,7 @@ archives created using it, will be binary equivalent if they have the
same contents:
@smallexample
---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0
+--pax-option=atime:=0
@end smallexample
@noindent
@@ -10566,14 +10579,27 @@ from them, you will also need to eliminate changes due to ctime, as
shown in examples below:
@smallexample
---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0,ctime:=0
+--pax-option=atime:=0,ctime:=0
@end smallexample
@noindent
or
@smallexample
---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0,delete=ctime
+--pax-option=atime:=0,delete=ctime
+@end smallexample
+
+Notice that if you create an archive in POSIX format (@pxref{posix})
+and the environment variable @env{POSIXLY_CORRECT} is set, then
+two archives created using the same options on the same set of files
+will not be byte-to-byte equivalent even with the above option. This
+is because the @acronym{POSIX} default for extended header names includes the
+PID of the tar process, which is different at each run. To produce
+byte-to-byte equivalent archives in this case, either unset
+@env{POSIXLY_CORRECT}, or use the following option:
+
+@smallexample
+---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0,ctime:=0
@end smallexample
@node Checksumming
@@ -10699,7 +10725,7 @@ GNU extensions. More specifically, the very first part retains its
original name, and all subsequent parts are named using the pattern:
@smallexample
-%d/GNUFileParts.%p/%f.%n
+%d/GNUFileParts/%f.%n
@end smallexample
@noindent
@@ -10718,13 +10744,12 @@ created the archive.
@end multitable
For example, if the file @file{var/longfile} was split during archive
-creation between three volumes, and the creator @command{tar} process
-had process @acronym{ID} @samp{27962}, then the member names will be:
+creation between three volumes, then the member names will be:
@smallexample
var/longfile
-var/GNUFileParts.27962/longfile.1
-var/GNUFileParts.27962/longfile.2
+var/GNUFileParts/longfile.1
+var/GNUFileParts/longfile.2
@end smallexample
When you extract your archive using a third-party @command{tar}, these
@@ -10735,9 +10760,9 @@ the proper order, for example:
@smallexample
@group
$ @kbd{cd var}
-$ @kbd{cat GNUFileParts.27962/longfile.1 \
- GNUFileParts.27962/longfile.2 >> longfile}
-$ rm -f GNUFileParts.27962
+$ @kbd{cat GNUFileParts/longfile.1 \
+ GNUFileParts/longfile.2 >> longfile}
+$ rm -f GNUFileParts
@end group
@end smallexample
@@ -10763,12 +10788,12 @@ more warnings and more files generated on your disk, e.g.:
@smallexample
@group
$ @kbd{tar xf vol-1.tar}
-var/PaxHeaders.27962/longfile: Unknown file type 'x', extracted as
+var/PaxHeaders/longfile: Unknown file type 'x', extracted as
normal file
Unexpected EOF in archive
$ @kbd{tar xf vol-2.tar}
-tmp/GlobalHead.27962.1: Unknown file type 'g', extracted as normal file
-GNUFileParts.27962/PaxHeaders.27962/sparsefile.1: Unknown file type
+tmp/GlobalHead.1: Unknown file type 'g', extracted as normal file
+GNUFileParts/PaxHeaders/sparsefile.1: Unknown file type
'x', extracted as normal file
@end group
@end smallexample
@@ -10884,8 +10909,8 @@ use. Continuing our example:
@smallexample
@group
-$ @kbd{xsparse -v -x /home/gray/PaxHeaders.6058/sparsefile \
- /home/gray/GNUSparseFile.6058/sparsefile}
+$ @kbd{xsparse -v -x /home/gray/PaxHeaders/sparsefile \
+ /home/gray/GNUSparseFile/sparsefile}
Reading extended header file
Found variable GNU.sparse.major = 1
Found variable GNU.sparse.minor = 0
@@ -10915,8 +10940,7 @@ If you use a @command{tar} implementation that does not support PAX
format, extended headers for each member will be extracted as a
separate file. If we represent the member name as
@file{@var{dir}/@var{name}}, then the extended header file will be
-named @file{@var{dir}/@/PaxHeaders.@var{n}/@/@var{name}}, where
-@var{n} is an integer number.
+named @file{@var{dir}/@/PaxHeaders/@/@var{name}}.
Things become more difficult if your @command{tar} implementation
does support PAX headers, because in this case you will have to
diff --git a/src/buffer.c b/src/buffer.c
index 7781009a..151710ce 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -1731,7 +1731,7 @@ add_chunk_header (struct bufmap *map)
st.stat.st_uid = getuid ();
st.stat.st_gid = getgid ();
st.orig_file_name = xheader_format_name (&st,
- "%d/GNUFileParts.%p/%f.%n",
+ "%d/GNUFileParts/%f.%n",
volno);
st.file_name = st.orig_file_name;
st.archive_file_size = st.stat.st_size = map->sizeleft;
diff --git a/src/common.h b/src/common.h
index 501e0d3a..2396e767 100644
--- a/src/common.h
+++ b/src/common.h
@@ -340,6 +340,9 @@ GLOBAL const char *volume_label_option;
/* Other global variables. */
+/* Force POSIX-compliance */
+GLOBAL bool posixly_correct;
+
/* File descriptor for archive file. */
GLOBAL int archive;
diff --git a/src/tar.c b/src/tar.c
index d66fed86..250f9f2d 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -2230,6 +2230,8 @@ decode_options (int argc, char **argv)
args.version_control_string = 0;
args.compress_autodetect = false;
+ posixly_correct = getenv ("POSIXLY_CORRECT") != NULL;
+
subcommand_option = UNKNOWN_SUBCOMMAND;
archive_format = DEFAULT_FORMAT;
blocking_factor = DEFAULT_BLOCKING;
diff --git a/src/xheader.c b/src/xheader.c
index 46bf4bbe..eaeea5d4 100644
--- a/src/xheader.c
+++ b/src/xheader.c
@@ -369,29 +369,46 @@ xheader_format_name (struct tar_stat_info *st, const char *fmt, size_t n)
return buf;
}
+/* Table of templates for the names of POSIX extended headers.
+ Indexed by the type of the header (per-file or global)
+ and POSIX compliance mode (0 or 1 depending on whether the
+ POSIXLY_CORRECT environment variable is set). */
+static const char *header_template[][2] = {
+ /* Individual header templates: */
+ { "%d/PaxHeaders/%f", "%d/PaxHeaders.%p/%f" },
+ /* Global header templates: */
+ { "/GlobalHead.%n", "/GlobalHead.%p.%n" }
+};
+/* Indices to the above table */
+enum {
+ pax_file_header,
+ pax_global_header
+};
+/* Return the name template for the POSIX extended header type T */
+#define HEADER_TEMPLATE(t) header_template[t][posixly_correct]
+
char *
xheader_xhdr_name (struct tar_stat_info *st)
{
if (!exthdr_name)
- assign_string (&exthdr_name, "%d/PaxHeaders.%p/%f");
+ assign_string (&exthdr_name, HEADER_TEMPLATE (pax_file_header));
return xheader_format_name (st, exthdr_name, 0);
}
-#define GLOBAL_HEADER_TEMPLATE "/GlobalHead.%p.%n"
-
char *
xheader_ghdr_name (void)
{
if (!globexthdr_name)
{
size_t len;
+ const char *global_header_template = HEADER_TEMPLATE (pax_global_header);
const char *tmp = getenv ("TMPDIR");
if (!tmp)
tmp = "/tmp";
- len = strlen (tmp) + sizeof (GLOBAL_HEADER_TEMPLATE); /* Includes nul */
+ len = strlen (tmp) + strlen (global_header_template) + 1;
globexthdr_name = xmalloc (len);
strcpy(globexthdr_name, tmp);
- strcat(globexthdr_name, GLOBAL_HEADER_TEMPLATE);
+ strcat(globexthdr_name, global_header_template);
}
return xheader_format_name (NULL, globexthdr_name, global_header_count + 1);

Return to:

Send suggestions and report system problems to the System administrator.