diff options
author | Zachary Vance <za3k@za3k.com> | 2019-03-10 19:12:19 -0700 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2019-03-28 13:37:07 +0200 |
commit | ef0f882382f6faac51aa343193f8d740dff91512 (patch) | |
tree | 033ce1f884495740c6050a4d07d9b4125253865f | |
parent | 66162927ebdfe9dd4ef570a132663fd76217952f (diff) | |
download | tar-ef0f882382f6faac51aa343193f8d740dff91512.tar.gz tar-ef0f882382f6faac51aa343193f8d740dff91512.tar.bz2 |
POSIX extended format headers do not include PID by default
The intent is to make binary-equivalent PAX archives easy to create. If
POSIXLY_CORRECT is set, the POSIX standard default is used, which embeds
the pid.
* src/common.h (posixly_correct): New global.
* src/tar.c (decode_options): Detect the POSIXLY_CORRECT environment
variable.
* src/buffer.c (add_chunk_header): Change filenames of multipart files to
omit the pid.
* src/xheader.c (HEADER_TEMPLATE): New macro.
(xheader_xhdr_name, xheader_ghdr_name): Use HEADER_TEMPLATE to select
the template for the POSIX extended header name.
* doc/tar.texi: Document the change.
Signed-off-by: Zachary Vance <za3k@za3k.com>
-rw-r--r-- | doc/tar.texi | 66 | ||||
-rw-r--r-- | src/buffer.c | 2 | ||||
-rw-r--r-- | src/common.h | 3 | ||||
-rw-r--r-- | src/tar.c | 2 | ||||
-rw-r--r-- | src/xheader.c | 27 |
5 files changed, 73 insertions, 27 deletions
diff --git a/doc/tar.texi b/doc/tar.texi index 71318f3e..0296c429 100644 --- a/doc/tar.texi +++ b/doc/tar.texi @@ -10458,9 +10458,16 @@ If no option @samp{exthdr.name=string} is specified, @command{tar} will use the following default value: @smallexample -%d/PaxHeaders.%p/%f +%d/PaxHeaders/%f @end smallexample +This default is selected to ensure the reproducibility of the +archive. The @acronym{POSIX} standard recommends using +@samp{%d/PaxHeaders.%p/%f} instead, which means that two archives +created with the same set of options and containing the same set +of files will differ byte for byte. The @acronym{POSIX} default is used +if the environment variable @env{POSIXLY_CORRECT} is set. + @item exthdr.mtime=@var{value} This keyword defines the value of the @samp{mtime} field that @@ -10490,11 +10497,17 @@ If no option @samp{globexthdr.name=string} is specified, @command{tar} will use the following default value: @smallexample +$TMPDIR/GlobalHead.%n +@end smallexample + +If the environment variable @env{POSIXLY_CORRECT} is set, the +following value is used instead: + +@smallexample $TMPDIR/GlobalHead.%p.%n @end smallexample -@noindent -where @samp{$TMPDIR} represents the value of the @var{TMPDIR} +In both cases, @samp{$TMPDIR} stands for the value of the @var{TMPDIR} environment variable. If @var{TMPDIR} is not set, @command{tar} uses @samp{/tmp}. 
@@ -10557,7 +10570,7 @@ archives created using it, will be binary equivalent if they have the same contents: @smallexample ---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0 +--pax-option=atime:=0 @end smallexample @noindent @@ -10566,14 +10579,27 @@ from them, you will also need to eliminate changes due to ctime, as shown in examples below: @smallexample ---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0,ctime:=0 +--pax-option=atime:=0,ctime:=0 @end smallexample @noindent or @smallexample ---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0,delete=ctime +--pax-option=atime:=0,delete=ctime +@end smallexample + +Note that if you create an archive in POSIX format (@pxref{posix}) +and the environment variable @env{POSIXLY_CORRECT} is set, then +two archives created using the same options on the same set of files +will not be byte-to-byte equivalent even with the above option. This +is because the @acronym{POSIX} default for extended header names includes the +PID of the @command{tar} process, which is different at each run. To produce +byte-to-byte equivalent archives in this case, either unset +@env{POSIXLY_CORRECT}, or use the following option: + +@smallexample +---pax-option=exthdr.name=%d/PaxHeaders/%f,atime:=0,ctime:=0 @end smallexample @node Checksumming @@ -10699,7 +10725,7 @@ GNU extensions. More specifically, the very first part retains its original name, and all subsequent parts are named using the pattern: @smallexample -%d/GNUFileParts.%p/%f.%n +%d/GNUFileParts/%f.%n @end smallexample @noindent @@ -10718,13 +10744,12 @@ created the archive. 
@end multitable For example, if the file @file{var/longfile} was split during archive -creation between three volumes, and the creator @command{tar} process -had process @acronym{ID} @samp{27962}, then the member names will be: +creation between three volumes, then the member names will be: @smallexample var/longfile -var/GNUFileParts.27962/longfile.1 -var/GNUFileParts.27962/longfile.2 +var/GNUFileParts/longfile.1 +var/GNUFileParts/longfile.2 @end smallexample When you extract your archive using a third-party @command{tar}, these @@ -10735,9 +10760,9 @@ the proper order, for example: @smallexample @group $ @kbd{cd var} -$ @kbd{cat GNUFileParts.27962/longfile.1 \ - GNUFileParts.27962/longfile.2 >> longfile} -$ rm -f GNUFileParts.27962 +$ @kbd{cat GNUFileParts/longfile.1 \ + GNUFileParts/longfile.2 >> longfile} +$ rm -f GNUFileParts @end group @end smallexample @@ -10763,12 +10788,12 @@ more warnings and more files generated on your disk, e.g.: @smallexample @group $ @kbd{tar xf vol-1.tar} -var/PaxHeaders.27962/longfile: Unknown file type 'x', extracted as +var/PaxHeaders/longfile: Unknown file type 'x', extracted as normal file Unexpected EOF in archive $ @kbd{tar xf vol-2.tar} -tmp/GlobalHead.27962.1: Unknown file type 'g', extracted as normal file -GNUFileParts.27962/PaxHeaders.27962/sparsefile.1: Unknown file type +tmp/GlobalHead.1: Unknown file type 'g', extracted as normal file +GNUFileParts/PaxHeaders/sparsefile.1: Unknown file type 'x', extracted as normal file @end group @end smallexample @@ -10884,8 +10909,8 @@ use. 
Continuing our example: @smallexample @group -$ @kbd{xsparse -v -x /home/gray/PaxHeaders.6058/sparsefile \ - /home/gray/GNUSparseFile.6058/sparsefile} +$ @kbd{xsparse -v -x /home/gray/PaxHeaders/sparsefile \ + /home/gray/GNUSparseFile/sparsefile} Reading extended header file Found variable GNU.sparse.major = 1 Found variable GNU.sparse.minor = 0 @@ -10915,8 +10940,7 @@ If you use a @command{tar} implementation that does not support PAX format, extended headers for each member will be extracted as a separate file. If we represent the member name as @file{@var{dir}/@var{name}}, then the extended header file will be -named @file{@var{dir}/@/PaxHeaders.@var{n}/@/@var{name}}, where -@var{n} is an integer number. +named @file{@var{dir}/@/PaxHeaders/@/@var{name}}. Things become more difficult if your @command{tar} implementation does support PAX headers, because in this case you will have to diff --git a/src/buffer.c b/src/buffer.c index 7781009a..151710ce 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1731,7 +1731,7 @@ add_chunk_header (struct bufmap *map) st.stat.st_uid = getuid (); st.stat.st_gid = getgid (); st.orig_file_name = xheader_format_name (&st, - "%d/GNUFileParts.%p/%f.%n", + "%d/GNUFileParts/%f.%n", volno); st.file_name = st.orig_file_name; st.archive_file_size = st.stat.st_size = map->sizeleft; diff --git a/src/common.h b/src/common.h index 501e0d3a..2396e767 100644 --- a/src/common.h +++ b/src/common.h @@ -340,6 +340,9 @@ GLOBAL const char *volume_label_option; /* Other global variables. */ +/* Force POSIX-compliance */ +GLOBAL bool posixly_correct; + /* File descriptor for archive file. 
*/ GLOBAL int archive; diff --git a/src/tar.c b/src/tar.c --- a/src/tar.c +++ b/src/tar.c @@ -2230,6 +2230,8 @@ decode_options (int argc, char **argv) args.version_control_string = 0; args.compress_autodetect = false; + posixly_correct = getenv ("POSIXLY_CORRECT") != NULL; + subcommand_option = UNKNOWN_SUBCOMMAND; archive_format = DEFAULT_FORMAT; blocking_factor = DEFAULT_BLOCKING; diff --git a/src/xheader.c b/src/xheader.c index 46bf4bbe..eaeea5d4 100644 --- a/src/xheader.c +++ b/src/xheader.c @@ -369,29 +369,46 @@ xheader_format_name (struct tar_stat_info *st, const char *fmt, size_t n) return buf; } +/* Table of templates for the names of POSIX extended headers. + Indexed by the type of the header (per-file or global) + and POSIX compliance mode (0 or 1 depending on whether + POSIXLY_CORRECT environment variable is set). */ +static const char *header_template[][2] = { + /* Individual header templates: */ + { "%d/PaxHeaders/%f", "%d/PaxHeaders.%p/%f" }, + /* Global header templates: */ + { "/GlobalHead.%n", "/GlobalHead.%p.%n" } +}; +/* Indices to the above table */ +enum { + pax_file_header, + pax_global_header +}; +/* Return the name for the POSIX extended header T */ +#define HEADER_TEMPLATE(t) header_template[t][posixly_correct] + char * xheader_xhdr_name (struct tar_stat_info *st) { if (!exthdr_name) - assign_string (&exthdr_name, "%d/PaxHeaders.%p/%f"); + assign_string (&exthdr_name, HEADER_TEMPLATE (pax_file_header)); return xheader_format_name (st, exthdr_name, 0); } -#define GLOBAL_HEADER_TEMPLATE "/GlobalHead.%p.%n" - char * xheader_ghdr_name (void) { if (!globexthdr_name) { size_t len; + const char *global_header_template = HEADER_TEMPLATE (pax_global_header); const char *tmp = getenv ("TMPDIR"); if (!tmp) tmp = "/tmp"; - len = strlen (tmp) + sizeof (GLOBAL_HEADER_TEMPLATE); /* Includes nul */ + len = strlen (tmp) + strlen (global_header_template) + 1; globexthdr_name = xmalloc (len); strcpy(globexthdr_name, tmp); - strcat(globexthdr_name, GLOBAL_HEADER_TEMPLATE); + strcat(globexthdr_name, global_header_template); 
} return xheader_format_name (NULL, globexthdr_name, global_header_count + 1); |