diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2017-05-23 10:59:02 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2017-05-23 11:16:58 +0300 |
commit | e184e9cc590cd702d79e983c033df0d60a49f3df (patch) | |
tree | 29dd14b91a3331edefe7f421a4538f25c1860e52 /lib/App | |
parent | ba211551ca112afe9d19221c60afba0ce493526c (diff) | |
download | glacier-e184e9cc590cd702d79e983c033df0d60a49f3df.tar.gz glacier-e184e9cc590cd702d79e983c033df0d60a49f3df.tar.bz2 |
Improve transfer configuration
* lib/App/Glacier/Command.pm (ck_size): New function.
New configuration file sections: [transfer], [transfer upload],
and [transfer download] configure number of jobs and maximal
size for single-part transfer operation.
(cf_transfer_param, dry_run): New methods.
(glacier_eval): Fix context evaluation.
* lib/App/Glacier/Command/Get.pm: Rewrite using new configuration
parameters. Remove the --part-size option. Honor the dry_run
setting.
* lib/App/Glacier/Command/Put.pm: Rewrite using new configuration
parameters. Honor the dry_run setting.
Diffstat (limited to 'lib/App')
-rw-r--r-- | lib/App/Glacier/Command.pm | 53 | ||||
-rw-r--r-- | lib/App/Glacier/Command/Get.pm | 96 | ||||
-rw-r--r-- | lib/App/Glacier/Command/Put.pm | 13 |
3 files changed, 112 insertions, 50 deletions
diff --git a/lib/App/Glacier/Command.pm b/lib/App/Glacier/Command.pm index e46ed6e..515ec31 100644 --- a/lib/App/Glacier/Command.pm +++ b/lib/App/Glacier/Command.pm @@ -59,6 +59,8 @@ use constant { EX_CONFIG => 78 }; +use constant MB => 1024*1024; + sub ck_number { my ($vref) = @_; return "not a number" @@ -66,6 +68,23 @@ sub ck_number { return undef; } +sub ck_size { + my ($vref) = @_; + if ($$vref =~ /^(\d+)\s*([kKmMgG])?$/) { + my $size = $1; + if ($2) { + my $suf = lc $2; + foreach my $m (qw(k m g)) { + $size *= 1024; + last if $m eq $suf; + } + } + $$vref = $size; + } else { + return 'invalid size specification'; + } +} + my %parameters = ( glacier => { section => { @@ -73,8 +92,24 @@ my %parameters = ( access => 1, secret => 1, region => 1, - 'multipart-upload-size' => { default => 100*1024*1024, - check => \&ck_number }, + } + }, + transfer => { + section => { + 'single-part-size' => { default => 100*MB, check => \&ck_size }, + 'jobs' => { default => 16, check => \&ck_number }, + upload => { + section => { + 'single-part-size' => { check => \&ck_size }, + 'jobs' => { check => \&ck_number }, + } + }, + download => { + section => { + 'single-part-size' => { check => \&ck_size }, + 'jobs' => { check => \&ck_number }, + } + } } }, database => { @@ -259,6 +294,12 @@ sub cfget { return $self->config->get(@path); } +sub cf_transfer_param { + my ($self, $type, $param) = @_; + return $self->cfget('transfer', $type, $param) + || $self->cfget('transfer', $param); +} + sub error { my ($self, @msg) = @_; print STDERR "$self->{_progname}: " if $self->{_progname}; @@ -274,6 +315,11 @@ sub debug { } } +sub dry_run { + my $self = shift; + return $self->{_dry_run}; +} + sub abend { my ($self, $code, @msg) = @_; $self->error(@msg); @@ -288,8 +334,9 @@ sub run { sub glacier_eval { my $self = shift; my $method = shift; + my $wantarray = wantarray; my $ret = http_catch(sub { - wantarray ? [ $self->{_glacier}->${\$method}(@_) ] + $wantarray ? 
[ $self->{_glacier}->${\$method}(@_) ] : $self->{_glacier}->${\$method}(@_) }, err => \my %err, diff --git a/lib/App/Glacier/Command/Get.pm b/lib/App/Glacier/Command/Get.pm index 09e739f..5666d36 100644 --- a/lib/App/Glacier/Command/Get.pm +++ b/lib/App/Glacier/Command/Get.pm @@ -15,13 +15,11 @@ glacier get - download file from a vault B<glacier put> [B<-fit>] -[B<-s> I<N>] [B<--force>] [B<--interactive>] [B<-j> I<NJOBS>] [B<--jobs=>I<NJOBS>] [B<--no-clobber>] -[B<--part-size=>I<N>] [B<--test>] I<VAULT> I<FILE> @@ -42,7 +40,6 @@ sub getopt { 'interactive|i' => sub { $self->{_options}{ifexists} = IFEXISTS_ASK }, 'force|f' => sub { $self->{_options}{ifexists} = IFEXISTS_OVERWRITE }, 'no-clobber|f' => sub { $self->{_options}{ifexists} = IFEXISTS_KEEP }, - 'part-size|s=i' => \$self->{_options}{part_size}, 'jobs|j=i' => \$self->{_options}{jobs}, 'test|t' => \$self->{_options}{test}, %opts); @@ -95,7 +92,12 @@ sub run { } if ($job->is_completed) { - $self->download($job, $localname); + my $tree_hash = $self->download($job, $localname); + if (!$self->dry_run + && $tree_hash ne $job->get('ArchiveSHA256TreeHash')) { + unlink $localname; + $self->abend(EX_SOFTWARE, "downloaded file is corrupt"); + } } else { my ($status, $message) = $job->status; if ($status eq 'InProgress') { @@ -117,50 +119,75 @@ use constant TWOMB => 2*MB; sub download { my ($self, $job, $localname) = @_; + my $archive_size = $job->get('ArchiveSizeInBytes'); + if ($archive_size < $self->cf_transfer_param(qw(download single-part-size))) { + # simple download + $self->_download_simple($job, $localname); + } else { + $self->_download_multipart($job, $localname); + } +} - use threads; - use threads::shared; - - my $fd; - open($fd, '>', $localname) +sub _open_output { + my ($self, $localname) = @_; + open(my $fd, '>', $localname) or $self->abort(EX_FAILURE, "can't open $localname: $!"); binmode($fd); truncate($fd, 0); + return $fd; +} + +sub _download_simple { + my ($self, $job, $localname) = @_; + + 
$self->debug(1, "downloading", $job->file_name(1), "in single part"); + return if $self->dry_run; + my $fd = $self->_open_output($localname); + my ($res, $tree_hash) = $self->glacier_eval('get_job_output', + $job->vault, $job->id); + if ($self->lasterr) { + $self->abend(EX_FAILURE, "downoad failed: ", + $self->last_error_message); + } + syswrite($fd, $res); + close($fd); + return $tree_hash; +} + +sub _download_multipart { + my ($self, $job, $localname) = @_; - my $archive_size = $job->get('ArchiveSizeInBytes'); + use threads; + use threads::shared; + + my $glacier = $self->{_glacier}; - my $njobs; + my $tree_hash; + + my $njobs = $self->{_options}{jobs} + || $self->cf_transfer_param(qw(download jobs)); + + my $archive_size = $job->get('ArchiveSizeInBytes'); my $part_size; - if ($self->{_options}{jobs}) { - $njobs = $self->{_options}{jobs}; - # Compute approximate part size - $part_size = int(($archive_size + $njobs - 1) / $njobs); - } else { - $part_size = ($self->{_options}{part_size} || 10 * MB); - } + # Compute approximate part size + $part_size = int(($archive_size + $njobs - 1) / $njobs); # Make sure the chunk is Tree-Hash aligned # http://docs.aws.amazon.com/amazonglacier/latest/dev/checksum-calculations-range.html?shortFooter=true#checksum-calculations-upload-archive-with-ranges - $part_size = TWOMB * 2 ** int(log($part_size / TWOMB) / log(2)); + $part_size = TWOMB * 2 ** int(log($part_size / TWOMB) / log(2) + 1); # Adjust the number of jobs $njobs = int(($archive_size + $part_size - 1) / $part_size); - my $glacier = $self->{_glacier}; - - my $tree_hash; - - if ($njobs <= 1 || $archive_size < $part_size) { - # simple download - my $res; - $self->debug(1, "downloading", $job->file_name(1), "in single part"); - ($res, $tree_hash) = $glacier->get_job_output($job->vault, $job->id); - syswrite($fd, $res); + if ($njobs <= 1) { + return $self->_download_simple($job, $localname); } else { use Fcntl qw(SEEK_SET); $self->debug(1, "downloading", $job->file_name(1), 
"to $localname in chunks of $part_size bytes, in $njobs jobs"); + return if $self->dry_run; + my $fd = $self->_open_output($localname); my @part_hashes :shared = (); my $read_bytes; my $rest_size = $archive_size; @@ -189,16 +216,9 @@ sub download { # FIXME: error handling $thr->join() or croak "thread $thr failed"; } - $tree_hash = $glacier->_tree_hash_from_array_ref(\@part_hashes); - } - - close($fd); -# print $tree_hash. ' ', $job->get('ArchiveSHA256TreeHash') , "\n"; - if ($tree_hash ne $job->get('ArchiveSHA256TreeHash')) { - unlink $localname; - $self->abend(EX_SOFTWARE, "downloaded file is corrupt"); + close($fd); + return $glacier->_tree_hash_from_array_ref(\@part_hashes); } - print "finished\n"; } 1; diff --git a/lib/App/Glacier/Command/Put.pm b/lib/App/Glacier/Command/Put.pm index d0e6cc0..594c957 100644 --- a/lib/App/Glacier/Command/Put.pm +++ b/lib/App/Glacier/Command/Put.pm @@ -44,7 +44,7 @@ sub _upload { or $self->abend(EX_USAGE, "can't stat \"$localname\": $!"); my $dir = $self->directory($vaultname); - my $id = ($size < $self->cfget(qw(glacier multipart-upload-size))) + my $id = ($size < $self->cf_transfer_param(qw(upload single-part-size))) ? $self->_upload_simple($vaultname, $localname, $remotename) : $self->_upload_multipart($vaultname, $localname, $remotename); $self->debug(1, "ID $id\n"); @@ -89,20 +89,15 @@ sub _upload_multipart { my $total_parts = int(($archive_size + $part_size - 1) / $part_size); # Compute number of threads - my $njobs = $self->{_options}{jobs}; - if (defined($njobs)) { - # Allow at most so many jobs as there are parts - $njobs = $total_parts if $njobs > $total_parts; - } else { - # Select default. - $njobs = $total_parts < 16 ? 
$total_parts : 16; - } + my $njobs = $self->{_options}{jobs} + || $self->cf_transfer_param(qw(upload jobs)); # Number of parts to upload by each job; my $job_parts = int(($total_parts + $njobs - 1) / $njobs); $self->debug(1, "uploading $localname in chunks of $part_size bytes, in $njobs jobs"); + return if $self->dry_run; open(my $fd, '<', $localname) or $self->abort(EX_FAILURE, "can't open $localname: $!"); |