aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2017-05-23 10:59:02 +0300
committerSergey Poznyakoff <gray@gnu.org.ua>2017-05-23 11:16:58 +0300
commite184e9cc590cd702d79e983c033df0d60a49f3df (patch)
tree29dd14b91a3331edefe7f421a4538f25c1860e52 /lib
parentba211551ca112afe9d19221c60afba0ce493526c (diff)
downloadglacier-e184e9cc590cd702d79e983c033df0d60a49f3df.tar.gz
glacier-e184e9cc590cd702d79e983c033df0d60a49f3df.tar.bz2
Improve transfer configuration
* lib/App/Glacier/Command.pm (ck_size): New function. New configuration file sections: [transfer], [transfer upload], and [transfer download] configure number of jobs and maximal size for single-part transfer operation. (cf_transfer_param, dry_run): New methods. (glacier_eval): Fix context evaluation. * lib/App/Glacier/Command/Get.pm: Rewrite using new configuration parameters. Remove the --part-size option. Honor the dry_run setting. * lib/App/Glacier/Command/Put.pm: Rewrite using new configuration parameters. Honor the dry_run setting.
Diffstat (limited to 'lib')
-rw-r--r--lib/App/Glacier/Command.pm53
-rw-r--r--lib/App/Glacier/Command/Get.pm96
-rw-r--r--lib/App/Glacier/Command/Put.pm13
3 files changed, 112 insertions, 50 deletions
diff --git a/lib/App/Glacier/Command.pm b/lib/App/Glacier/Command.pm
index e46ed6e..515ec31 100644
--- a/lib/App/Glacier/Command.pm
+++ b/lib/App/Glacier/Command.pm
@@ -59,6 +59,8 @@ use constant {
EX_CONFIG => 78
};
+use constant MB => 1024*1024;
+
sub ck_number {
my ($vref) = @_;
return "not a number"
@@ -66,6 +68,23 @@ sub ck_number {
return undef;
}
+sub ck_size {
+ my ($vref) = @_;
+ if ($$vref =~ /^(\d+)\s*([kKmMgG])?$/) {
+ my $size = $1;
+ if ($2) {
+ my $suf = lc $2;
+ foreach my $m (qw(k m g)) {
+ $size *= 1024;
+ last if $m eq $suf;
+ }
+ }
+ $$vref = $size;
+ } else {
+ return 'invalid size specification';
+ }
+}
+
my %parameters = (
glacier => {
section => {
@@ -73,8 +92,24 @@ my %parameters = (
access => 1,
secret => 1,
region => 1,
- 'multipart-upload-size' => { default => 100*1024*1024,
- check => \&ck_number },
+ }
+ },
+ transfer => {
+ section => {
+ 'single-part-size' => { default => 100*MB, check => \&ck_size },
+ 'jobs' => { default => 16, check => \&ck_number },
+ upload => {
+ section => {
+ 'single-part-size' => { check => \&ck_size },
+ 'jobs' => { check => \&ck_number },
+ }
+ },
+ download => {
+ section => {
+ 'single-part-size' => { check => \&ck_size },
+ 'jobs' => { check => \&ck_number },
+ }
+ }
}
},
database => {
@@ -259,6 +294,12 @@ sub cfget {
return $self->config->get(@path);
}
+sub cf_transfer_param {
+ my ($self, $type, $param) = @_;
+ return $self->cfget('transfer', $type, $param)
+ || $self->cfget('transfer', $param);
+}
+
sub error {
my ($self, @msg) = @_;
print STDERR "$self->{_progname}: " if $self->{_progname};
@@ -274,6 +315,11 @@ sub debug {
}
}
+sub dry_run {
+ my $self = shift;
+ return $self->{_dry_run};
+}
+
sub abend {
my ($self, $code, @msg) = @_;
$self->error(@msg);
@@ -288,8 +334,9 @@ sub run {
sub glacier_eval {
my $self = shift;
my $method = shift;
+ my $wantarray = wantarray;
my $ret = http_catch(sub {
- wantarray ? [ $self->{_glacier}->${\$method}(@_) ]
+ $wantarray ? [ $self->{_glacier}->${\$method}(@_) ]
: $self->{_glacier}->${\$method}(@_)
},
err => \my %err,
diff --git a/lib/App/Glacier/Command/Get.pm b/lib/App/Glacier/Command/Get.pm
index 09e739f..5666d36 100644
--- a/lib/App/Glacier/Command/Get.pm
+++ b/lib/App/Glacier/Command/Get.pm
@@ -15,13 +15,11 @@ glacier get - download file from a vault
B<glacier put>
[B<-fit>]
-[B<-s> I<N>]
[B<--force>]
[B<--interactive>]
[B<-j> I<NJOBS>]
[B<--jobs=>I<NJOBS>]
[B<--no-clobber>]
-[B<--part-size=>I<N>]
[B<--test>]
I<VAULT>
I<FILE>
@@ -42,7 +40,6 @@ sub getopt {
'interactive|i' => sub { $self->{_options}{ifexists} = IFEXISTS_ASK },
'force|f' => sub { $self->{_options}{ifexists} = IFEXISTS_OVERWRITE },
'no-clobber|f' => sub { $self->{_options}{ifexists} = IFEXISTS_KEEP },
- 'part-size|s=i' => \$self->{_options}{part_size},
'jobs|j=i' => \$self->{_options}{jobs},
'test|t' => \$self->{_options}{test},
%opts);
@@ -95,7 +92,12 @@ sub run {
}
if ($job->is_completed) {
- $self->download($job, $localname);
+ my $tree_hash = $self->download($job, $localname);
+ if (!$self->dry_run
+ && $tree_hash ne $job->get('ArchiveSHA256TreeHash')) {
+ unlink $localname;
+ $self->abend(EX_SOFTWARE, "downloaded file is corrupt");
+ }
} else {
my ($status, $message) = $job->status;
if ($status eq 'InProgress') {
@@ -117,50 +119,75 @@ use constant TWOMB => 2*MB;
sub download {
my ($self, $job, $localname) = @_;
+ my $archive_size = $job->get('ArchiveSizeInBytes');
+ if ($archive_size < $self->cf_transfer_param(qw(download single-part-size))) {
+ # simple download
+ $self->_download_simple($job, $localname);
+ } else {
+ $self->_download_multipart($job, $localname);
+ }
+}
- use threads;
- use threads::shared;
-
- my $fd;
- open($fd, '>', $localname)
+sub _open_output {
+ my ($self, $localname) = @_;
+ open(my $fd, '>', $localname)
or $self->abort(EX_FAILURE, "can't open $localname: $!");
binmode($fd);
truncate($fd, 0);
+ return $fd;
+}
+
+sub _download_simple {
+ my ($self, $job, $localname) = @_;
+
+ $self->debug(1, "downloading", $job->file_name(1), "in single part");
+ return if $self->dry_run;
+ my $fd = $self->_open_output($localname);
+ my ($res, $tree_hash) = $self->glacier_eval('get_job_output',
+ $job->vault, $job->id);
+ if ($self->lasterr) {
+ $self->abend(EX_FAILURE, "downoad failed: ",
+ $self->last_error_message);
+ }
+ syswrite($fd, $res);
+ close($fd);
+ return $tree_hash;
+}
+
+sub _download_multipart {
+ my ($self, $job, $localname) = @_;
- my $archive_size = $job->get('ArchiveSizeInBytes');
+ use threads;
+ use threads::shared;
+
+ my $glacier = $self->{_glacier};
- my $njobs;
+ my $tree_hash;
+
+ my $njobs = $self->{_options}{jobs}
+ || $self->cf_transfer_param(qw(download jobs));
+
+ my $archive_size = $job->get('ArchiveSizeInBytes');
my $part_size;
- if ($self->{_options}{jobs}) {
- $njobs = $self->{_options}{jobs};
- # Compute approximate part size
- $part_size = int(($archive_size + $njobs - 1) / $njobs);
- } else {
- $part_size = ($self->{_options}{part_size} || 10 * MB);
- }
+ # Compute approximate part size
+ $part_size = int(($archive_size + $njobs - 1) / $njobs);
# Make sure the chunk is Tree-Hash aligned
# http://docs.aws.amazon.com/amazonglacier/latest/dev/checksum-calculations-range.html?shortFooter=true#checksum-calculations-upload-archive-with-ranges
- $part_size = TWOMB * 2 ** int(log($part_size / TWOMB) / log(2));
+ $part_size = TWOMB * 2 ** int(log($part_size / TWOMB) / log(2) + 1);
# Adjust the number of jobs
$njobs = int(($archive_size + $part_size - 1) / $part_size);
- my $glacier = $self->{_glacier};
-
- my $tree_hash;
-
- if ($njobs <= 1 || $archive_size < $part_size) {
- # simple download
- my $res;
- $self->debug(1, "downloading", $job->file_name(1), "in single part");
- ($res, $tree_hash) = $glacier->get_job_output($job->vault, $job->id);
- syswrite($fd, $res);
+ if ($njobs <= 1) {
+ return $self->_download_simple($job, $localname);
} else {
use Fcntl qw(SEEK_SET);
$self->debug(1,
"downloading", $job->file_name(1), "to $localname in chunks of $part_size bytes, in $njobs jobs");
+ return if $self->dry_run;
+ my $fd = $self->_open_output($localname);
my @part_hashes :shared = ();
my $read_bytes;
my $rest_size = $archive_size;
@@ -189,16 +216,9 @@ sub download {
# FIXME: error handling
$thr->join() or croak "thread $thr failed";
}
- $tree_hash = $glacier->_tree_hash_from_array_ref(\@part_hashes);
- }
-
- close($fd);
-# print $tree_hash. ' ', $job->get('ArchiveSHA256TreeHash') , "\n";
- if ($tree_hash ne $job->get('ArchiveSHA256TreeHash')) {
- unlink $localname;
- $self->abend(EX_SOFTWARE, "downloaded file is corrupt");
+ close($fd);
+ return $glacier->_tree_hash_from_array_ref(\@part_hashes);
}
- print "finished\n";
}
1;
diff --git a/lib/App/Glacier/Command/Put.pm b/lib/App/Glacier/Command/Put.pm
index d0e6cc0..594c957 100644
--- a/lib/App/Glacier/Command/Put.pm
+++ b/lib/App/Glacier/Command/Put.pm
@@ -44,7 +44,7 @@ sub _upload {
or $self->abend(EX_USAGE, "can't stat \"$localname\": $!");
my $dir = $self->directory($vaultname);
- my $id = ($size < $self->cfget(qw(glacier multipart-upload-size)))
+ my $id = ($size < $self->cf_transfer_param(qw(upload single-part-size)))
? $self->_upload_simple($vaultname, $localname, $remotename)
: $self->_upload_multipart($vaultname, $localname, $remotename);
$self->debug(1, "ID $id\n");
@@ -89,20 +89,15 @@ sub _upload_multipart {
my $total_parts = int(($archive_size + $part_size - 1) / $part_size);
# Compute number of threads
- my $njobs = $self->{_options}{jobs};
- if (defined($njobs)) {
- # Allow at most so many jobs as there are parts
- $njobs = $total_parts if $njobs > $total_parts;
- } else {
- # Select default.
- $njobs = $total_parts < 16 ? $total_parts : 16;
- }
+ my $njobs = $self->{_options}{jobs}
+ || $self->cf_transfer_param(qw(upload jobs));
# Number of parts to upload by each job;
my $job_parts = int(($total_parts + $njobs - 1) / $njobs);
$self->debug(1,
"uploading $localname in chunks of $part_size bytes, in $njobs jobs");
+ return if $self->dry_run;
open(my $fd, '<', $localname)
or $self->abort(EX_FAILURE, "can't open $localname: $!");

Return to:

Send suggestions and report system problems to the System administrator.