diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 006452e..0000000 --- a/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -Makefile -blib -pm_to_blib -Makefile.old diff --git a/.shipit b/.shipit deleted file mode 100644 index 82af237..0000000 --- a/.shipit +++ /dev/null @@ -1,8 +0,0 @@ -# auto-generated shipit config file. -# shipit needs to be smarter about modifying the RPM specfile version :/ -#steps = FindVersion, ChangeVersion, ChangeRPMVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, UploadCPAN -steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, UploadCPAN - -git.tagpattern = %v - - diff --git a/Changes b/Changes deleted file mode 100644 index 779f35e..0000000 --- a/Changes +++ /dev/null @@ -1,140 +0,0 @@ -2.21 -- 2011-10-29 - - * Improve error handling for broken usage files in `mogadm check` - -2.20 -- 2011-06-15 - - * Remove redundant tracker calls. Required for MogileFS::Server 2.50 as - the redundant commands are now cached harder. - (dormando ) - -2.19 -- 2011-01-08 - - * bump required client version (dormando ) - - * Add mogfiledebug utility (dormando ) - - * documentation updates. (dormando ) - - * new commandline utils. (dormando ) - - * teach mogstats what rebal queue is (dormando ) - - * make a few errors less infuriating (dormando ) - - * Fix files stat under Postgres. (Robin H. Johnson ) - - * edit the meta details a little. (dormando ) - - * Ignore generated Makefile (Tomas Doran ) - - * Add resources (Tomas Doran ) - -2.18 -- 2010-09-28 - - * Add rebalance commands - -2.17 -- 2010-08-13 - - * Add 'domains' argument to mogstats for faster queries on basic by-domain - and class stats. (pyhhak) - -2.16 -- 2010-04-02 - - * Add --replpolicy option for configuring a replication policy string. - Used in MogileFS::Network policies, etc. - (dormando) - - * Updates to mogstats (dormando, Barry Abrahamson) - -2.15 -- 2009-12-05 - - * Standalone mogstats utility (dormando) - - * New mogilefs.conf option: timeout (Robin H. Johnson) - - * Make --noverify a valid option for locate (Ask Bjoern Hansen) - - * Make the locate command return all replicas, not just two (Gavin Brebner) - -2.14 -- 2009-01-12 - - * Add --nobigfile option to skip bigfile parsing on large files. (dormando) - - * Allow "listkey" on all files, not good for big dbs (Ask Bjoern Hansen) - - * Make mogtool prefer command line settings over configuration file - settings (Ask) - -2.13 -- 2008-08-16 - - * Add --noreplwait option to mogtool. (Dormando) - - * Make mogadm prefer command line settings over configuration file - settings (Derek Boonstra) - - * Wire up fsck_reset's startpos argument. (Robin H. Johnson). - - * Fix the automatic retry on errors saving a chunk (Ask Bjoern Hansen) - - * Improve error messages from mogtool (Ask Bjoern Hansen). - -2.12 -- 2007-08-06 - - * Update POD documentation for mogadm to state of present code. - - * Add device-modify command for consistency with host/class. - - * Document return code behavior in the POD. - - * Provide specific return codes for list, listkey and locate when the item - was not found. - - * Sometimes we get a - when we are expecting a numeric. Force these to zero. - - * Add locate task to mogtool that provides path output, useful in debugging. - - * Implement "device summary" per RT#3784. This provides an output with one - line of storage data per host. - - * Handle stats where there are no files in the DB. - - * If there are hosts, but no devices, the totals were previously undefined, - leading to errors in the output. - -2.11 -- 2007-06-19 - - * utilization column was always 0. single character typo from - Arjan - -2.10 -- 2007-05-14 - - * mogadm check no longer shows dead devices - - * mogadm device list no longer shows devices, UNLESS you do: - mogadm device list --all - - * allow setting a setting to "" (empty string) - -2.09 -- 2007-05-03 - - * 'settings list' and 'settings set ' sub-commands, if you have - MogileFS::Client >= 1.07 installed. otherwise just unavailable. - - * fix multiple bugs in "fsck status" display/calculations. - -2.08 -- 2007-04-20 - - * stats command from Leon Brocard - - * fsck commands - - * improved docs - - * patch from Arthur Bebak to add "listkey" to mogtool - -1.50 -- 2006-11-13 - - * much-cleaned up and friendlier mogadm command - - diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index 3d970e2..0000000 --- a/MANIFEST +++ /dev/null @@ -1,15 +0,0 @@ -MANIFEST -MANIFEST.SKIP -Changes -Makefile.PL -lib/MogileFS/Utils.pm -mogtool -mogadm -mogstats -mogupload -mogdelete -mogfetch -mogfileinfo -moglistfids -moglistkeys -mogfiledebug diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP deleted file mode 100644 index f32ccad..0000000 --- a/MANIFEST.SKIP +++ /dev/null @@ -1,40 +0,0 @@ -^conf/ - -^# -\bCVS\b -^MANIFEST\. -^Makefile$ -~$ -\.html$ -\.old$ -^blib/ -_blib$ -^MakeMaker-\d -^\.exists -\bdebian\b -\btest\b -^MogileFS-Utils.spec$ - -# Avoid version control files. -\bRCS\b -\bCVS\b -,v$ -\B\.svn\b -\B\.git\b - -# Avoid Makemaker generated and utility files. -\bMANIFEST\.bak -\bMakefile$ -\bblib/ -\bMakeMaker-\d -\bpm_to_blib$ - -# Avoid Module::Build generated and utility files. -\bBuild$ -\b_build/ - -# Avoid temp and backup files. -~$ -\.old$ -\#$ -\b\.# diff --git a/Makefile.PL b/Makefile.PL deleted file mode 100644 index 7021c72..0000000 --- a/Makefile.PL +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/perl - -use strict; -use ExtUtils::MakeMaker; - -WriteMakefile( - NAME => 'MogileFS-Utils', - VERSION_FROM => 'lib/MogileFS/Utils.pm', - AUTHOR => 'Brad Fitzpatrick ', - ABSTRACT => 'MogileFS utilities', - EXE_FILES => ['mogtool', 'mogadm', 'mogstats', - 'mogupload', 'mogfetch', 'mogdelete', 'mogfileinfo', 'moglistkeys', - 'moglistfids', 'mogfiledebug', - ], - PREREQ_PM => { - 'LWP::Simple' => 0, - 'Compress::Zlib' => 0, - 'MogileFS::Client' => '1.14', - }, - META_MERGE => { - resources => { - homepage => 'http://www.mogilefs.org', - bugtracker => 'http://code.google.com/p/mogilefs/issues/list', - repository => 'git://github.com/mogilefs/MogileFS-Utils.git', - MailingList => 'http://groups.google.com/group/mogile', - }, - }, -); diff --git a/MogileFS-Utils.spec b/MogileFS-Utils.spec deleted file mode 100644 index 923cc9c..0000000 --- a/MogileFS-Utils.spec +++ /dev/null @@ -1,55 +0,0 @@ -name: MogileFS-Utils -summary: MogileFS-Utils - MogileFS utilities. -version: 2.19 -release: 1%{?dist} -vendor: Alan Kasindorf -packager: Jonathan Steinert -license: Artistic -group: Applications/CPAN -buildroot: %{_tmppath}/%{name}-%{version}-%(id -u -n) -buildarch: noarch -source: MogileFS-Utils-%{version}.tar.gz -autoreq: no -requires: perl -requires: perl(MogileFS::Client) >= 1.14 -requires: perl(Compress::Zlib) - -%description -MogileFS utilities. - -%prep -rm -rf "%{buildroot}" -%setup -n MogileFS-Utils-%{version} - -%build -%{__perl} Makefile.PL PREFIX=%{buildroot}%{_prefix} -make all -make test - -%install -make pure_install - -[ -x /usr/lib/rpm/brp-compress ] && /usr/lib/rpm/brp-compress - -# remove special files -find %{buildroot} \( \ - -name "perllocal.pod" \ - -o -name ".packlist" \ - -o -name "*.bs" \ - \) -exec rm -f {} \; - -# no empty directories -find %{buildroot}%{_prefix} \ - -type d -depth -empty \ - -exec rmdir {} \; - -%clean -[ "%{buildroot}" != "/" ] && rm -rf %{buildroot} - -%files -%defattr(-,root,root) -%{_prefix}/bin/* -%{_prefix}/share/man/man1/* -%{_prefix}/share/man/man3/* -%{_prefix}/lib/perl5/site_perl/* - diff --git a/conf/mogtool.conf b/conf/mogtool.conf deleted file mode 100644 index 0b73eac..0000000 --- a/conf/mogtool.conf +++ /dev/null @@ -1,20 +0,0 @@ -# Configuration for mogtool -# See man mogtool(1p) for a description of options - -trackers = 10.0.0.3:7001, 10.10.0.5/10.0.0.5:7001 -domain = mogiledomain -class = fileclass - -# Location of private MogileFS library -#lib = /home/foo/lib - -gzip = 1 -big = 1 -overwrite = 1 -chunksize = 32M - -# Where receipts should go to -# receipt = foo@bar.com, baz@bar.com -verify = 1 -concurrent = 3 - diff --git a/debian/changelog b/debian/changelog index 097e105..c52d033 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +mogilefs-utils (2.21-0mit1) oneiric; urgency=low + + * New upstream version + + -- Kiall Mac Innes Wed, 02 Nov 2011 23:42:56 +0000 + mogilefs-utils (0.01-1) unstable; urgency=low * Initial release diff --git a/debian/gbp.conf b/debian/gbp.conf new file mode 100644 index 0000000..d66d816 --- /dev/null +++ b/debian/gbp.conf @@ -0,0 +1,4 @@ +[DEFAULT] +debian-branch = master +pristine-tar = True +overlay = True diff --git a/lib/MogileFS/Utils.pm b/lib/MogileFS/Utils.pm deleted file mode 100644 index 27d3842..0000000 --- a/lib/MogileFS/Utils.pm +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/perl -package MogileFS::Utils; - -our $VERSION = '2.21'; - -use Getopt::Long; -use MogileFS::Client; - -use fields ( - 'config' - ); - -# Helper object for the individual utilities. -sub new { - my MogileFS::Utils $self = shift; - $self = fields::new($self) unless ref $self; - $self->_init(@_); - - return $self; -} - -# Predefine some options via configuration. -sub _init { - my MogileFS::Utils $self = shift; - - $self->{config} = {}; -} - -sub _readconf { - my MogileFS::Utils $self = shift; - my $args = shift; - - # Liftedish from mogadm, but we can refactor mogadm to use this instead. - my @configs = ($args->{conf}, $ENV{MOGUTILSCONF}, - "$ENV{HOME}/.mogilefs.conf", - "/etc/mogilefs/mogilefs.conf"); - my %opts = (); - for my $fn (reverse @configs) { - next unless $fn && -e $fn; - open my $file, "<$fn" - or die "unable to open $fn: $!"; - while (<$file>) { - s/\#.*//; - next unless m/^\s*(\w+)\s*=\s*(.+?)\s*$/; - $opts{$1} = $2 unless ( defined $opts{$1} ); - } - close $file; - } - - return \%opts; -} - -sub config { - my MogileFS::Utils $self = shift; - return $self->{config}; -} - -sub getopts { - my MogileFS::Utils $self = shift; - my $usage = shift; - my @want = @_; - - my %opts = (); - $self->abort_usage($usage) unless @ARGV; - GetOptions(\%opts, @want, qw/help trackers=s domain=s conf=s/) - or $self->abort_usage($usage); - my $config = $self->_readconf(\%opts); - - $self->{config} = {%$config, %opts}; - $self->_verify_config; - $self->abort_usage($usage) if $self->{config}->{help}; - - return $self->{config}; -} - -sub _verify_config { - my MogileFS::Utils $self = shift; - my $conf = $self->{config}; - - while (my ($k, $v) = each %$conf) { - if ($k =~ m/^trackers/) { - my @tr = split /,/, $v; - for (@tr) { - # Client is obnoxious about requiring a port. - if ($_ !~ m/:\d+/) { - $_ = $_ . ':7001'; - } - } - $conf->{$k} = \@tr; - } elsif ($k =~ m/class/) { - # "" means "default". Might have to remove this if people have - # been adding "default" classes, which I don't think is possible? - if ($v eq 'default') { - $conf->{$k} = ''; - } - } - } -} - -# Do we want to be fancier here? -sub abort_usage { - my MogileFS::Utils $self = shift; - my $usage = shift; - print "Usage: $0 $usage\n"; - exit; -} - -sub client { - my MogileFS::Utils $self = shift; - my $c = $self->{config}; - return MogileFS::Client->new(domain => $c->{domain}, - hosts => $c->{trackers}); -} - -=head1 NAME - -MogileFS::Utils - Command line utilities for the MogileFS distributed file system. - -=head1 SYNOPSIS - -L - -L - -L - -L - -L - -L - -L - -L - -L - -L (DEPRECATED: Do not use!) - -=head1 SUMMARY - -Please refer to the documentation for the tools included in this distribution. - -=head1 CONFIGURATION FILE - -Most of the utilities in this package support a configuration file. Common -options can be pushed into the config file, such as trackers, domain, or -class. The file is in B and B<~/.mogilefs.conf> -by default. You may also specify a configuration via B<--conf=filename> - -Example: - - trackers = 10.0.0.1:7001,10.0.0.3:7001 - domain = foo - -=head1 AUTHOR - -Brad Fitzpatrick ELE - -Dormando ELE - -=head1 BUGS - -Please report any on the MogileFS mailing list: L. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -1; diff --git a/mogadm b/mogadm deleted file mode 100755 index 5c8e3d5..0000000 --- a/mogadm +++ /dev/null @@ -1,1689 +0,0 @@ -#!/usr/bin/perl -# vim:ts=4 sw=4 ft=perl et: - -use strict; -use warnings; -use Getopt::Long; -use LWP::Simple; # FIXME: use of this makes 'mog check' hang too long when multiple things down -use Socket; - -my @topcmds = qw(check stats host device domain class slave fsck rebalance settings); -my $usage = { - check => { - des => "Check the state of the MogileFS world.", - }, - stats => { - des => "Show MogileFS system statistics. (DEPRECATED: use mogstats instead)", - }, - settings => { - des => "Change/list server settings.", - subcmd => { - list => { - des => "List all server settings", - }, - set => { - args => " ", - des => "Set server setting 'key' to 'value'.", - }, - }, - }, - host => { - des => "Add/modify hosts.", - subcmd => { - list => { - des => "List all hosts.", - }, - add => { - des => "Add a host to MogileFS.", - args => " [opts]", - opts => { - "" => "Hostname of machine", - "--status=s" => "One of {alive,down}. Default 'down'.", - "--ip=s" => "IP address of machine.", - "--port=i" => "HTTP port of mogstored", - "--getport=i" => "Alternate HTTP port serving readonly traffic", - "--altip=s" => "Alternate IP that is machine is reachable from", - "--altmask=s" => "Netmask which, when matches client, uses alt IP", - }, - }, - modify => { - des => "Modify a host's properties.", - args => " [opts]", - opts => { - "" => "Host name.", - "--status=s" => "One of {alive,down}.", - "--ip=s" => "IP address of machine.", - "--port=i" => "HTTP port of mogstored", - "--getport=i" => "Alternate HTTP port serving readonly traffic", - "--altip=s" => "Alternate IP that is machine is reachable from", - "--altmask=s" => "Netmask which, when matches client, uses alt IP", - }, - }, - mark => { - des => "Change the status of a host. (equivalent to 'modify --status')", - args => " ", - opts => { - "" => "Host name to bring up or down.", - "" => "One of {alive,down}.", - } - }, - delete => { - des => "Delete a host.", - args => "", - opts => { - "" => "Host name to delete.", - }, - }, - }, - }, - device => { - des => "Add/modify devices.", - subcmd => { - list => { - des => "List all devices, for each host.", - args => "[opts]", - opts => { - "--all" => "Include dead devices in list.", - }, - }, - summary => { - des => "List the summary of devices, for each host.", - args => "[opts]", - opts => { - "--status=s" => "Devices of status A. Defaults to 'alive,readonly'", - }, - }, - add => { - des => "Add a device to a host.", - args => " [opts]", - opts => { - "" => "Hostname to add a device", - "" => "Numeric devid. Never reuse these.", - "--status=s" => "One of 'alive' or 'down'. Defaults to 'alive'.", - }, - }, - mark => { - des => "Mark a device as {alive,dead,down,drain,readonly}", - args => " ", - opts => { - "" => "Hostname of device", - "" => "Numeric devid to modify.", - "" => "One of {alive,dead,down,drain,readonly}", - }, - }, - modify => { - des => "Modify a device's properties.", - args => " [opts]", - opts => { - "" => "Hostname of device", - "" => "Numeric devid to modify.", - "--status=s" => "One of {alive,dead,down,drain,readonly}", - "--weight=i" => "Positive numeric weight for device", - }, - }, - }, - }, - domain => { - des => "Add/modify domains (namespaces)", - subcmd => { - list => { - des => "List all hosts.", - }, - add => { - des => "Add a domain (namespace)", - args => "", - opts => { - "" => "Domain (namespace) to add.", - }, - }, - delete => { - des => "Delete a domain.", - args => "", - opts => { - "" => "Domain (namespace) to add.", - }, - }, - }, - }, - class => { - des => "Add/modify file classes.", - subcmd => { - list => { - des => "List all classes, for each domain.", - }, - add => { - des => "Add a file class to a domain.", - args => " [opts]", - opts => { - "" => "Domain to add class to.", - "" => "Name of class to add.", - "--mindevcount=i" => "Minimum number of replicas.", - "--replpolicy=s" => "Replication policy string.", - }, - }, - modify => { - des => "Modify properties of a file class.", - args => " [opts]", - opts => { - "" => "Domain to add class to.", - "" => "Name of class to add.", - "--mindevcount=i" => "Minimum number of replicas.", - "--replpolicy=s" => "Replication policy string.", - }, - }, - delete => { - des => "Delete a file class from a domain.", - args => " ", - opts => { - "" => "Domain of class to delete.", - "" => "Class to delete.", - }, - - }, - }, - }, - slave => { - des => 'Manipulate slave database information in a running mogilefsd.', - subcmd => { - list => { - des => 'List current store slave nodes.', - }, - add => { - des => 'Add a slave node for store usage', - args => ' [opts]', - opts => { - '--dsn=s' => "DBI DSN specifying what database to connect to.", - '--username=s' => "DBI username for connecting.", - '--password=s' => "DBI password for connecting.", - }, - }, - modify => { - des => 'Modify a slave node for store usage', - args => ' [opts]', - opts => { - '--dsn=s' => "DBI DSN specifying what database to connect to.", - '--username=s' => "DBI username for connecting.", - '--password=s' => "DBI password for connecting.", - }, - }, - delete => { - des => 'Delete a slave node for store usage', - args => '', - }, - }, - }, - rebalance => { - des => "Control file rebalancing operations.", - subcmd => { - start => { - des => 'Start a rebalance job', - }, - stop => { - des => 'Stop a rebalance job', - }, - status => { - des => 'Show status of current rebalance job', - }, - settings => { - des => 'Display rebalance settings', - }, - test => { - des => 'Show what devices the current policy would match', - }, - reset => { - des => 'Reset an existing policy', - }, - policy => { - des => 'Add or adjust the current policy', - args => '[opts]', - opts => { - '--options=s' => "Policy string (see docs/wiki for details)", - }, - }, - }, - }, - fsck => { - des => "Control a background filesystem check operation.", - subcmd => { - start => { - des => 'Start (or resume) background fsck', - }, - stop => { - des => 'Stop (pause) background fsck', - }, - status => { - des => 'Show fsck status', - }, - reset => { - des => 'Reset fsck position back to the beginning', - args => '[opts]', - opts => { - '--policy-only' => "Check repl policy (assumed locations); don't stat storage nodes", - '--startpos=i' => "FID to start at.", - } - }, - clearlog => { - des => 'Clear the fsck log', - }, - printlog => { - des => 'Display the fsck log', - }, - taillog => { - des => 'Tail the fsck log', - }, - - }, - }, -}; - -# load up our config files -my %opts; - -Getopt::Long::Configure("require_order", "pass_through"); -GetOptions( - "trackers=s" => \$opts{trackers}, - "config=s" => \$opts{config}, - "lib=s" => \$opts{lib}, - "help" => \$opts{help}, - "verbose" => \$opts{verbose}, - ) or abortWithUsage(); -Getopt::Long::Configure("require_order", "no_pass_through"); - -my @configs = ($opts{config}, "$ENV{HOME}/.mogilefs.conf", "/etc/mogilefs/mogilefs.conf"); -foreach my $fn (reverse @configs) { - next unless $fn && -e $fn; - open FILE, "<$fn" - or die "unable to open $fn: $!\n"; - while () { - s/\#.*//; - next unless m!^\s*(\w+)\s*=\s*(.+?)\s*$!; - $opts{$1} = $2 unless ( defined $opts{$1} ); - } - close FILE; -} - -# bail for help -abortWithUsage() if $opts{help}; - -# make sure we have at least a topcmd -my $topcmd = shift(@ARGV); -abortWithUsage() unless $topcmd && $usage->{$topcmd}; - -# break up the trackers and ensure we got some -if ($opts{trackers}) { - $opts{trackers} = [ split(/\s*,\s*/, $opts{trackers}) ]; -} -fail_text('no_trackers') - unless ($opts{trackers} && @{$opts{trackers}}) || detect_local_tracker(); - -# okay, load up the libraries that we need -if ($opts{lib}) { - eval "use lib '$opts{lib}';"; -} -eval "use MogileFS::Admin; use MogileFS::Client; 1;" or fail_text('cant_find_module'); - -# dispatch if it's special -if ($topcmd eq 'check') { - die "Unknown options/arguments to 'check' command.\n" if @ARGV; - cmd_check(); -} elsif ($topcmd eq 'stats') { - die "Unknown options/arguments to 'stats' command.\n" if @ARGV; - cmd_stats(); -} - -# get the verb -my $verb = shift(@ARGV) or - abort_with_topcmd_help($topcmd); -my $cmdinfo = $usage->{$topcmd}{subcmd}{$verb}; -abort_with_topcmd_help($topcmd) unless $cmdinfo; - -my $badargs = sub { - my $msg = shift; - abort_with_topcmd_help($topcmd, $verb, $msg); -}; - -# get the non-option (non --foo) arguments: -my %cmdargs; -if (my $args = $cmdinfo->{args}) { - my @args = split(/ /, $args); - foreach my $arg (@args) { - # positional (but named) parameter - if ($arg =~ /^<(.+)>$/) { - my $argname = $1; - my $val = shift @ARGV; - # map e.g. "dev5" to 5 - if ($argname eq "devid" && $val && $val =~ /^dev(\d+)$/) { - $val = $1; - } - $badargs->("Missing argument '$argname'") unless defined $val; - $badargs->("Unexpected option. Expected argument '$argname'") if $val =~ /^-/; - $cmdargs{$argname} = $val; - } elsif ($arg eq "[opts]") { - # handled later. - } else { - die "INTERNAL ERROR."; - } - } - $badargs->("Unexpected extra argument.") if @ARGV && $ARGV[0] !~ /^-/; -} else { - $badargs->("Unexpected arguments when expecting none.") if @ARGV; -} - -# parse the options -if (my $opts = $cmdinfo->{opts}) { - my %getopts; - foreach (keys %$opts) { - my $k = $_; - next if $k =~ /^(\%cmdargs); -exit 0; - -sub detect_local_tracker { - require IO::Socket::INET; - my $loctrack = "127.0.0.1:7001"; - my $sock = IO::Socket::INET->new(PeerAddr => $loctrack, Timeout => 1); - return 0 unless $sock; - $opts{trackers} = [$loctrack]; - return 1; -} - -########################################################################### -## command routines -########################################################################### - -sub cmd_check { - # step 1: we want to check each tracker for responsiveness - my $now = time(); - my ($hosts, $devices); - $| = 1; - print "Checking trackers...\n"; - foreach my $t (@{$opts{trackers}}) { - print " $t ... "; - my $mogadm = mogadm($t); - if ($mogadm) { - my $lhosts = hosts($mogadm); - my $ldevs = devices($mogadm); - if ($lhosts && $ldevs) { - print "OK\n"; - $hosts = $lhosts; - $devices = $ldevs; - } else { - print "REQUEST FAILURE (is the tracker up?)\n"; - } - } else { - print "INITIAL FAILURE (bad configuration?)\n"; - } - } - - # we should have hosts if we get here - fail_text('no_hosts') unless $hosts; - print "\n"; - - # step 2: now hit each of the hosts for responsiveness - print "Checking hosts...\n"; - my @urls; - foreach my $hostid (sort { $a <=> $b } keys %$hosts) { - printf " [%2d] %s ... ", $hostid, $hosts->{$hostid}->{hostname}; - if ($hosts->{$hostid}->{status} eq 'alive') { - my $url = 'http://' . $hosts->{$hostid}->{hostip} . ':' . $hosts->{$hostid}->{http_port} . '/'; - my $file = get($url); - if (defined $file) { - print "OK\n"; - push @urls, [ $hostid, $url ]; - } else { - print "REQUEST FAILURE FETCHING: $url\n"; - } - } else { - print "skipping; status = $hosts->{$hostid}->{status}\n"; - } - } - - # everything should be chill - fail_text('no_devices') unless @urls; - print "\n"; - - # step 3: check devices for each host - print "Checking devices...\n"; - printf " host device %10s %10s %10s %7s %7s %4s\n", 'size(G)', 'used(G)', 'free(G)', 'use% ', 'ob state', 'I/O%'; - printf " ---- ------------ ---------- ---------- ---------- ------ ---------- -----\n"; - my %total; - # Initialize to zero so that the total outputs doesn't need to check for undefined. - map { $total{$_} = 0; } qw(total used avail); - - foreach my $hosturl (@urls) { - my ($hostid, $url) = @$hosturl; - my $devs = $devices->{$hostid}; - DEV: foreach my $devid (sort { $a <=> $b } keys %$devs) { - my $dev = $devs->{$devid}; - my $status = $dev->{status} || "??"; - next if $status eq "dead"; - - printf " [%2d] %-7s", $hostid, "dev$devid"; - - my $usage = get($url . "/dev$devid/usage"); - if (! defined $usage) { - print "REQUEST FAILURE FETCHING: $url" . "dev$devid/usage\n"; - next; - } - if (length($usage) < 1) { - print "USAGE FILE BROKEN OR EMPTY: $url" . "dev$devid/usage\n"; - next; - } - my %data = ( map { split(/:\s+/, $_) } split(/\r?\n/, $usage) ); - foreach (qw(time used total avail)) { - $data{$_} = 0 if (!$data{$_} || - $data{$_} !~ /\A\d+(\.\d+)?\Z/); - } - foreach (qw(available device disk)) { - if (! exists $data{$_} || !$data{$_}) { - print "MISSING FIELD ($_) FROM USAGE FILE: $url" . "dev$devid/usage\n"; - next DEV; - } - } - $data{age} = $now - $data{time}; - $data{used} /= 1024**2; - $data{total} /= 1024**2; - $data{available} /= 1024**2; - $data{avail} = $data{available}; - my $pct = 100 - $data{available}/$data{total}*100; - $total{used} += $data{used}; - $total{avail} += $data{avail}; - $total{total} += $data{total}; - - $dev->{utilization} = 0 if (!defined($dev->{utilization}) || - $dev->{utilization} !~ /\A\d+(\.\d+)?\Z/); - printf(" %10.3f %10.3f %10.3f %6.2f%% %-7s %5.1f\n", - (map { $data{$_} } qw(total used avail)), - $pct, ($dev->{observed_state} || "?"), - $dev->{utilization}); - } - } - my $pct = 0; - # Avoid division by zero - $pct = 100 - $total{avail}/$total{total}*100 if($total{total} > 0); - - printf " ---- ------------ ---------- ---------- ---------- ------\n"; - printf " total:%10.3f %10.3f %10.3f %6.2f%%\n", (map { $total{$_} } qw(total used avail)), $pct; - - # if we get here, all's well - ok(); -} - -sub cmd_stats { - fail("mogadm stats is deprecated by new 'mogstats' utility"); -} - -sub cmd_host_list { - my $hosts = hosts(); - fail_text('no_hosts') unless $hosts; - - foreach my $hostid (sort keys %$hosts) { - my $host = $hosts->{$hostid}; - print "$host->{hostname} [$hostid]: $host->{status}\n"; - my @data = ( - 'IP', "$host->{hostip}:$host->{http_port}", - 'Alt IP', $host->{altip}, - 'Alt Mask', $host->{altmask}, - 'GET Port', $host->{http_get_port}, - ); - while (my ($k, $v) = splice(@data, 0, 2)) { - next unless $v; - printf " %-10s\%s\n", "$k:", $v; - } - print "\n"; - } - ok(); -} - -sub cmd_host_add { - my $args = shift; - - my $hosts = hosts_byname(); - fail_text('no_hosts') unless $hosts; - - my $name = delete $args->{hostname}; - cmd_help_die("No hostname") unless $name; - fail('Host already exists.') if $hosts->{$name}; - - # make sure we have an ip - unless ($args->{ip}) { - my $addr = gethostbyname($name); - fail_text('host_add_no_ip') unless $addr; - $args->{ip} = inet_ntoa($addr); - } - - # defaults - $args->{port} ||= 7500; - $args->{status} ||= 'down'; - - # FIXME: verify the status can't be 'alive' if we can't get to ip:port - # OR BETTER: also make default status the reachability of that ip:port - - # now create the host - my $mogadm = mogadm(); - $mogadm->create_host($name, $args); - if ($mogadm->err) { - fail("Failure creating host: " . $mogadm->errstr); - } - - ok('Host has been created.'); -} - -sub cmd_host_modify { - my $args = shift; - my $name = delete $args->{hostname}; - - # FIXME: verify the status can't be 'alive' if we can't get to ip:port - - # now modify the host - my $mogadm = mogadm(); - $mogadm->update_host($name, $args); - if ($mogadm->err) { - fail("Failure modifying host: " . $mogadm->errstr); - } - - ok('Host has been modified.'); -} - -sub cmd_host_delete { - my $args = shift; - my $name = delete $args->{hostname}; - - # now modify the host - my $mogadm = mogadm(); - $mogadm->delete_host($name); - if ($mogadm->err) { - fail("Failure deleting host: " . $mogadm->errstr); - } - - ok('Host has been deleted.'); -} - -sub cmd_host_mark { - my $args = shift; - - my $mogadm = mogadm(); - $mogadm->update_host($args->{hostname}, { status => $args->{status} }); - if ($mogadm->err) { - fail("Failure updating host status: " . $mogadm->errstr); - } - - ok('Host status updated.'); -} - -sub cmd_domain_list { - # actually lists domains and classes - my $domains = domains() or - fail_text('no_domains'); - # now iterate - printf " %-20s %-20s %-12s %-12s\n", "domain", "class", "mindevcount", "replpolicy"; - printf "%-20s %-20s %-12s %-12s\n", '-' x 20, '-' x 20, '-' x 13, '-' x 12; - foreach my $domain (sort keys %$domains) { - foreach my $class (sort keys %{$domains->{$domain}}) { - my $dom = $domains->{$domain}->{$class}; - printf " %-20s %-20s %-8d %-13s\n", $domain, $class, - $dom->{mindevcount} || 0, $dom->{replpolicy} || ''; - } - print "\n"; - } - - ok(); -} - -sub cmd_domain_add { - my $args = shift; - - my $domain = delete $args->{domain}; - - # create - my $mogadm = mogadm(); - $mogadm->create_domain($domain); - if ($mogadm->err) { - fail('Error creating domain: ' . $mogadm->errstr); - } - - ok('Domain created.'); -} - -sub cmd_domain_delete { - my $args = shift; - - my $domain = $args->{domain}; - - # destroy - my $mogadm = mogadm(); - $mogadm->delete_domain($domain); - if ($mogadm->err) { - fail('Error deleting domain: ' . $mogadm->errstr); - } - - ok('Domain deleted.'); -} - -sub cmd_class_list { - # same, pass it through - cmd_domain_list(); -} - -sub cmd_class_add { - my $args = shift; - - my $domain = delete $args->{domain}; - my $class = delete $args->{class}; - - cmd_help_die() unless $domain && $class; - - $args->{mindevcount} ||= 2; - $args->{replpolicy} ||= ''; - - my $mogadm = mogadm(); - $mogadm->create_class($domain, $class, $args); - if ($mogadm->err) { - fail('Error creating class: ' . $mogadm->errstr); - } - - ok('Class created.'); -} - -sub cmd_class_modify { - my $args = shift; - - my $domain = delete $args->{domain}; - my $class = delete $args->{class}; - - cmd_help_die() unless $domain && $class; - - $args->{mindevcount} ||= 2; - $args->{replpolicy} ||= ''; - - my $mogadm = mogadm(); - $mogadm->update_class($domain, $class, $args); - if ($mogadm->err) { - fail('Error updating class: ' . $mogadm->errstr); - } - - ok('Class updated.'); -} - -sub cmd_class_delete { - my $args = shift; - - my $domain = $args->{domain}; - my $class = $args->{class}; - - cmd_help_die() unless $domain && $class; - - my $mogadm = mogadm(); - $mogadm->delete_class($domain, $class); - if ($mogadm->err) { - fail('Error deleting class: ' . $mogadm->errstr); - } - - ok('Class deleted.'); -} - -sub cmd_device_add { - my $args = shift; - - my $hosts = hosts() or - fail_text('no_hosts'); - - my $host = $args->{hostname}; - my $devid = $args->{devid}; - my $state = $args->{status} || "alive"; - - cmd_help_die("devid should be numeric") unless $devid =~ /^\d+$/; - - # FIXME: server should be fixed to verify via HTTP that the devid directory exists - - my $mogadm = mogadm(); - $mogadm->create_device(hostname => $host, devid => $devid, state => $state); - - if ($mogadm->err) { - fail('Error adding device: ' . $mogadm->errstr); - } - - ok('Device added.'); -} - -sub cmd_device_mark { - my $args = shift; - - print "***NOTE***: As of server version 2.40 'drain' has changed. See docs/wiki\n"; - my $mogadm = mogadm(); - $mogadm->change_device_state($args->{hostname}, - $args->{devid}, - $args->{status}); - if ($mogadm->err) { - fail('Error updating device: ' . $mogadm->errstr); - } - - ok('Device updated.'); -} - -sub cmd_device_modify { - my $args = shift; - my $hostname = delete $args->{hostname}; - my $devid = delete $args->{devid}; - - print "***NOTE***: As of server version 2.40 'drain' has changed. See docs/wiki\n"; - my $mogadm = mogadm(); - $mogadm->update_device($hostname, $devid, $args); - - if ($mogadm->err) { - fail('Error updating device: ' . $mogadm->errstr); - } - - ok('Device updated.'); -} - -sub cmd_device_list { - my $args = shift; - - my $hosts = hosts(); - fail_text('no_hosts') unless $hosts; - - my $devs = devices(); - fail_text('no_devices') unless $devs; - - foreach my $hostid (sort keys %$hosts) { - my $host = $hosts->{$hostid}; - print "$host->{hostname} [$hostid]: $host->{status}\n"; - - printf "%6s %-10s %7s %7s %7s\n", '', '', 'used(G)', 'free(G)', 'total(G)'; - foreach my $devid (sort keys %{$devs->{$hostid} || {}}) { - my $dev = $devs->{$hostid}->{$devid}; - next if $dev->{status} eq "dead" && ! $args->{all}; - - my $total = $dev->{mb_total} / 1024; - my $used = $dev->{mb_used} / 1024; - my $free = $total - $used; - printf "%6s: %-10s %-7.3f %-7.3f %-7.3f\n", "dev$devid", $dev->{status}, $used, $free, $total; - } - - print "\n"; - } - - ok(); -} - -sub cmd_device_summary { - my $args = shift; - my %show_state; - $show_state{$_} = 1 foreach split(/,/, ($args->{status} || "alive,readonly")); - - my $hosts = hosts(); - fail_text('no_hosts') unless $hosts; - - my $devs = devices(); - fail_text('no_devices') unless $devs; - - printf "%-15s %6s %7s %8s %8s %8s %8s\n", 'Hostname', 'HostID', 'Status', 'used(G)', 'free(G)', 'total(G)', '%Used'; - foreach my $hostid (sort keys %$hosts) { - my $host = $hosts->{$hostid}; - my ($total,$used) = (0, 0); - - foreach my $devid (sort keys %{$devs->{$hostid} || {}}) { - my $dev = $devs->{$hostid}->{$devid}; - next unless $show_state{$dev->{status}}; - - my $devtotal = $dev->{mb_total} / 1024; - my $devused = $dev->{mb_used} / 1024; - - $total += $devtotal; - $used += $devused; - } - my $free = $total - $used; - printf "%-15s [%4d]: %6s", $host->{hostname}, $hostid, $host->{status}; - printf " %8.3f %8.3f %8.3f ", $used, $free, $total; - printf "%8.2f", 100*$used/$total if $total; - print "\n"; - } - - ok(); - -} - -sub cmd_slave_list { - my $mogadm = mogadm(); - - my $slaves = $mogadm->slave_list(); - - foreach my $key (sort keys %$slaves) { - my $slavedata = $slaves->{$key}; - my ($dsn, $username, $password) = @$slavedata; - print "$key --dsn=$dsn --username=$username --password=$password\n"; - } - - ok(); -} - -sub cmd_slave_add { - my $mogadm = mogadm(); - my $args = shift; - - my $rc = $mogadm->slave_add($args->{slave_key}, $args->{dsn}, $args->{username}, $args->{password}); - - if ($rc) { - ok("Slave added"); - } else { - fail("Slave failed to be added"); - } -} - -sub cmd_slave_modify { - my $mogadm = mogadm(); - my $args = shift; - - my $key = delete $args->{slave_key} or cmd_help_die("Key argument is required"); - - my $rc = $mogadm->slave_modify($key, %$args); - - if ($rc) { - ok("Slave modify success"); - } else { - fail("Slave modify failure: " . $mogadm->errstr); - } -} - -sub cmd_slave_delete { - my $mogadm = mogadm(); - - my $args = shift; - - my $rc = $mogadm->slave_delete($args->{slave_key}); - - if ($rc) { - ok("Slave deleted"); - } else { - fail("Slave delete failed"); - } -} - -sub cmd_rebalance_start { - my $mogadm = mogadm(); - my $res = $mogadm->rebalance_start || fail($mogadm->errstr); - ok("rebalance started"); -} - -sub cmd_rebalance_stop { - my $mogadm = mogadm(); - my $res = $mogadm->rebalance_stop || fail($mogadm->errstr); - ok("rebalance stopped"); -} - -sub cmd_rebalance_reset { - my $mogadm = mogadm(); - my $res = $mogadm->rebalance_reset || fail($mogadm->errstr); - ok("rebalance reset"); -} - -# TODO: Make output prettier? Put hostname next to device name, print device -# info? -sub cmd_rebalance_test { - my $mogadm = mogadm(); - my $res = $mogadm->rebalance_test || fail($mogadm->errstr); - print "Tested rebalance policy...\n"; - my $s = $mogadm->server_settings; - print "Policy: ", $s->{rebal_policy}, "\n\n"; - print "Source devices:\n"; - for my $dev (sort split /,/, $res->{sdevs}) { - print " - ", $dev, "\n"; - } - print "Destination devices:\n"; - for my $dev (sort split /,/, $res->{ddevs}) { - print " - ", $dev, "\n"; - } -} - -sub cmd_rebalance_status { - my $mogadm = mogadm(); - - my $ss = $mogadm->server_settings or fail ($mogadm->errstr); - my $res = $mogadm->rebalance_status or fail ($mogadm->errstr); - if ($ss->{rebal_host}) { - print "Rebalance is running\n"; - } else { - print "Rebalance is stopped\n"; - } - print "Rebalance status:\n"; - for my $o (sort split /\s+/, $res->{state}) { - my ($k, $v) = split /=/, $o; - printf("%25s = %-s\n", $k, $v); - } -} - -sub cmd_rebalance_policy { - my $mogadm = mogadm(); - my $args = shift; - - my $res = $mogadm->rebalance_set_policy($args->{options}) - or fail($mogadm->errstr); - - ok("changed policy setting"); -} - -sub cmd_rebalance_settings { - my $mogadm = mogadm(); - - my $ss = $mogadm->server_settings - or fail("can't get settings"); - foreach my $k (sort keys %$ss) { - next unless ($k =~ '^rebal_'); - next if ($k eq 'rebal_state'); - printf("%25s = %-s\n", $k, $ss->{$k}); - } -} - -sub cmd_fsck_start { - my $mogadm = mogadm(); - my $res = $mogadm->fsck_start || fail($mogadm->errstr); - ok("fsck started"); -} - -sub cmd_fsck_stop { - my $mogadm = mogadm(); - my $res = $mogadm->fsck_stop || fail($mogadm->errstr); - ok("fsck stopped"); -} - -sub cmd_fsck_reset { - my $mogadm = mogadm(); - my $args = shift; - my $res = $mogadm->fsck_reset( - policy_only => $args->{"policy-only"}, - startpos => $args->{"startpos"}, - ) - or fail($mogadm->errstr); - ok("fsck stopped"); -} - -sub cmd_fsck_clearlog { - my $mogadm = mogadm(); - my $res = $mogadm->fsck_clearlog || fail($mogadm->errstr); - ok("fsck log cleared"); -} - -sub _log_dump { - my %opts = @_; - my $max = $opts{start}; - my $mogadm = mogadm(); - - my $fmt = "%-20s %5s %13s %10s\n"; - printf($fmt, "unixtime", "event", "fid", "devid"); - while (1) { - my @rows = $mogadm->fsck_log_rows(after_logid => $max); - unless (@rows) { - $opts{on_stall}->(); - next; - } - foreach my $row (@rows) { - printf($fmt, - $row->{utime}, - $row->{evcode}, - $row->{fid}, - $row->{devid} || "-"); - $max = $row->{logid}; - } - } -} - -sub cmd_fsck_printlog { - _log_dump(start => 0, - on_stall => sub { exit 0; }); -} - -sub cmd_fsck_taillog { - my $mogadm = mogadm(); - my $status = $mogadm->fsck_status - or fail("can't get fsck status"); - _log_dump(start => $status->{max_logid} - 20, - on_stall => sub { sleep 5; }); -} - -sub cmd_fsck_status { - my $mogadm = mogadm(); - my $status = $mogadm->fsck_status - or fail("can't get fsck status"); - - my %known = map { $_ => 1 } qw( - current_time - max_logid - ); - my $st = sub { - my $k = shift; - $known{$k} = 1; - return $status->{$k}; - }; - - my $line = sub { - printf("%11s: %-s\n", @_); - }; - print "\n"; - my $host = $st->('host'); - $line->("Running", $st->('running') ? "Yes (on $host)" : "No"); - - my $ratio = $st->('end_fid') ? ($st->('max_fid_checked') / $st->('end_fid')) : 0; - my $perc = sprintf("%0.02f%%", 100 * $ratio); - - $line->("Status", - $st->('max_fid_checked') . " / " . $st->('end_fid') - . " ($perc)"); - my $elap = $st->('start_time') ? - (($st->('stop_time') || $st->('current_time')) - $st->('start_time')) : - 0; - my $as_time = sub { - my $s = shift; - return int($s) . "s" if $s < 60; - return int($s/60) . "m"; - }; - my $per_sec = $elap ? ($st->('max_fid_checked') / $elap) : 0; - $line->("Time", sprintf("%s (%d fids/s; %s remain)", - $as_time->($elap), - sprintf("%0.1f", $per_sec), - $as_time->($per_sec ? - (($st->('end_fid') - $st->('max_fid_checked')) - / $per_sec) : - 0))); - - $line->("Check Type", ($st->('policy_only') ? - "Repl policy only (skip file checks)" : - "Normal (check policy + files)")); - - if (my @unk = grep { !$known{$_} } sort keys %$status) { - print "\n"; - foreach (@unk) { - $line->("[$_]", $status->{$_}); - } - } - print "\n"; -} - -sub cmd_settings_list { - my $mogadm = mogadm(); - unless ($mogadm->can("server_settings")) { - fail("settings commands require MogileFS::Client >= 1.07"); - } - my $ss = $mogadm->server_settings - or fail("can't get settings"); - foreach my $k (sort keys %$ss) { - # Don't list noisy "setting" - next if ($k =~ '^rebal'); - printf("%25s = %-s\n", $k, $ss->{$k}); - } -} - -sub cmd_settings_set { - my $mogadm = mogadm(); - unless ($mogadm->can("set_server_setting")) { - fail("settings commands require MogileFS::Client >= 1.07"); - } - my $args = shift; - - $mogadm->set_server_setting($args->{key}, $args->{value}) - or fail($mogadm->errstr); - ok(); -} - -########################################################################### -## helper routines -########################################################################### - -sub abortWithUsage { - my $ret = "Usage: (enter any command prefix, leaving off options, for further help)\n\n"; - foreach my $cmd (@topcmds) { - my $sbc = $usage->{$cmd}->{subcmd}; - if ($sbc) { - $ret .= " mogadm $cmd ...\n"; - } else { - $ret .= sprintf(" mogadm %-25s %-s\n", - "$cmd", - $usage->{$cmd}{des} || ""); - next; - } - foreach my $v (sort keys %$sbc) { - my $scv = $usage->{$cmd}{subcmd}{$v}; - $ret .= " "; - my $dotdot = $scv->{args} ? "..." : ""; - $ret .= sprintf(" %-25s %-s\n", - "$cmd $v $dotdot", - $scv->{des} || ""); - - } - } - print $ret, "\n"; - exit(1); -} - -sub abort_with_topcmd_help { - my ($cmd, $verb, $msg) = @_; - if ($msg) { - print "\nERROR: $msg\n\n"; - } - my $cmdsfx = $verb ? "-$verb" : ""; - my $ret = "Help for '$cmd$cmdsfx' command:\n"; - unless ($verb) { - $ret .= " (enter any command prefix, leaving off options, for further help)\n"; - } - $ret .= "\n"; - foreach my $subcmdv (sort keys %{$usage->{$cmd}{subcmd}}) { - next if $verb && $verb ne $subcmdv; - my $scv = $usage->{$cmd}{subcmd}{$subcmdv}; - $ret .= sprintf(" %-50s %-s\n", - "mogadm $cmd $subcmdv " . ($scv->{args} || ""), - $scv->{des}); - } - print $ret, "\n"; - if ($verb) { - my $scv = $usage->{$cmd}{subcmd}{$verb}; - foreach my $opt (sort { - (substr($b, 0, 1) cmp substr($a, 0, 1)) || - $a cmp $b - } keys %{$scv->{opts} || {}}) - { - printf(" %-20s %s\n", $opt, $scv->{opts}->{$opt}); - } - print "\n"; - } - exit 1; -} - -sub cmd_help_die { - my ($msg) = @_; - abort_with_topcmd_help($topcmd, $verb, $msg); -} - - -sub text { - return { - - ###################################################################### - cant_find_module => < < "Unable to retrieve domains from tracker(s).\n", - - ###################################################################### - no_devices => "No devices found on tracker(s).\n", - - ###################################################################### - host_add_no_ip => < --ip= [...] -END - - ###################################################################### - no_hosts => < <{$_[0]} || "UNDEFINED [$_[0]]"; -} - -sub fail_text { - print STDERR text($_[0]) . "\n"; - exit 1; -} - -sub fail { - print STDERR $_[0] . "\n"; - exit 1; -} - -sub ok { - if ($opts{verbose}) { - print STDOUT $_[0] . "\n" if $_[0]; - } - exit 0; -} - -sub mogadm { - my $host = shift(); - if ($host) { - $host = [ $host ] unless ref $host; - } else { - $host = $opts{trackers}; - } - # 10 seconds is the max time used for any of the admin locks (fsck status) - # plus we leave a bit of time for work. - my $timeout = 15; - $timeout = $opts{timeout} if $opts{timeout} && $opts{timeout} =~ /^[0-9]+$/; -# $MogileFS::DEBUG = 2; - my $mogadm = MogileFS::Admin->new( hosts => $host, timeout => $timeout ); - fail_text('no_mogadm') unless $mogadm; - return $mogadm; -} - -sub stats { - my $mogadm = shift() || mogadm(); - my $res; - eval { - $res = $mogadm->get_stats(); - }; - return undef if $@; - return $res; -} - -sub hosts_byname { - my $mogadm = shift() || mogadm(); - fail_text('no_mogadm') unless $mogadm; - - my $res; - eval { - $res = _array_to_hashref($mogadm->get_hosts(), 'hostname'); - }; - return undef if $@; - return $res; -} - -sub hosts { - my $mogadm = shift() || mogadm(); - fail_text('no_mogadm') unless $mogadm; - - my $res; - eval { - $res = _array_to_hashref($mogadm->get_hosts(), 'hostid'); - }; - return undef if $@; - return $res; -} - -sub devices { - my $mogadm = shift() || mogadm(); - fail_text('no_mogadm') unless $mogadm; - - my $res; - eval { - $res = _array_to_hashref($mogadm->get_devices(), [ 'hostid', 'devid' ]); - }; - return undef if $@; - return $res; -} - -sub domains { - my $mogadm = shift() || mogadm(); - fail_text('no_mogadm') unless $mogadm; - - my $res; - eval { - $res = $mogadm->get_domains(); - }; - return undef if $@; - return $res; -} - -sub _array_to_hashref { - my ($array, $key) = @_; - die "bad caller to _array_to_hashref\n" - unless $array && $key; - $key = [ $key ] unless ref $key eq 'ARRAY'; - my $kmax = scalar(@$key) - 1; - - # and a dose of handwavium... - my %res; - foreach my $row (@$array) { - my $ref = \%res; - for (my $i = 0; $i <= $kmax; $i++) { - if ($i == $kmax) { - # we're on the last key so just assign into $ref - $ref->{$row->{$key->[$i]}} = $row; - } else { - # not on the last, so keep descending - $ref->{$row->{$key->[$i]}} ||= {}; - $ref = $ref->{$row->{$key->[$i]}}; - } - } - } - - # return result.. duh - return \%res; -} - -__END__ - -=head1 NAME - -mogadm - MogileFS admin tool - -=head1 SYNOPSIS - - $ mogadm [config options] [argument options] - - $ mogadm - .... - (prints contextual help, if missing command/arguments) - ... - -=head1 OPTIONS - -=over 8 - -=item B<--lib=/path/to/lib> - -Set this option to a path to include this directory in the module -search path. - -=item B<--trackers=10.0.0.117:7001,10.0.0.118:7001,...> - -Use these MogileFS trackers for status information. - -=back - -=head1 ARGUMENTS - -=over 8 - -=item B - -Check to ensure that all of the MogileFS system components are functioning -and that we can contact everybody. The quickest way of ensuring that the -entire MogileFS system is functional from the current machine's point of view. - -=item BhostE [host options]> - -=item BhostE [host options]> - -=item BhostE EstatusE> - -=item BhostE> - -=item B - -Functions for manipulating hosts. For add and modify, host options is in -the format of normal command line options and can include anything in the -L section. - -=item BhostE Edevice idE> - -=item BhostE Edevice idE EstatusE> - -=item BhostE EdeviceE [device options]> - -=item BhostE EdeviceE> - -=item B - -Functions for manipulating devices. For add and modify, device options are in -the format of normal command line options and can include anything in the -L section. - -=item BdomainE> - -=item BdomainE> - -=item B - -Simple commands for managing MogileFS domains. Note that you cannot delete -a domain unless it has no classes and is devoid of files. - -=item BdomainE EclassE [class options]> - -=item BdomainE EclassE [class options]> - -=item BdomainE EclassE> - -=item B - -Commands for working with classes. Please see the L section -for the options to use with add/modify. Also, delete requires that the class -have no files in it before it will allow the deletion. - -=item B - -Add/remove slaves replicating from MogileFS master database. - -TODO: detail this - -Run B by itself for contextual help. - -=item B - -=item B - -=item B - -Display or clear the log of fsck events. - -=item B - -Reset fsck position back to the beginning. Please see the L -section for options to use with fsck. - -=item B - -Start (or resume) background fsck from the last checked fid. If you want to -check every fid, you must call B before calling start. - -=item B - -Show the status of the presently active (or last if none active) fsck. This -includes what FIDs are being checked, time statistics, check type as well as a -summary of problems encountered so far. - -=item B - -Stop (pause) background fsck - -=item B - -Display all present MogileFS settings. - -=item BkeyE EvalueE> - -Set the server setting for 'key' to 'value'. - -The current settings are Eenable_rebalanceE (set to 1 to start -rebalance mode to move files to under-used devices) and -Ememcache_serversE (enable memcached caching in the tracker). - -=back - -=head1 HOST OPTIONS - -=over 8 - -=item B<--ip=Eip of hostE> - -=item B<--port=Eport of mogstored on hostE> - -Contact information for the host. This is the minimum set of information needed -to setup a host. - -=item B<--getport=Ealternate retrieval part on hostE> - -If provided, causes the tracker to use this port for retrieving files. Uploads are -still processed at the standard port. - -=item B<--altip=Ealternate IPE> - -=item B<--altmask=Emask to activate alternate IPE> - -If a client request comes in from an IP that matches the alternate mask, then the -host IP is treated as the alternate IP instead of the standard IP. This can be -used, for example, if you have two networks and you need to return one IP to -reach the node on one network, but a second IP to reach it on the alternate -network. - -=item B<--status=Ehost statusE> - -Valid host statuses are one of: alive, down, dead. - -=back - -=head1 DEVICE OPTIONS - -=over 8 - -=item B<--status=Edevice statusE> - -Valid device statuses are one of: alive, dead, down, drain, readonly. - -=item B<--weight=Edevice weight> - -The weight used in calculation of preferred paths. It must be a positive -integer. - -=back - -=head1 CLASS OPTIONS - -=over 8 - -=item B<--mindevcount=EvalueE> - -Number of devices the files in this class should be replicated across. Can be -set to anything >= 1. - -=item B<--replpolicy=EvalueE> - -Stringified replication policy. ie "MultipleHosts(3)" is equivalent to a ---mindevcount=3. See documentation or plugins on alternative policies. - -=back - -=head1 FSCK OPTIONS - -=over 8 - -=item B<--policy-only> - -Check replication policy (assumed locations) only; don't stat storage nodes for -actual file presence. - -=back - -=head1 EXAMPLES - -Host manipulation: - - $ mogadm host list - $ mogadm host add foo.local - $ mogadm host add foo.local --status=down --ip=10.0.0.34 --port=7900 - $ mogadm host mark foo.local down - $ mogadm host modify foo.local --port=7500 - $ mogadm host delete foo.local - -Device manipulation: - - $ mogadm device list - $ mogadm device summary - $ mogadm device summary --status=dead,down - $ mogadm device add foo.local 16 - $ mogadm device add foo.local 17 --status=alive - $ mogadm device mark foo.local 17 down - $ mogadm device modify foo.local 17 --status=alive --weight=10 - $ mogadm device delete foo.local 17 - -Domain manipulation: - - $ mogadm domain list - $ mogadm domain add first.domain - $ mogadm domain delete first.domain - -Class manipulation - - $ mogadm class list - $ mogadm class add first.domain my.class - $ mogadm class add first.domain my.class --mindevcount=3 - $ mogadm class add first.domain my.class --replpolicy="MultipleHosts(3)" - $ mogadm class modify first.domain my.class --mindevcount=2 - $ mogadm class modify first.domain my.class --replpolicy="MultipleHosts(3)" - $ mogadm class delete first.domain my.class - -Check the status of your entire MogileFS system: - - $ mogadm check - -Check every file in the entire MogileFS system: - - $ mogadm fsck reset - $ mogadm fsck start - $ mogadm fsck status - $ mogadm fsck printlog - -See all the things mogadm can do: - - $ mogadm - -Get help on a sub-command: - - $ mogadm device - - -=head1 CONFIGURATION - -It is recommended that you create a configuration file such as C (or at C<~/.mogilefs.conf>) to -be used for configuration information. Basically all you need is something like: - - trackers = 10.0.0.23:7001, 10.0.0.15:7001 - - # if MogileFS::Admin files aren't installed in standard places: - lib = /home/mogilefs/cgi-bin - -Note that these can also be specified on the command line, as per above. - -=head1 AUTHOR - -Brad Fitzpatrick ELE - -Mark Smith ELE - -Leon Brocard ELE, open sourced permissions from Foxtons Ltd. - -Robin H. Johnson Erobbat2@orbis-terrarum.netE - -=head1 BUGS - -Please report any on the MogileFS mailing list: L. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut diff --git a/mogdelete b/mogdelete deleted file mode 100755 index 8bdd898..0000000 --- a/mogdelete +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/perl - -=head1 NAME - -mogdelete -- Delete keys from a MogileFS installation - -=head1 SYNOPSIS - - $ mogdelete --trackers=host --domain=foo --key="/hello.jpg" - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --domain= - -Set the MogileFS domain to use. - -=item --key="" - -A key to delete. Can be an arbitrary string. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -Produces a slightly weird error if key does not exist. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; - -use lib './lib'; -use MogileFS::Utils; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --domain=foo --key='/hello.jpg'"; -my $c = $util->getopts($usage, 'key=s'); - -my $mogc = $util->client; - -$mogc->delete($c->{key}); -if ($mogc->errcode) { - print STDERR "Error deleting file: ", $mogc->errstr, "\n"; -} diff --git a/mogfetch b/mogfetch deleted file mode 100755 index e3cb006..0000000 --- a/mogfetch +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/perl - -=head1 NAME - -mogfetch -- Fetch data from a MogileFS installation - -=head1 SYNOPSIS - - $ mogfetch [options] - $ mogfetch [options] --file="-" > filename - - $ mogfetch --trackers=host --domain=foo \ - --key="/hello.jpg" --file="output.jpg" - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --domain= - -Set the MogileFS domain to use. - -=item --key="" - -The key to locate the data with. Can be an arbitrary string. - -=item --file="" - -A local destination file. If '-', data is written to STDOUT instead. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -None known. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; - -use lib './lib'; -use MogileFS::Utils; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --domain=foo --key='/hello.jpg' --file='./output'"; -my $c = $util->getopts($usage, qw/key=s file=s/); - -my $mogc = $util->client; - -# Default to noverify, don't hang up the tracker. We'll try all paths. -my @paths = $mogc->get_paths($c->{key}, { noverify => 1 }); -if ($mogc->errcode) { - die "Error fetching paths: " . $mogc->errstr; -} - -die "No paths found or key does not exist" unless @paths; - -my $filename = $c->{file}; -my @resses; -for my $path (@paths) { - next unless $path; # overparanoid? - my $ua = LWP::UserAgent->new; - $ua->timeout(10); - - my $file; - if ($filename eq '-') { - $file = *STDOUT; - } else { - open($file, "> $filename") or die "Could not open " . $filename; - } - - my $writeout = sub { - print $file $_[0]; - }; - my $res = $ua->get($path, ':content_cb' => $writeout, - ':read_size_hint' => 32768); - - if ($res->is_success) { - last; - } else { - # print all the errors to be the most helpful - push(@resses, $res); - next; - } -} - -if (@resses) { - for my $res (@resses) { - print STDERR "Got errors while trying to fetch:\n"; - print STDERR $res->status_line, "\n"; - } -} diff --git a/mogfiledebug b/mogfiledebug deleted file mode 100755 index 5c9c162..0000000 --- a/mogfiledebug +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/perl - -=head1 NAME - -mogfiledebug -- Dump gobs of information about a FID - -=head1 SYNOPSIS - - $ mogfiledebug --trackers=host --domain=foo --key=bar - $ mogfiledebug --trackers=host --fid=1234 - -=head1 DESCRIPTION - -Utility for troubleshooting problemic files in a mogilefs cluster. Also useful -for verification or testing new setups. - -Finds as much information about a file as it can. All of the paths, any queues -it might be sitting in, etc. Will then test all of the paths, MD5 hash their -contents, and check the file lengths. If you see errors about a FID in -mogilefsd's logs plugging it through mogfiledebug should illuminate most of -the potential issues. - -This is also useful information for posting to the mailing list, along with -the error you had. - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --domain= - -Set the MogileFS domain to use. - -=item --key="" - -The key to inspect. Can be an arbitrary string. - -=item --fid= - -A numeric fid to inspect. Provide this as an alternative to a domain/key -combination. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -None known. Could use more helpful prints, or a longer troubleshooting manual. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; - -use lib './lib'; -use MogileFS::Utils; -use Digest::MD5; -use LWP::UserAgent; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --domain=foo --key='/hello.jpg'"; -# FIXME: add "nofetch" mode that just prints paths? -my $c = $util->getopts($usage, qw/key=s fid=i/); - -my $mogc = $util->client; - -my $arg = $c->{fid} ? 'fid' : 'key'; -my $details = $mogc->file_debug($arg => $c->{$arg}); -if ($mogc->errcode) { - die "Error fetching fid info: " . $mogc->errstr; -} - -my %parts = (); -my @paths = grep { $_ =~ m/^devpath_/ } keys %$details; -while (my ($k, $v) = each %$details) { - next if $k =~ m/^devpath_/; - if ($k =~ s/^(\w+)_//) { - $parts{$1}->{$k} = $v; - } -} - -# If no paths, print something about that. -if (@paths) { - my @results; - # For each actual path, fetch and calculate the MD5SUM. - print "Fetching and summing paths...\n"; - for my $key (@paths) { - my $path = $details->{$key}; - push(@results, fetch_path($path)); - } - my $hash; # detect if hashes don't match - my $len = $parts{fid}->{length}; - print "No length, cannot verify content length" unless defined $len; - # No I don't have a good excuse for why this isn't one loop. - for my $res (@results) { - print "Results for path: ", $res->{path}, "\n"; - $hash = $res->{hash} unless $hash; - if ($hash ne $res->{hash}) { - print " - ERROR: Hash does not match first path!\n"; - } - if (defined $len && $len != $res->{length}) { - print " - ERROR: Length does not match file row!\n"; - } - print " - Hash: ", $res->{hash}, "\n"; - print " - Length: ", $res->{length}, "\n"; - print " - HTTP result: ", $res->{res}, "\n"; - } -} else { - print "No valid-ish paths found\n"; -} - -# print info from all of the queues. Raw is fine? failcount/etc. -print "Tempfile and/or queue rows...\n"; -my $found = 0; -for my $type (qw/tempfile replqueue delqueue rebqueue fsckqueue/) { - my $part = $parts{$type}; - next unless (defined $part); - $found++; - printf("- %12s\n", $type); - while (my ($k, $v) = each %$part) { - printf(" %20s: %20s\n", $k, $v); - } -} -print "none.\n" unless $found; - -# Print rest of file info like file_info -if (my $fid = $parts{fid}) { - print "- File Row:\n"; - for my $item (sort keys %$fid) { - printf(" %8s: %20s\n", $item, $fid->{$item}); - } -} else { - print qq{- ERROR: No file row was found! -File may have been deleted or never closed. -See above for any matching rows from tempfile or delqueue. -}; -} - -if (my $devids = $details->{devids}) { - print " - Raw devids: ", $devids, "\n"; -} - -sub fetch_path { - my $path = shift; - my $ua = LWP::UserAgent->new; - my $ctx = Digest::MD5->new; - $ua->timeout(10); - my %toret = (); - - my $sum_up = sub { - $toret{length} += length($_[0]); - $ctx->add($_[0]); - }; - my $res = $ua->get($path, ':content_cb' => $sum_up, - ':read_size_hint' => 32768); - - $toret{hash} = $ctx->hexdigest; - $toret{res} = $res->status_line; - $toret{path} = $path; - return \%toret; -} diff --git a/mogfileinfo b/mogfileinfo deleted file mode 100755 index 2fd5961..0000000 --- a/mogfileinfo +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/perl - -=head1 NAME - -mogfileinfo -- Fetch key metadata from a MogileFS installation - -=head1 SYNOPSIS - - $ mogfileinfo --trackers=host --domain=foo --key="/hello.jpg" - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --domain= - -Set the MogileFS domain to use. - -=item --key="" - -The key to inspect. Can be an arbitrary string. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -None known, but output might change in the future. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; - -use lib './lib'; -use MogileFS::Utils; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --domain=foo --key='/hello.jpg'"; -my $c = $util->getopts($usage, 'key=s'); - -my $mogc = $util->client; - -my $fid = $mogc->file_info($c->{key}); -if ($mogc->errcode) { - die "Error fetching file info: " . $mogc->errstr; -} -die "Key not found: " . $c->{key} unless $fid; - -# Might replace this with just fetching the devids from above... -my @paths = $mogc->get_paths($c->{key}, { noverify => 1, pathcount => 99 }); -if ($mogc->errcode) { - die "Error fetching paths: " . $mogc->errstr; -} -die "No paths found or key does not exist" unless @paths; - -print "- file: ", $c->{key}, "\n"; -for my $item (sort keys %$fid) { - printf(" %8s: %20s\n", $item, $fid->{$item}); -} - -for my $path (@paths) { - print " - ", $path, "\n"; -} diff --git a/moglistfids b/moglistfids deleted file mode 100755 index b11f2d1..0000000 --- a/moglistfids +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/perl - -=head1 NAME - -moglistfids -- Iterate fid/key data from a MogileFS installation - -=head1 DESCRIPTION - -Example utility for pulling all file data out of a MogileFS installation. -Utilities like this can be built on for creating backup systems, -Mogile<->Mogile syncronization systems, or Mogile->S3 syncronization. - -This method is only a way of pulling new files which have existed since the -last time it was checked, as there's no logging of deleted files. - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --fromfid= - -The highest numbered fid fetched the last time this utility was run. - -=item --count= - -Numer of fids to inspect and return. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -None known. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; - -use MogileFS::Admin; -use lib './lib'; -use MogileFS::Utils; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --fromfid=123 --count=5000"; -my $c = $util->getopts($usage, qw/fromfid=i count=i/); - -my $moga = MogileFS::Admin->new(hosts => $c->{trackers}); - -my $fromfid = $c->{fromfid} || 0; -my $count = $c->{count} || 100; - -while ($count) { - # Try to fetch the max, but we will likely get less. - my $fids_chunk = $moga->list_fids($fromfid, $count); - if ($moga->errcode) { - die "Error listing fids: ", $moga->errstr, "\n"; - } - my @fids = sort { $a <=> $b } keys %$fids_chunk; - last unless @fids; - $fromfid = $fids[-1]; - $count -= @fids; - for my $fid (@fids) { - my $file = $fids_chunk->{$fid}; - print "fid ", $fid, "\n"; - for my $key (sort keys %$file) { - my $val = $file->{$key}; - $val = _escape_url_string($val) if $key eq 'dkey'; - print $key, " ", $val, "\n"; - } - print "\n"; - } -} - -sub _escape_url_string { - my $str = shift; - $str =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg; - $str =~ tr/ /+/; - return $str; -} diff --git a/moglistkeys b/moglistkeys deleted file mode 100755 index 6c6242b..0000000 --- a/moglistkeys +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/perl - -=head1 NAMe - -moglistkeys -- Lists keys out of a MogileFS domain - -=head1 SYNOPSIS - - $ moglistkeys --trackers=host --domain=foo --key_prefix="/foo/bar/" - -=head1 DESCRIPTION - -If you store your MogileFS keys in a logical "structure", you may use this -tool to view lists of subsets of keys. Note that this is not going to be -equivalent to "cd" and "ls" tools, as listing "foo/" will list everything -underneath, so it's more akin to "ls -R" - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --domain= - -Set the MogileFS domain to use. - -=item --key_prefix="/foo/bar/" - -Search for keys starting with this prefix. Can be an arbitrary string. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -None known. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -# TODO: Add ways to limit # of keys displayed - -use strict; -use warnings; - -use lib './lib'; -use MogileFS::Utils; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --domain=foo --key_prefix='bar/'"; -my $c = $util->getopts($usage, 'key_prefix=s'); - -my $mogc = $util->client; - -$mogc->foreach_key(prefix => $c->{key_prefix}, sub { - my $key = shift; - print $key, "\n"; -}); - -if ($mogc->errcode) { - print STDERR "Error listing files: ", $mogc->errstr, "\n"; -} diff --git a/mogstats b/mogstats deleted file mode 100755 index 2566fc8..0000000 --- a/mogstats +++ /dev/null @@ -1,600 +0,0 @@ -#!/usr/bin/perl -# vim:ts=4 sw=4 ft=perl et: -# TODO: -# YAML display mode -# timing output during fetch -# support different db backends better (something mildly generic). - -=head1 NAME - -mogstats -- Utility for calculating slow stats directly against a MogileFS DB - -=head1 SYNOPSIS - - $ mogstats --db_dsn="DBI:mysql:mfs:host=mfshost" --db_user="mfs" \ - --db_pass="mfs" --verbose --stats="devices,files" - $ mogstats --stats="all" - $ mogstats [all options in ~/.mogilefs.conf] - -=head1 DESCRIPTION - -Utility for inspecting queues and running general statistics against a -MogileFS database. Some of these stats can take a very long time to run -against a large instance, so the utility can be pointed at a read slave or -special account. - -=head1 OPTIONS - -=over - -=item --db_dsn= - -The DSN to use for connecting to the MogileFS database server. - -=item --db_user= - -A database user for connecting to the database. - -=item --db_pass= - -An optional password for the database user. - -=item --config= - -An explicit config file to use. By default /etc/mogilefs/mogilefs.conf and -~/.mogilefs.conf are checked. - -=item --verbose - -Print some extra text during processing. Mostly notes about what stats are -starting or finishing. - -=item --stats= - -A list of which statistics to calculate. Notes on some of them are listed -below, see --help for full list. A value of "all" fetches all possible stats. - -=item --help - -List usage info and supported statistics. - -=back - -=head1 AVAILABLE STATISTICS - -Contains notes on which stats may be fast or slow. - -=over - -=item devices - -Lists count of files and current database status per-device. Can be very slow. - -=item fids - -Lists the current highest file id. Should be fast. - -=item files - -Gives a breakdown of where files are by domain and class. Displays the size of -all stored files pre-replication, as well as post-replication size. The latter -being closer to the actual storage amount. Can be very slow. - -=item domains - -Shows a simple count of where files are by domain and class. Faster than using -"files" but displays less information. - -=item replication - -Displays a breakdown of devcount per domain/class combo. Shows number of files -in domain "foo" with class "bar" that have a current devcount of 0, 1, 2, 3, -etc. Useful for spotting broken files (devcount 0), replication lag, or over -replication bugs. Can be very slow. - -=item replication-queue - -Quick breakdown of how many fids are due for replication. Fids listed as -"manual" need manual intervention before they can be replicated, and could be -broken. Will be fast unless there are many files in queue. - -=item delete-queue - -Similar to replication-queue. - -=item general-queue - -Displays a breakdown of what's in the general queue. This includes FSCK, -Rebalance, and other temporary queueing systems MogileFS has. Should be fast -unless you have configured MogileFS to queue many fids at once. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -None known. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; -use DBI; -use Getopt::Long; - -# FIXME: decide how to share constants between utils and server. -use constant ENDOFTIME => 2147483647; -my %QUEUES = ( 1 => 'FSCK_QUEUE', 2 => 'REBAL_QUEUE' ); -my %valid_stats = map { $_ => 1 } qw/devices fids files domains replication replication-queue delete-queue general-queues all/; - -my $DBH_CACHE = ''; -my $DB_TYPE = ''; - -my %opts; - -# FIXME: Use MogileFS::Utils for configuration junk. -Getopt::Long::Configure("require_order", "pass_through"); -GetOptions( - "config=s" => \$opts{config}, - "lib=s" => \$opts{lib}, - "help" => \$opts{help}, - "verbose" => \$opts{verbose}, - "stats=s" => \$opts{stats}, - "db_dsn=s" => \$opts{db_dsn}, - "db_user=s" => \$opts{db_user}, - "db_pass=s" => \$opts{db_pass}, - ) or abort_with_usage(); -Getopt::Long::Configure("require_order", "no_pass_through"); - -my @configs = ($opts{config}, "$ENV{HOME}/.mogilefs.conf", "/etc/mogilefs/mogilefs.conf"); -foreach my $fn (reverse @configs) { - next unless $fn && -e $fn; - open FILE, "<$fn" - or die "unable to open $fn: $!\n"; - while () { - s/\#.*//; - next unless m/^\s*(\w+)\s*=\s*(.+?)\s*$/; - $opts{$1} = $2 unless ( defined $opts{$1} ); - } - close FILE; -} - -abort_with_usage() if $opts{help}; - -cmd_stats($opts{stats}); - -sub abort_with_usage { - my $message = shift; - print "ERROR: $message\n\n" if $message; - - print qq{Usage: - mogstats --db_dsn="DBI:mysql:mfs:host=mfshost" --db_user="mfs" \ - --db_pass="mfs" --verbose --stats="devices,files" - mogstats --stats="all" - mogstats [all options in ~/.mogilefs.conf] - -}; - print "valid stats: ", join(', ', sort keys %valid_stats), "\n"; - exit 1; -} - -sub cmd_stats { - my $args = shift; - $args = 'all' unless $args; - my %args = map { $_ => 1 } split(/,/, $args); - for my $arg (keys %args) { - abort_with_usage("Invalid stat $arg") unless $valid_stats{$arg}; - } - - print "Fetching statistics... ($args)\n"; - my $stats = stats_from_db(\%args) - or die "Can't fetch stats"; - - if ($args{devices} || $args{all}) { - print "\nStatistics for devices...\n"; - printf " %-10s %-15s %12s %10s\n", "device", "host", "files", "status"; - printf " ---------- ---------------- ------------ ----------\n"; - foreach my $device (sort { $a <=> $b } keys %{$stats->{devices}}) { - my $value = $stats->{devices}->{$device}; - printf " %-10s %-10s %10s %10s\n", "dev$device", $value->{host}, $value->{files}, $value->{status}; - } - printf " ---------- ---------------- ------------ ----------\n"; - } - - if ($args{fids} || $args{all}) { - print "\nStatistics for file ids...\n"; - printf " Max file id: %s\n", $stats->{fids}->{max} || 'none'; - } - - if ($args{files} || $args{all}) { - print "\nStatistics for files...\n"; - printf " %-20s %-10s %10s %11s %13s\n", 'domain', 'class', 'files', - 'size (m)', 'fullsize (m)'; - printf " -------------------- ----------- ---------- ----------- -------------\n"; - foreach my $domain (sort keys %{$stats->{files}}) { - my $classes = $stats->{files}->{$domain}; - foreach my $class (sort keys %$classes) { - my $stat = $classes->{$class}; - my $files = $stat->[2]; - my $size = int($stat->[3] / 1024 / 1024); - my $total = int($stat->[4] / 1024 / 1024); - printf " %-20s %-10s %10s %11s %13s\n", $domain, $class, - $files, $size, $total; - } - } - printf " -------------------- ----------- ---------- ----------- -------------\n"; - } - - if ($args{domains} && !($args{files} || $args{all})) { - print "\nStatistics for domains...\n"; - printf " %-20s %-10s %10s\n", 'domain', 'class', 'files'; - printf " -------------------- ----------- ----------\n"; - foreach my $domain (sort keys %{$stats->{domains}}) { - my $classes = $stats->{domains}->{$domain}; - foreach my $class (sort keys %$classes) { - my $files = $classes->{$class}; - printf " %-20s %-10s %10s\n", $domain, $class, $files; - } - } - printf " -------------------- ----------- ----------\n"; - } - - if ($args{replication} || $args{all}) { - print "\nStatistics for replication...\n"; - printf " %-20s %-10s %10s %10s\n", 'domain', 'class', 'devcount', 'files'; - printf " -------------------- ----------- ---------- ----------\n"; - foreach my $domain (sort keys %{$stats->{replication}}) { - my $classes = $stats->{replication}->{$domain}; - foreach my $class (sort keys %$classes) { - my $devcounts = $classes->{$class}; - foreach my $devcount (sort { $a <=> $b } keys %$devcounts) { - my $files = $devcounts->{$devcount}; - printf " %-20s %-10s %10s %10s\n", $domain, $class, $devcount, $files; - } - } - } - printf " -------------------- ----------- ---------- ----------\n"; - } - # Now new stats. - if ($args{'replication-queue'} || $args{all}) { - print "\nStatistics for replication queue...\n"; - printf " %-20s %12s\n", 'status', 'count'; - printf " -------------------- ------------\n"; - for my $status (sort keys %{$stats->{to_replicate}}) { - my $files = $stats->{to_replicate}->{$status}; - printf " %-20s %12s\n", $status, $files; - } - printf " -------------------- ------------\n"; - - } - - if ($args{'delete-queue'} || $args{all}) { - print "\nStatistics for delete queue...\n"; - printf " %-20s %12s\n", 'status', 'count'; - printf " -------------------- ------------\n"; - for my $status (sort keys %{$stats->{to_delete}}) { - my $files = $stats->{to_delete}->{$status}; - printf " %-20s %12s\n", $status, $files; - } - printf " -------------------- ------------\n"; - - } - - if ($args{'general-queues'} || $args{all}) { - print "\nStatistics for general queues...\n"; - printf " %-15s %-20s %12s\n", 'queue', 'status', 'count'; - printf " --------------- -------------------- ------------\n"; - for my $queue (sort keys %{$stats->{queue}}) { - my $status = $stats->{queue}->{$queue}; - for my $stat (sort keys %{$status}) { - my $files = $status->{$stat}; - printf " %-15s %-20s %12s\n", $queue, $stat, $files; - } - } - printf " --------------- -------------------- ------------\n"; - } - - print "\ndone\n"; -} - -sub get_dbh { - return $DBH_CACHE if ($DBH_CACHE && $DBH_CACHE->ping); - $DBH_CACHE = DBI->connect($opts{db_dsn}, $opts{db_user}, $opts{db_pass}, { - PrintError => 0, - AutoCommit => 1, - RaiseError => 1, - }) or die "Failed to connect to database: " . DBI->errstr; - my $dsn = $opts{db_dsn}; - if ($dsn =~ /^DBI:mysql:/i) { - $DB_TYPE = "MySQL"; - } elsif ($dsn =~ /^DBI:SQLite:/i) { - $DB_TYPE = "SQLite"; - } elsif ($dsn =~ /^DBI:Pg:/i) { - $DB_TYPE = "Postgres"; - } else { - die "Unknown database type: $dsn"; - } - return $DBH_CACHE; -} - -sub stats_from_db { - my $args = shift; - - # get database handle - my $ret = {}; - my $dbh = get_dbh() or die "Could not get database handle"; - - # get names of all domains and classes for use later - my %classes; - my $rows; - - $rows = $dbh->selectall_arrayref('SELECT d.dmid, d.namespace, c.classid, c.classname ' . - 'FROM domain d LEFT JOIN class c ON c.dmid=d.dmid'); - - foreach my $row (@$rows) { - $classes{$row->[0]}->{name} = $row->[1]; - $classes{$row->[0]}->{classes}->{$row->[2] || 0} = $row->[3] || 'default'; - } - $classes{$_}->{classes}->{0} = 'default' - foreach keys %classes; - - # get host and device information with device status - my %devices; - $rows = $dbh->selectall_arrayref('SELECT device.devid, hostname, device.status ' . - 'FROM device, host WHERE device.hostid = host.hostid'); - foreach my $row (@$rows) { - $devices{$row->[0]}->{host} = $row->[1]; - $devices{$row->[0]}->{status} = $row->[2]; - } - my %globals = ( classes => \%classes, devices => \%devices ); - - # if they want replication counts, or didn't specify what they wanted - if ($args->{replication} || $args->{all}) { - $ret->{replication} = stats_for_replication(\%globals); - } - - # Stats about the replication queue (deferred, overdue) - if ($args->{'replication-queue'} || $args->{all}) { - $ret->{to_replicate} = stats_for_to_replicate(\%globals); - } - - # Stats about the delete queue (deferred, overdue) - if ($args->{'delete-queue'} || $args->{all}) { - $ret->{to_delete} = stats_for_to_delete(\%globals); - } - - # file statistics (how many files there are and in what domains/classes) - if ($args->{files} || $args->{all}) { - $ret->{files} = stats_for_files(\%globals); - } - - # domain statistics (how many files per domain, faster than file stats) - if ($args->{domains} && !($args->{files} || $args->{all})) { - $ret->{domains} = stats_for_domains(\%globals); - } - - # device statistics (how many files are on each device) - if ($args->{devices} || $args->{all}) { - $ret->{devices} = stats_for_devices(\%globals); - } - - # now fid statistics - if ($args->{fids} || $args->{all}) { - verbose("... fid stats..."); - my $max = $dbh->selectrow_array('SELECT MAX(fid) FROM file'); - $ret->{fids} = { max => $max }; - verbose("... done"); - } - - if ($args->{'general-queues'} || $args->{all}) { - $ret->{queue} = stats_for_to_queue(\%globals); - } - - return $ret; -} - -sub stats_for_devices { - my $globals = shift; - my %classes = %{$globals->{classes}}; - my %devices = %{$globals->{devices}}; - my $dbh = get_dbh() or die "Could not get database handle"; - - verbose("... per-device stats..."); - my $stats = $dbh->selectall_arrayref('SELECT devid, COUNT(devid) FROM file_on GROUP BY 1'); - my $devs = {}; - for my $stat (@$stats) { - my $host = $devices{$stat->[0]}->{host}; - my $status = $devices{$stat->[0]}->{status}; - $devs->{$stat->[0]} = { - host => $host, - status => $status, - files => $stat->[1], - }; - } - verbose("... done"); - return $devs; -} - -sub stats_for_files_sql { - my $sql = 'SELECT dmid, classid, COUNT(classid), sum(length), sum(length::int4 * devcount::int4) FROM file GROUP BY 1, 2'; - return $sql if ($DB_TYPE eq 'Postgres'); - $sql =~ s/::int4//g; - return $sql; -} - -sub stats_for_files { - my $globals = shift; - my %classes = %{$globals->{classes}}; - my %devices = %{$globals->{devices}}; - my $dbh = get_dbh() or die "Could not get database handle"; - - verbose("... files stats..."); - my $stats = $dbh->selectall_arrayref(stats_for_files_sql()); - my $files = {}; - for my $stat (@$stats) { - my $domain = $classes{$stat->[0]}->{name}; - my $class = $classes{$stat->[0]}->{classes}->{$stat->[1]}; - $files->{$domain}->{$class} = $stat; - } - verbose("... done"); - return $files; -} - -sub stats_for_domains { - my $globals = shift; - my %classes = %{$globals->{classes}}; - my %devices = %{$globals->{devices}}; - my $dbh = get_dbh() or die "Could not get database handle"; - - verbose("... domains stats..."); - my $stats = $dbh->selectall_arrayref('SELECT dmid, classid, COUNT(classid) FROM file GROUP BY 1, 2'); - my $files = {}; - for my $stat (@$stats) { - my $domain = $classes{$stat->[0]}->{name}; - my $class = $classes{$stat->[0]}->{classes}->{$stat->[1]}; - $files->{$domain}->{$class} = $stat->[2]; - } - verbose("... done"); - return $files; -} - -sub stats_for_replication { - my $globals = shift; - my %classes = %{$globals->{classes}}; - my %devices = %{$globals->{devices}}; - my $dbh = get_dbh() or die "Could not get database handle"; - - verbose("... replication stats..."); - # replication stats - # This is the old version that used devcount: - my @stats = get_stats_files_per_devcount(); - - my $repl = {}; - for my $stat (@stats) { - my $domain = $classes{$stat->{dmid}}->{name}; - my $class = - $classes{$stat->{dmid}}->{classes}->{$stat->{classid}}; - $repl->{$domain}->{$class}->{$stat->{devcount}} = $stat->{count}; - } - - verbose("... done"); - return $repl; -} - -sub stats_for_to_queue { - my $ret = {}; - my $dbh = get_dbh() or die "Could not get database handle"; - - verbose("... queue stats..."); - my $db_time = $dbh->selectrow_array('SELECT '. unix_timestamp()); - my $stats = $dbh->selectall_arrayref('SELECT type, nexttry, COUNT(*) FROM file_to_queue GROUP BY 1, 2'); - for my $stat (@$stats) { - my $qname = $QUEUES{$stat->[0]} || "UNKNOWN_QUEUE"; - if ($stat->[1] < 1000) { - my $name = { 0 => 'new', 1 => 'redo' }->{$stat->[1]} || - "unknown"; - $ret->{$qname}->{"$name"} += $stat->[2]; - } elsif ($stat->[1] == ENDOFTIME) { - $ret->{$qname}->{"manual"} = $stat->[2]; - } elsif ($stat->[0] < $db_time) { - $ret->{$qname}->{"overdue"} += $stat->[2]; - } else { - $ret->{$qname}->{"deferred"} += $stat->[2]; - } - } - verbose("... done"); - return $ret; -} - -# TODO: See how much of this code is collapsable... -sub stats_for_to_delete { - my $ret = {}; - my $dbh = get_dbh() or die "Could not get database handle"; - - verbose("... delete queue stats..."); - my $db_time = $dbh->selectrow_array('SELECT '. unix_timestamp()); - my $stats = $dbh->selectall_arrayref('SELECT nexttry, COUNT(*) FROM file_to_delete2 GROUP BY 1'); - for my $stat (@$stats) { - if ($stat->[0] < 1000) { - my $name = { 0 => 'new', 1 => 'redo' }->{$stat->[0]} || - "unknown"; - $ret->{$name} += $stat->[1]; - } elsif ($stat->[0] == ENDOFTIME) { - $ret->{manual} = $stat->[1]; - } elsif ($stat->[0] < $db_time) { - $ret->{overdue} += $stat->[1]; - } else { - $ret->{deferred} += $stat->[1]; - } - } - verbose("... done"); - return $ret; -} - -sub stats_for_to_replicate { - my $ret = {}; - my $dbh = get_dbh() or die "Could not get database handle"; - - # now we want to do the "new" replication stats - verbose("... replication queue stats..."); - my $db_time = $dbh->selectrow_array('SELECT '. unix_timestamp()); - my $stats = $dbh->selectall_arrayref('SELECT nexttry, COUNT(*) FROM file_to_replicate GROUP BY 1'); - foreach my $stat (@$stats) { - if ($stat->[0] < 1000) { - # anything under 1000 is a specific state, so let's define those. here's the list - # of short names to describe them. - my $name = { - 0 => 'newfile', # new files that need to be replicated - 1 => 'redo', # files that need to go through replication again - }->{$stat->[0]} || "unknown"; - - # now put it in the output hashref. note that we do += because we might - # have more than one group of unknowns. - $ret->{"$name"} += $stat->[1]; - - } elsif ($stat->[0] == ENDOFTIME) { - $ret->{"manual"} = $stat->[1]; - - } elsif ($stat->[0] < $db_time) { - $ret->{"overdue"} += $stat->[1]; - - } else { - $ret->{"deferred"} += $stat->[1]; - } - } - verbose("... done"); - return $ret; -} - -# FIXME: This is obviously MySQL-only. -sub unix_timestamp { - if ($DB_TYPE eq 'MySQL') { - return "UNIX_TIMESTAMP()"; - } elsif ($DB_TYPE eq 'Postgres') { - return "EXTRACT(epoch FROM NOW())::int4"; - } elsif ($DB_TYPE eq 'SQLite') { - return "strftime('%s','now')"; - } -} - -sub get_stats_files_per_devcount { - my $dbh = get_dbh(); - my @ret; - my $sth = $dbh->prepare('SELECT dmid, classid, devcount, COUNT(devcount) AS "count" FROM file GROUP BY 1, 2, 3'); - $sth->execute; - while (my $row = $sth->fetchrow_hashref) { - push @ret, $row; - } - return @ret; -} - -sub verbose { - print $_[0], "\n" if $opts{verbose}; -} diff --git a/mogtool b/mogtool deleted file mode 100755 index 0b6c333..0000000 --- a/mogtool +++ /dev/null @@ -1,1535 +0,0 @@ -#!/usr/bin/perl -# vim:ts=4 sw=4 et ft=perl: -eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' - if 0; # not running under some shell -############################################################################ - -=head1 NAME - -mogtool -- Inject/extract data to/from a MogileFS installation - -B: this utility is deprecated! See L - -=head1 SYNOPSIS - - $ mogtool [general-opts] [command-opts] - - $ mogtool --trackers=127.0.0.1:6001 --domain=foo --class=bar ... - $ mogtool --conf=foo.conf ... - - $ mogtool inject thefile.tgz thefilekey - $ mogtool inject --bigfile thebigfile.tgz thefilekey - $ mogtool inject --bigfile --gzip thebigfile.tar thefilekey - $ mogtool inject --bigfile --gzip mydirectory thedirkey - $ mogtool inject --bigfile --gzip /dev/hda4 thedevkey - $ mogtool inject --nobigfile bigcontiguousfile bigcfilekey - - $ mogtool inject --bigfile --gzip --verify \ - --description="Description" \ - --receipt="foo@bar.com, baz@bar.com" \ - --concurrent=5 --chunksize=32M \ - somehugefile thefilekey - - $ mogtool extract thefilekey thenewfile.tgz - $ mogtool extract thefilekey - - $ mogtool extract --bigfile thedirkey . - $ mogtool extract --bigfile --asfile thedirkey thefile.tgz - $ mogtool extract --bigfile thedevkey /dev/hda4 - - $ mogtool delete thekey - - $ mogtool locate --noverify thekey - $ mogtool locate --bigfile thekey - - $ mogtool list - $ mogtool listkey key - -=head1 GENERAL OPTIONS - -=over 4 - -=item --debug - -Turn on MogileFS debug output. - -=item --trackers=<[preferred_ip/]ip:port>[,<[preferred_ip/]ip:port>]* - -Specify one or more trackers for your MogileFS installation. Note that -you can specify preferred IPs to override the default IPs with. So it -would look something like B<10.10.0.1/10.0.0.1:8081>. - -=item --domain= - -Set the MogileFS domain to use. - -=item --class= - -Set the class within the domain to use. Defaults to _default. - -=item --conf= - -Specify a configuration file to load from. - -=item --lib= - -Specify a directory to use as a library path. Right now, this should -be the directory where you expect to find the MogileFS.pm file, if it's -not actually installed. - -=back - -=head1 COMMANDS - -=over 4 - -=item inject|i - -Insert a resource into MogileFS. See L and L -for the rest of how to use the inject mode. - -=item extract|x - -Extract a resource from MogileFS. See L and L -for how to use extract. - -=item delete|rm - -Delete a resource. See L and L. - -=item locate|lo key - -List the paths to the file identified by the given key. - -=item list|ls - -List all big files contained in MogileFS. No options, no arguments. - -=item listkey|lsk key - -List all files which match the key. Key is just a prefix, and this will list -all keys which match the prefix. So if you specify key as "ABC1" then you'll -get all keys which start with the characters "ABC1" - -=back - -=head1 INJECT OPTIONS - -The following options are used to control the behavior of the injector. - -=over 4 - -=item --bigfile|-b - -If specified, use chunking to break the resource into manageable pieces. - -=item --chunksize=[B|K|M|G] - -When instructed to break files into chunks, the injector will use the specified -chunk size as the maximum chunk size. Defaults to 64M. You can specify the -chunk size manually and specify the units--defaults to bytes. - -=item --gzip|-z - -If specified, mogtool will gzip the data as it's going into MogileFS. The resource -will be marked as compressed. - -Note that you do not need to specify this if the resource is already gzipped, but -it doesn't hurt. (We automatically detect that and mark it as compressed.) - -=item --overwrite - -If you previously were working on injecting a big file as chunks and the process -died, normally mogtool refuses to do it again. Specify this option to force the -overwrite of that file. - -B Other than in the above case (partial failure), mogtool will not prompt -before overwriting an existing file. - -=item --verify - -If on, we do a full MD5 verification of every chunk after it is replicated. This -can take a while on large files! - -=item --description= - -Specifies a description for this file. Optional, but assists in reporting and -listing the large files in MogileFS. (This is also displayed in any receipts -that are created.) - -=item --receipt=[, ]* - -If specified, emails a copy of the receipt file to the specified comma-separated -email addresses. Also creates a local filesystem copy of the receipt file. - -=item --concurrent= - -Specifies the number of concurrent processes to run for MogileFS insertion. If -you are noticing mogtool spend most of it's time waiting for children and not -actually buffering data, you may wish to raise this number. The default is 1 -but we've found 3 or 4 work well. - -=back - -=head1 INJECT ARGUMENTS - -=over 4 - -=item resource - -What you actually want to inject. This can be a file, directory, or a raw -partition in the format I. - -Please see L for more information on how to inject these -different types of resources and the differences thereof. - -=item key - -Specifies the key to save this file to. For big files, the key is actually -"_big_N:key" and "key,#" where N is one of a bunch of things we use and # is -the chunk number. - -Generally, you want this to be descriptive so you remember what it is later -and can identify the file just by looking at the key. - -=back - -=head1 EXTRACT OPTIONS - -=over 4 - -=item --bigfile|-b - -If specified, indicates that this resource was chunked on injection and should be -reassembled for extraction. - -=item --gzip|-z - -Specifies to mogtool that it should ungzip the output if and only if it was -compressed when inserted into the MogileFS system. So, if you're extracting a -file that wasn't gzipped to begin with, this doesn't do anything. - -=item --asfile - -Useful when extracting something previously inserted as a directory--this option -instructs mogtool to treat the resource as a file and not actually run it -through tar for decompression. - -=back - -=head1 EXTRACT ARGUMENTS - -=over 4 - -=item key - -Specifies the key to get the file from. - -=item destination - -What destination means varies depending on what type of resource you're extracting. -However, no matter what, you can specify a single dash (B<->) to mean STDOUT. - -Please see the usage examples for more information on how extract works. - -=back - -=head1 DELETE OPTIONS - -=over 4 - -=item --bigfile|-b - -The resource is a "big file" and all chunks should be deleted. - -=back - -=head1 DELETE ARGUMENTS - -=over 4 - -=item key - -Specifies the key of the file to delete. - -=back - -=head1 LOCATE OPTIONS - -=over 4 - -=item --verify - -Verify that the returned paths actually contain the file. The locate -commands defaults to verify, you can disable it with --noverify. - -=item --bigfile|-b - -The resource is a "big file" and the locations of the information key should be printed. - -=back - -=head1 LOCATE ARGUMENTS - -=over 4 - -=item key - -Specifies the key of the file to locate - -=back - -=head1 RETURN VALUES - -=over 4 - -=item 0 - -Success during operation. - -=item 1 - -During the locate, list, or listkey operation, the key was not found. - -=item 2 - -Some fatal error occurred. - -=back - -=head1 USAGE EXAMPLES - -I - -=head2 Small Files (<64MB) - -When it comes to using small files, mogtool is very, very easy. - -=head3 Injection - - $ mogtool inject foo.dbm foo.dbm.2004.12 - -Injects the file I into MogileFS under the key of I. - - $ mogtool inject --gzip foo.dbm foo.dbm.2004.12 - -Injects the same file to the same key, but compresses it on the fly for you. - -=head3 Extraction - - $ mogtool extract foo.dbm.2004.12 newfoo.dbm - -Retrieves the key I and saves it as I. - - $ mogtool extract --gzip foo.dbm.2004.12 newfoo.dbm - -Gets the file and automatically decompresses it, if and only if it was compressed. -So basically, you can turn on gzip in your config file and mogtool will do the -smart thing each time. - - $ mogtool extract foo.dbm.2004.12 - - -Print the resource to standard out. If you want, you can pipe it somewhere or -redirect to a file (but why not just specify the filename?). - -=head2 Large Files (>64MB) - -Given mogtool's ability to break files into chunks and later reassemble them, -inserting large files (even files over the 4GB barrier) is relatively easy. - -=head3 Injection - - $ mogtool inject --bigfile largefile.dat largefile.dat - -As expected, inserts the file I into the MogileFS system under -the name I. Not very creative. Uses the default 64MB chunks. - - $ mogtool inject --bigfile --chunksize=16M largefile.dat largefile.dat - -Specify to use 16MB chunks instead of the default. Otherwise, the same. - - $ mogtool inject --bigfile --chunksize=1000K --gzip largefile.dat somekey - -Do it again, but specify 1000KB chunks, gzip automatically, and upload it under -a different key I. - - $ mogtool inject --bigfile --concurrent=5 --gzip largefile.dat somekey - -Same as above, but use 5 children processes for uploading chunks to MogileFS. -This can take up to 300MB of memory in this example! (It tends to use about -(concurrency + 1) * chunksize bytes.) - - $ mogtool inject --bigfile --chunksize=32M --concurrent=3 --gzip \ - --receipt="foo@bar.com" --verify --description="A large file" \ - largefile.dat somekey - -Break this file into 128MB chunks, set a description, use 3 children to -upload them, gzip the file as you go, do a full MD5 verification of every -chunk, then email a receipt with all of the MogileFS paths to me. - -Lots of flexibility with mogtool. - -=head3 Extraction - - $ mogtool extract --bigfile somekey newfile.dat - -In its basic form, extracts the previously inserted large file and saves it as -I. - - $ mogtool extract --bigfile --gzip somekey newfile.dat - -If the file was gzipped on entry, ungzip it and save the result. If it wasn't -gzipped, then we just save it. - -=head2 Directories - -Directories are easily injected and extracted with mogtool. To create the data -stream that is inserted into MogileFS, we use tar. - -=head3 Injection - - $ mogtool inject --bigfile mydir mykey - -Run I through tar and then save it as I. - - $ mogtool inject --bigfile --gzip --concurrent=5 mydir mykey - -Inject, but also gzip and use multiple injectors. - -I - -=head3 Extraction - - $ mogtool extract --bigfile mykey . - -Extract the previously injected directory I to your local directory. - - $ mogtool extract --bigfile --asfile mykey foo.tar - -Take the previously generated tarball and save it as I. Simply creates -the file instead of extracting everything inside. - -=head2 Partitions/Devices - -mogtool has the ability to inject raw partitions into MogileFS and to retrieve -them later and write them back to a partition. They're treated just like directories -for the most part, we just don't pipe things through tar. - -=head3 Injection - - $ mogtool inject --bigfile /dev/hda3 hda3.backup - -Save a raw copy of your partition I to the key I. - - $ mogtool inject --bigfile --gzip /dev/hda3 hda3.backup - -Same, but compress on the fly during injection. - -=head3 Extraction - - $ mogtool extract --bigfile hda3.backup /dev/hda4 - -Extract the partition at I to the partition I. B -mogtool won't ask for confirmation, make sure you don't mistype partition numbers! - -=head2 Deleting a Resource - -B Please make sure you're specifying the right parameter, as delete does -not prompt for confirmation of the request! - - $ mogtool delete thekey - -Delete a normal file. - - $ mogtool delete --bigfile thekey - -Delete a chunked file--this deletes all chunks and the receipt, so the file is gone. - -=head2 Listing Big Files - - $ mogtool list backup - -Lists all large files stored in MogileFS. It is not possible to list all normal files -at this time. - -=head2 Listing Files Matching a Key - - $ mogtool listkey abc1 - -Lists all files in MogileFS whose keys start with the characters "abc1". - -=head1 CONFIGURATION FILE - -Instead of adding a ton of options to the command line every time, mogtool enables -you to create a default configuration file that it will read all of the options from. -It searches two locations for a default configuration file: B<~/.mogtool> and -B. (Alternately, you can specify B<--conf=whatever> as -an option on the command line.) - -The file can consist of any number of the following items: - - trackers = 10.0.0.3:7001, 10.10.0.5/10.0.0.5:7001 - domain = mogiledomain - class = fileclass - lib = /home/foo/lib - gzip = 1 - big = 1 - overwrite = 1 - chunksize = 32M - receipt = foo@bar.com, baz@bar.com - verify = 1 - concurrent = 3 - -=head1 KNOWN BUGS - -None? Send me any you find! :) - -=head1 PLANNED FEATURES - -=over 4 - -=item --concurrent for extract - -It would be nice to have concurrent extraction going on. - -=item recover mode - -If the receipt file is ever corrupt in MogileFS it would be useful to recover a -file given just a receipt. It would have the same arguments as the extract mode, -except use a receipt file as the data source. - -=item partition size verification - -We can easily get the partition size when we save one to MogileFS, so we should -use that information to determine during extraction if a target partition is going -to be big enough. - -=item on the fly gzip extraction - -Right now we can gzip on an injection, but we should support doing decompression -on the fly coming out of MogileFS. - -=item make list take a prefix - -If you can specify a prefix, that makes things easier for finding small files that -are stored in MogileFS. - -=item more information on list - -Have list load up the info file and parse it for information about each of the -big files being stored. Maybe have this as an option (-l). (This means the -reading and parsing of info files should be abstracted into a function.) - -=back - -=head1 AUTHOR - -Mark Smith Ejunior@danga.comE - most of the implementation and maintenance. - -Brad Fitzpatrick Ebrad@danga.comE - concepts and rough draft. - -Robin H. Johnson Erobbat2@orbis-terrarum.netE - locate function. - -Copyright (c) 2002-2004 Danga Interactive. All rights reserved. - -=cut - -############################################################################## - -use strict; -use Getopt::Long; -use Pod::Usage qw{ pod2usage }; -use Digest::MD5 qw{ md5_hex }; -use Time::HiRes qw{ gettimeofday tv_interval }; -use LWP::Simple; -use POSIX qw(:sys_wait_h); -use Compress::Zlib; - -$| = 1; - -use constant ERR_MISSING => 1; -use constant ERR_FATAL => 2; - -my %opts; -$opts{help} = 0; - -abortWithUsage() unless - GetOptions( - # general purpose options - 'trackers=s' => \$opts{trackers}, - 'domain=s' => \$opts{domain}, - 'class=s' => \$opts{class}, - 'config=s' => \$opts{config}, - 'help' => \$opts{help}, - 'debug' => \$MogileFS::DEBUG, - 'lib' => \$opts{lib}, - - # extract+inject options - 'gzip|z' => \$opts{gzip}, - 'bigfile|b' => \$opts{big}, - 'nobigfile' => \$opts{nobig}, - - # inject options - 'overwrite' => \$opts{overwrite}, - 'chunksize=s' => \$opts{chunksize}, - 'receipt=s' => \$opts{receipt}, - 'reciept=s' => \$opts{receipt}, # requested :) - 'verify!' => \$opts{verify}, - 'description=s' => \$opts{des}, - 'concurrent=i' => \$opts{concurrent}, - 'noreplwait' => \$opts{noreplwait}, - - # extract options - 'asfile' => \$opts{asfile}, - ); - -# now load the config file? -my @confs = ( $opts{config}, "$ENV{HOME}/.mogtool", "/etc/mogilefs/mogtool.conf" ); -foreach my $conf (@confs) { - next unless $conf && -e $conf; - open FILE, "<$conf"; - foreach () { - s!#.*!!; - next unless m!(\w+)\s*=\s*(.+)!; - $opts{$1} = $2 unless $opts{$1}; - } - close FILE; -} - -# now bring in MogileFS, because hopefully we have a lib by now -if ($opts{lib}) { - eval "use lib '$opts{lib}';"; -} - -# no trackers and domain..? -unless ($opts{trackers} && $opts{domain}) { - abortWithUsage("--trackers and --domain configuration required"); -} - -eval qq{ - use MogileFS::Client; 1 -} or die "Failed to load MogileFS::Client module: $@\n"; - -# init connection to mogile -my $mogfs = get_mogfs(); - -# get our command and pass off to our functions -my $cmd = shift; -inject() if $cmd eq 'i' || $cmd eq "inject"; -extract() if $cmd eq 'x' || $cmd eq "extract"; -list() if $cmd eq 'ls' || $cmd eq "list"; -listkey() if $cmd eq 'lsk' || $cmd eq "listkey"; -mdelete() if $cmd eq 'rm' || $cmd eq "delete"; -locate() if $cmd eq 'lo' || $cmd eq "locate"; - -# fail if we get this far -abortWithUsage(); - -###################################################################### - -sub get_mogfs { - my @trackerinput = split(/\s*,\s*/, $opts{trackers}); - my @trackers; - my %pref_ip; - foreach my $tracker (@trackerinput) { - if ($tracker =~ m!(.+)/(.+):(\d+)!) { - $pref_ip{$2} = $1; - push @trackers, "$2:$3"; - } else { - push @trackers, $tracker; - } - } - - my $mogfs = MogileFS::Client->new( - domain => $opts{domain}, - hosts => \@trackers, - ) - or error("Could not initialize MogileFS", ERR_FATAL); - $mogfs->set_pref_ip(\%pref_ip); - return $mogfs; -} - -sub error { - my $err = shift() || "ERROR: no error message provided!"; - - my $mogerr = undef; - if ($mogerr = $mogfs->errstr) { - $mogerr =~ s/^\s+//; - $mogerr =~ s/\s+$//; - } - - my $syserr = undef; - if ($@) { - $syserr = $@; - $syserr =~ s/[\r\n]+$//; - } - - my $exitcode = shift(); - - print STDERR "$err\n"; - print STDERR "MogileFS backend error message: $mogerr\n" if $mogerr && $exitcode != ERR_MISSING; - print STDERR "System error message: $@\n" if $syserr; - - # if a second argument, exit - if (defined ($exitcode)) { - exit $exitcode+0; - } -} - -sub inject { - my $src = shift @ARGV; - my $key = shift @ARGV; - abortWithUsage("source and key required to inject") unless $src && $key; - - # make sure the source exists and the key is valid - die "Error: source $src doesn't exist.\n" - unless -e $src; - die "Error: key $key isn't valid; must not contain spaces or commas.\n" - unless $key =~ /^[^\s\,]+$/; - - # before we get too far, find sendmail? - my $sendmail; - if ($opts{receipt}) { - $sendmail = `which sendmail` || '/usr/sbin/sendmail'; - $sendmail =~ s/[\r\n]+$//; - unless (-e $sendmail) { - die "Error: attempted to find sendmail binary in /usr/sbin but couldn't.\n"; - } - } - - # open up O as the handle to use for reading data - my $type = 'unknown'; - if (-d $src) { - my $taropts = ($opts{gzip} ? 'z' : '') . "cf"; - $type = 'tarball'; - open (O, '-|', 'tar', $taropts, '-', $src) - or die "Couldn't open tar for reading: $!\n"; - } elsif (-f $src) { - $type = 'file'; - open (O, "<$src") - or die "Couldn't open file for reading: $!\n"; - } elsif (-b $src) { - $type = 'partition'; - open (O, "<$src") - or die "Couldn't open block device for reading: $!\n"; - } else { - die "Error: not file, directory, or partition.\n"; - } - - # now do some pre-file checking... - my $size = -s $src; - if ($type ne 'file') { - die "Error: you specified to store a file of type $type but didn't specify --bigfile. Please see documentation.\n" - unless $opts{big}; - } elsif ($size > 64 * 1024 * 1024) { - die "Error: the file is more than 64MB and you didn't specify --bigfile. Please see documentation, or use --nobigfile to disable large file chunking and allow large single file uploads\n" - unless $opts{big} || $opts{nobig}; - } - - if ($opts{big} && $opts{nobig}) { - die "Error: You cannot specify both --bigfile and --nobigfile\n"; - } - - if ($opts{nobigfile} && $opts{gzip}) { - die "Error: --gzip is not compatible with --nobigfile\n"; - } - - # see if there's already a pre file? - if ($opts{big}) { - my $data = $mogfs->get_file_data("_big_pre:$key"); - if (defined $data) { - unless ($opts{overwrite}) { - error(<delete("_big_pre:$key") - or error("ERROR: Unable to delete _big_pre:$key.", ERR_FATAL); - } - - # now create our pre notice - my $prefh = $mogfs->new_file("_big_pre:$key", $opts{class}) - or error("ERROR: Unable to create _big_pre:$key.", ERR_FATAL); - $prefh->print("starttime:" . time()); - $prefh->close() - or error("ERROR: Unable to save to _big_pre:$key.", ERR_FATAL); - } - - # setup config and temporary variables we're going to be using - my $chunk_size = 64 * 1024 * 1024; # 64 MB - if ($opts{big}) { - if ($opts{chunksize} && ($opts{chunksize} =~ m!^(\d+)(G|M|K|B)?!i)) { - $chunk_size = $1; - unless (lc $2 eq 'b') { - $chunk_size *= (1024 ** ( { g => 3, m => 2, k => 1 }->{lc $2} || 2 )); - } - print "NOTE: Using chunksize of $chunk_size bytes.\n"; - } - } - my $read_size = ($chunk_size > 1024*1024 ? 1024*1024 : $chunk_size); - - # temporary variables - my $buf; - my $bufsize = 0; - my $chunknum = 0; - my %chunkinfo; # { id => [ md5, length ] } - my %chunkbuf; # { id => data } - my %children; # { pid => chunknum } - my %chunksout; # { chunknum => pid } - - # this function writes out a chunk - my $emit = sub { - my $cn = shift() + 0; - return unless $cn; - - # get the length of the chunk we're going to send - my $bufsize = length $chunkbuf{$cn}; - return unless $bufsize; - - # now spawn off a child to do the real work - if (my $pid = fork()) { - print "Spawned child $pid to deal with chunk number $cn.\n"; - $chunksout{$cn} = $pid; - $children{$pid} = $cn; - return; - } - - # drop other memory references we're not using anymore - foreach my $chunknum (keys %chunkbuf) { - next if $chunknum == $cn; - delete $chunkbuf{$chunknum}; - } - - # as a child, get a new mogile connection - my $mogfs = get_mogfs(); - my $dkey = $opts{big} ? "$key,$chunknum" : "$key"; - - my $start_time = [ gettimeofday() ]; - my $try = 0; - while (1) { - $try++; - eval { - my $fh = $mogfs->new_file($dkey, $opts{class}, $bufsize); - unless (defined $fh) { - die "Unable to create new file"; - } - $fh->print($chunkbuf{$cn}); - unless ($fh->close) { - die "Close failed"; - } - }; - if (my $err = $@) { - error("WARNING: Unable to save file '$dkey': $err"); - printf "This was try #$try and it's been %.2f seconds since we first tried. Retrying...\n", tv_interval($start_time); - sleep 1; - next; - } - last; - } - my $diff = tv_interval($start_time); - printf " chunk $cn saved in %.2f seconds.\n", $diff; - - # make sure we never return, always exit - exit 0; - }; - - # just used to reap our children in a loop until they're done. also - # handles respawning a child that failed. - my $reap_children = sub { - # find out if we have any kids dead - while ((my $pid = waitpid -1, WNOHANG) > 0) { - my $cnum = delete $children{$pid}; - unless ($cnum) { - print "Error: reaped child $pid, but no idea what they were doing...\n"; - next; - } - if (my $status = $?) { - print "Error: reaped child $pid for chunk $cnum returned non-zero status... Retrying...\n"; - $emit->($cnum); - next; - } - my @paths = grep { defined $_ } $mogfs->get_paths($opts{big} ? "$key,$cnum" : "$key", 1); - unless (@paths) { - print "Error: reaped child $pid for chunk $cnum but no paths exist... Retrying...\n"; - $emit->($cnum); - next; - } - delete $chunkbuf{$cnum}; - delete $chunksout{$cnum}; - print "Child $pid successfully finished with chunk $cnum.\n"; - } - }; - - # this function handles parallel threads - $opts{concurrent} ||= 1; - $opts{concurrent} = 1 if $opts{concurrent} < 1; - my $handle_children = sub { - # here we pause while our children are working - my $first = 1; - while ($first || scalar(keys %children) >= $opts{concurrent}) { - $first = 0; - $reap_children->(); - select undef, undef, undef, 0.1; - } - - # now spawn until we hit the limit - foreach my $cnum (keys %chunkbuf) { - next if $chunksout{$cnum}; - $emit->($cnum); - last if scalar(keys %children) >= $opts{concurrent}; - } - }; - - # setup compression stuff - my $dogzip = 0; - my $zlib; - if ($opts{gzip}) { - # if they turned gzip on we may or may not need this stream, so make it - $zlib = deflateInit() - or error("Error: unable to create gzip deflation stream", ERR_FATAL); - } - - my $upload_fh; - if ($opts{nobig}) { - eval { - $upload_fh = $mogfs->new_file($key, $opts{class}, $size); - unless (defined $upload_fh) { - die "Unable to create new file"; - } - }; - if (my $err = $@) { - error("ERROR: Unable to open file '$key': $err"); - die "Giving up.\n"; - } - } - - # read one meg chunks while we have data - my $sum = 0; - my $readbuf = ''; - while (my $rv = read(O, $readbuf, $read_size)) { - # if this is a file, and this is our first read, see if it's gzipped - if (!$sum && $rv >= 2) { - if (substr($readbuf, 0, 2) eq "\x1f\x8b") { - # this is already gzipped, so just mark it as such and insert it - $opts{gzip} = 1; - } else { - # now turn on our gzipping if the user wants the output gzipped - $dogzip = 1 if $opts{gzip}; - } - } - - # now run it through the deflation stream before we process it here - if ($dogzip) { - my ($out, $status) = $zlib->deflate($readbuf); - error("Error: Deflation failure processing stream", ERR_FATAL) - unless $status == Z_OK; - $readbuf = $out; - $rv = length $readbuf; - - # we don't always get a chunk from deflate - next unless $rv; - } - - $sum += $rv; - # Short circuit if we're just plopping up a big file. - if ($opts{nobig}) { - $upload_fh->print($readbuf); - if ($size) { - printf "Upload so far: $sum bytes [%.2f%% complete]\n", - ($sum / $size * 100); - } - next; - } - - # now stick our data into our real buffer - $buf .= $readbuf; - $bufsize += $rv; - $readbuf = ''; - - # generate output - if ($type ne 'tarball' && $size && $size > $read_size) { - printf "Buffer so far: $bufsize bytes [%.2f%% complete]\r", ($sum / $size * 100); - } else { - print "Buffer so far: $bufsize bytes\r"; - } - - # if we have one chunk, handle it - if ($opts{big} && $bufsize >= $chunk_size) { - $chunkbuf{++$chunknum} = substr($buf, 0, $chunk_size); - - # calculate the md5, print out status, and save this chunk - my $md5 = md5_hex($buf); - if ($opts{big}) { - print "chunk $key,$chunknum: $md5, len = $chunk_size\n"; - } else { - print "file $key: $md5, len = $chunk_size\n"; - } - $chunkinfo{$chunknum} = [ $md5, $chunk_size ]; - - # reset for the next read loop - $buf = substr($buf, $chunk_size); - $bufsize = length $buf; - - # now spawn children to save chunks - $handle_children->(); - } - } - close O; - - # now we need to flush the gzip engine - if ($dogzip) { - my ($out, $status) = $zlib->flush; - error("Error: Deflation failure processing stream", ERR_FATAL) - unless $status == Z_OK; - $buf .= $out; - $bufsize += length $out; - $sum += length $out; - } - - # final piece - if ($buf) { - $chunkbuf{++$chunknum} = $buf; - my $md5 = md5_hex($buf); - if ($opts{big}) { - print "chunk $key,$chunknum: $md5, len = $bufsize\n"; - } else { - print "file $key: $md5, len = $bufsize\n"; - } - $chunkinfo{$chunknum} = [ $md5, $bufsize ]; - } - - # now, while we still have chunks to process... - while (%chunkbuf) { - $handle_children->(); - sleep 1; - } - - # verify replication and chunks - my %paths; # { chunknum => [ path, path, path ... ] } - my %still_need = ( %chunkinfo ); - while (%still_need) { - print "Replicating: " . join(' ', sort { $a <=> $b } keys %still_need) . "\n"; - sleep 1; # give things time to replicate some - - # now iterate over each and get the paths - foreach my $num (keys %still_need) { - my $dkey = $opts{big} ? "$key,$num" : $key; - my @npaths = grep { defined $_ } $mogfs->get_paths($dkey, 1); - - unless (@npaths) { - error("FAILURE: chunk $num has no paths at all.", ERR_FATAL); - } - - if (scalar(@npaths) >= 2 || $opts{noreplwait}) { - # okay, this one's replicated, actually verify the paths - foreach my $path (@npaths) { - if ($opts{verify}) { - print " Verifying chunk $num, path $path..."; - my $data = get($path); - my $len = length($data); - my $md5 = md5_hex($data); - if ($md5 ne $chunkinfo{$num}->[0]) { - print "md5 mismatch\n"; - next; - } elsif ($len != $chunkinfo{$num}->[1]) { - print "length mismatch ($len, $chunkinfo{$num}->[1])\n"; - next; - } - print "ok\n"; - } elsif ($opts{receipt}) { - # just do a quick size check - print " Size verifying chunk $num, path $path..."; - my $clen = (head($path))[1] || 0; - unless ($clen == $chunkinfo{$num}->[1]) { - print "length mismatch ($clen, $chunkinfo{$num}->[1])\n"; - next; - } - print "ok\n"; - } - push @{$paths{$num} ||= []}, $path; - } - - # now make sure %paths contains at least 2 verified - next if scalar(@{$paths{$num} || []}) < 2 && !$opts{noreplwait}; - delete $still_need{$num}; - } - } - } - - # prepare the info file - my $des = $opts{des} || 'no description'; - my $compressed = $opts{gzip} ? '1' : '0'; - #FIXME: add 'partblocks' to info file - - # create the info file - my $info = < $b } keys %chunkinfo) { - $info .= "part $_ bytes=$chunkinfo{$_}->[1] md5=$chunkinfo{$_}->[0] paths: "; - $info .= join(', ', @{$paths{$_} || []}); - $info .= "\n"; - } - - # now write out the info file - if ($opts{big}) { - my $fhinfo = $mogfs->new_file("_big_info:$key", $opts{class}) - or error("ERROR: Unable to create _big_info:$key.", ERR_FATAL); - $fhinfo->print($info); - $fhinfo->close() - or error("ERROR: Unable to save _big_info:$key.", ERR_FATAL); - - # verify info file - print "Waiting for info file replication...\n" unless $opts{noreplwait}; - while (!$opts{noreplwait}) { - my @paths = $mogfs->get_paths("_big_info:$key", 1); - if (@paths < 2) { - select undef, undef, undef, 0.25; - next; - } - foreach my $path (@paths) { - my $data = get($path); - error(" FATAL: content mismatch on $path", ERR_FATAL) - unless $data eq $info; - } - last; - } - - # now delete our pre file - print "Deleting pre-insert file...\n"; - $mogfs->delete("_big_pre:$key") - or error("ERROR: Unable to delete _big_pre:$key", ERR_FATAL); - } - - # Wrap up the non big file... - if ($opts{nobig}) { - eval { - unless ($upload_fh->close) { - die "Close failed"; - } - }; - if (my $err = $@) { - error("ERROR: Unable to close file '$key': $err"); - die "Giving up.\n"; - } - } - - # now email and save a receipt - if ($opts{receipt}) { - open MAIL, "| $sendmail -t" - or error("ERROR: Unable to open sendmail binary: $sendmail", ERR_FATAL); - print MAIL <mogtool.$key.receipt" - or error("ERROR: Unable to create file mogtool.$key.receipt in current directory.", ERR_FATAL); - print FILE $info; - close FILE; - print "Receipt stored in mogtool.$key.receipt.\n"; - } - - exit 0; -} - -sub _parse_info { - my $info = shift; - my $res = {}; - - # parse out the header data - $res->{des} = ($info =~ /^des\s+(.+)$/m) ? $1 : undef; - $res->{type} = ($info =~ /^type\s+(.+)$/m) ? $1 : undef; - $res->{compressed} = ($info =~ /^compressed\s+(.+)$/m) ? $1 : undef; - $res->{filename} = ($info =~ /^filename\s+(.+)$/m) ? $1 : undef; - $res->{chunks} = ($info =~ /^chunks\s+(\d+)$/m) ? $1 : undef; - $res->{size} = ($info =~ /^size\s+(\d+)$/m) ? $1 : undef; - - # now get the pieces - $res->{maxnum} = undef; - while ($info =~ /^part\s+(\d+)\s+bytes=(\d+)\s+md5=(.+)\s+paths:\s+(.+)$/mg) { - $res->{maxnum} = $1 if !defined $res->{maxnum} || $1 > $res->{maxnum}; - $res->{parts}->{$1} = { - bytes => $2, - md5 => $3, - paths => [ split(/\s*,\s*/, $4) ], - }; - } - - return $res; -} - -sub extract { - my $key = shift @ARGV; - my $dest = shift @ARGV; - abortWithUsage("key and destination required to extract") unless $key && $dest; - - error("Error: key $key isn't valid; must not contain spaces or commas.", ERR_FATAL) - unless $key =~ /^[^\s\,]+$/; - unless ($dest eq '-' || $dest eq '.') { - error("Error: destination exists: $dest (specify --overwrite if you want to kill it)", ERR_FATAL) - if -e $dest && !$opts{overwrite} && !-b $dest; - } - - # see if this is really a big file - my $file; - if ($opts{big}) { - my $info = $mogfs->get_file_data("_big_info:$key"); - die "$key doesn't seem to be a valid big file.\n" - unless $info && $$info; - - # verify validity - $file = _parse_info($$info); - - # make sure we have enough info - error("Error: info file doesn't contain the number of chunks", ERR_FATAL) - unless $file->{chunks}; - error("Error: info file doesn't contain the total size", ERR_FATAL) - unless $file->{size}; - - } else { - # not a big file, so it has to be of a certain type - $file->{type} = 'file'; - $file->{maxnum} = 1; - $file->{parts}->{1} = { - paths => [ grep { defined $_ } $mogfs->get_paths($key) ], - }; - - # now, if it doesn't exist.. - unless (scalar(@{$file->{parts}->{1}->{paths}})) { - error("Error: file doesn't exist (or did you forget --bigfile?)", ERR_FATAL); - } - } - - # several cases.. going to STDOUT? - if ($dest eq '-') { - *O = *STDOUT; - } else { - # open up O as the handle to use for reading data - if ($file->{type} eq 'file' || $file->{type} eq 'partition' || - ($file->{type} eq 'tarball' && $opts{asfile})) { - # just write it to the file with this name, but don't overwrite? - if ($dest eq '.') { - $dest = $file->{filename}; - $dest =~ s!^(.+)/!!; - } - if (-b $dest) { - # if we're targeting a block device... - warn "FIXME: add in block checking\n"; - open O, ">$dest" - or die "Couldn't open $dest: $!\n"; - } elsif (-e $dest) { - if ($opts{overwrite}) { - open O, ">$dest" - or die "Couldn't open $dest: $!\n"; - } else { - die "File already exists: $dest ... won't overwrite without --overwrite.\n"; - } - } else { - open O, ">$dest" - or die "Couldn't open $dest: $!\n"; - } - - } elsif ($file->{type} eq 'tarball') { - my $taropts = ($file->{compressed} ? 'z' : '') . "xf"; - open O, '|-', 'tar', $taropts, '-' - or die "Couldn't open tar for writing: $!\n"; - - } else { - die "Error: unable to handle type '$file->{type}'\n"; - } - } - - # start fetching pieces - foreach my $i (1..$file->{maxnum}) { - print "Fetching piece $i...\n"; - - foreach my $path (@{$file->{parts}->{$i}->{paths} || []}) { - print " Trying $path...\n"; - my $data = get($path); - next unless $data; - - # now verify MD5, etc - if ($opts{big}) { - my $len = length $data; - my $md5 = md5_hex($data); - print " ($len bytes, $md5)\n"; - next unless $len == $file->{parts}->{$i}->{bytes} && - $md5 eq $file->{parts}->{$i}->{md5}; - } - - # this chunk verified, write it out - print O $data; - last; - } - } - - # at this point the file should be complete! - close O; - print "Done.\n"; - - # now make sure we have enough data -#$ mogtool [opts] extract {,,} - #=> - (for STDOUT) (if compressed, add "z" flag) - #=> . (to untar) (if compressed, do nothing???, make .tar.gz file -- unless they use -z again?) - #=> /dev/sda4 (but check /proc/partitions that it's big enough) (if compress, Compress::Zlib to ungzip -# => foo.jpg (write it to a file) - - - # now check - exit 0; -} - -sub locate { - my $key = shift(@ARGV); - abortWithUsage("key required to locate") unless $key; - $opts{verify} = 1 unless defined $opts{verify}; - $opts{bigfile} = 0 unless $opts{big}; - - my $dkey = $key; - $dkey = "_big_info:$key" if $opts{big}; - - # list all paths for the file - my $ct = 0; - my @paths = []; - my @paths = grep { defined $_ } - $mogfs->get_paths($dkey, - {verify => $opts{verify}, pathcount => 1024 }, - ); - if(@paths == 0 && $mogfs->errstr =~ /unknown_key/) { - error("Error: bigfile $key doesn't exist (or did you force --bigfile?)", ERR_MISSING) if $opts{big}; - error("Error: file $key doesn't exist (or did you forget --bigfile?)", ERR_MISSING); - } - error("Error: Something went wrong", ERR_FATAL) if($mogfs->errstr); - foreach my $key (@paths) { - $ct++; - print "$key\n"; - } - print "#$ct paths found\n"; - exit 0 if($ct > 0); - exit ERR_MISSING; -} - -sub list { - # list all big files in mogile - my ($ct, $after, $list); - my $ct = 0; - while (($after, $list) = $mogfs->list_keys("_big_info:", $after)) { - last unless $list && @$list; - - # now extract the key and dump it - foreach my $key (@$list) { - next unless $key =~ /^_big_info:(.+)$/; - - $key = $1; - $ct++; - - print "$key\n"; - } - } - print "#$ct files found\n"; - exit 0 if($ct > 0); - exit ERR_MISSING; -} - -sub listkey { - my $key_pattern = shift @ARGV; - $key_pattern = '' unless defined $key_pattern; - - # list all files matching a key - my ($ct, $after, $list); - $ct = 0; - while (($after, $list) = $mogfs->list_keys("$key_pattern", $after)) { - last unless $list && @$list; - - # now extract the key and dump it - foreach my $key (@$list) { - - $ct++; - - print "$key\n"; - } - } - error("Error: Something went wrong", ERR_FATAL) if ($mogfs->errstr && ! ($mogfs->errstr =~ /none_match/)); - print "#$ct files found\n"; - exit 0 if($ct > 0); - exit ERR_MISSING; -} - -sub mdelete { - my $key = shift(@ARGV); - abortWithUsage("key required to delete") unless $key; - - # delete simple file - unless ($opts{big}) { - my $rv = $mogfs->delete($key); - error("Failed to delete: $key.", ERR_FATAL) - unless $rv; - print "Deleted.\n"; - exit 0; - } - - # delete big file - my $info = $mogfs->get_file_data("_big_info:$key"); - error("$key doesn't seem to be a valid big file.", ERR_FATAL) - unless $info && $$info; - - # verify validity - my $file = _parse_info($$info); - - # make sure we have enough info to delete - error("Error: info file doesn't contain required information?", ERR_FATAL) - unless $file->{chunks} && $file->{maxnum}; - - # now delete each chunk, best attempt - foreach my $i (1..$file->{maxnum}) { - $mogfs->delete("$key,$i"); - } - - # delete the main pieces - my $rv = $mogfs->delete("_big_info:$key"); - error("Unable to delete _big_info:$key.", ERR_FATAL) - unless $rv; - print "Deleted.\n"; - exit 0; -} - -abortWithUsage() if $opts{help}; - -sub abortWithUsage { - my $msg = "!!!mogtool is DEPRECATED and will be removed in the future!!!\n"; - $msg .= join '', @_; - - if ( $msg ) { - pod2usage( -verbose => 1, -exitval => 1, -message => "\n$msg\n" ); - } else { - pod2usage( -verbose => 1, -exitval => 1 ); - } -} - - -__END__ - -Usage: mogtool [opts] [command-opts] [command-args] - -General options: - * --trackers=[,]* - - * --domain= - - * --class= - - * --conf= Location of config file listing trackers, default - domain, and default class - - Default: ~/.mogilefs, /etc/mogilefs/mogilefs.conf - - * --bigfile | -b Tell mogtool to split file into 64MB chunks and - checksum the chunks, - - * --gzip | -z Use gzip compression/decompression - - -Commands: - - inject | i Inject a file into MogileFS, by key - extract | x Extract a file from MogileFS, by key - list | ls List large files in MogileFS - -'inject' syntax: - -$ mogtool [opts] inject [i-opts] - -Valid i-opts: - --overwrite Ignore existing _big_pre: and start anew. - --chunksize=n Set the size of individual chunk files. n is in the format of - number[scale] so 10 is 10 megabytes, 10M is also 10 megs, 10G, 10B, 10K... - case insensitive - --receipt=email Send a receipt to the specified email address - --verify Make sure things replicate and then check the MD5s? - --des=string Set the file description - - -$ mogtool [opts] extract {,,} - => - (for STDOUT) (if compressed, add "z" flag) - => . (to untar) (if compressed, do nothing???, make .tar.gz file -- unless they use -z again?) - => /dev/sda4 (but check /proc/partitions that it's big enough) (if compress, Compress::Zlib to ungzip) - => foo.jpg (write it to a file) - - ---key - -# mogtool add --key='roast.sdb1.2004-11-07' -z /dev/sda1 - - - - = "cow.2004.11.17" - -# this is a temporary file that we delete when we're doing recording all chunks - -_big_pre: - - starttime=UNIXTIMESTAMP - -# when done, we write the _info file and delete the _pre. - -_big_info: - - des Cow's ljdb backup as of 2004-11-17 - type { partition, file, tarball } - compressed {0, 1} - filename ljbinlog.305.gz - partblocks 234324324324 - - - part 1 - part 2 - part 3 - part 4 - part 5 - -_big:, -_big:, -_big:, - - -Receipt format: - -BEGIN MOGTOOL RECIEEPT -type partition -des Foo -compressed foo - -part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid -part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid -part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid -part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid - - -END RECIEPT - - -### -perl -w bin/mogtool --gzip inject --overwrite --chunksize=24M --des="This is a description" --receipt="marksmith@danga.com" ../music/jesse/Unsorted jesse.music.unsorted diff --git a/mogupload b/mogupload deleted file mode 100755 index 85015b4..0000000 --- a/mogupload +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/perl - -=head1 NAME - -mogupload -- Upload data to a MogileFS installation - -=head1 SYNOPSIS - - $ mogupload [options] - $ mogupload [options] --file="-" < filename - - $ mogupload --trackers=host --domain=foo --class=bar \ - --key="/hello.jpg" --file="input.jpg" - - $ echo "why hello" | mogupload [opts] --key="world" --file="-" - -=head1 OPTIONS - -=over - -=item --trackers=host1:7001,host2:7001 - -Use these MogileFS trackers to negotiate with. - -=item --domain= - -Set the MogileFS domain to use. - -=item --class= - -Set the class to use. Will use default class if not specified - -=item --key="" - -A key to store the file under. Can be an arbitrary string. - -=item --file="" - -A local file to upload. If '-', read file from STDIN instead. - -=back - -=head1 AUTHOR - -Dormando ELE - -=head1 BUGS - -mogupload must buffer the upload in memory before transferring it. This makes it difficult to upload very large files. Future versions will lift this limitation. - -=head1 LICENSE - -Licensed for use and redistribution under the same terms as Perl itself. - -=cut - -use strict; -use warnings; - -use lib './lib'; -use MogileFS::Utils; - -my $util = MogileFS::Utils->new; -my $usage = "--trackers=host --domain=foo --key='/hello.jpg' --file='./hello.jpg'"; -my $c = $util->getopts($usage, qw/class=s key=s file=s/); - -my $mogc = $util->client; - -my $filename = $c->{file}; - -my $fh; -my $size = 0; -if ($filename eq '-') { - $fh = *STDIN; -} else { - $size = -s $filename; - die "Could not stat " . $filename unless defined $size; - open($fh, "< $filename") or die "Could not open " . $filename; -} - -my $mf = $mogc->new_file($c->{key}, $c->{class}, undef); -if ($mogc->errcode) { - die "Error opening MogileFS file: " . $mogc->errstr; -} - -my $buf; -while (my $read = read($fh, $buf, 1024 * 1024)) { - die "error reading file" unless defined $read; - $mf->print($buf); -} - -unless ($mf->close) { - die "Error writing file: " . $mogc->errcode . ": " . $mogc->errstr; -}