#!/usr/bin/perl -w
#
#   watchdog - Check disk space and load, send mails if specified numbers
#              are exceeded.
#
#   Author:     Jochen Wiedmann
#               Am Eisteich 9
#               72555 Metzingen
#               Germany
#
#               E-Mail: [email protected]
#
############################################################################
use strict;

#
#   Configurable section
#
my $MAX_LOAD     = 3;     # Send mail if one of the load averages exceeds
                          # this value
my $MAX_DISC     = 80;    # Send mail if a partition's usage (in percent)
                          # exceeds this value
my $MAX_DISC_INC = 10;    # Send mail if a partition's usage grows by at
                          # least this many percentage points between two
                          # checks
my $ADMIN        = 'root';                    # Default mail recipient
my $HOST         = 'monitor.wuestenrot.de';   # Host name quoted in mails


############################################################################
#
#   Features:
#
#       - Mail will be sent only once for a specified event until the
#         event happens to be fixed.
#
#   These features depend on the existence of the following cache file:
#
############################################################################

# All operating system dependent settings, keyed by the value of $^O:
#
#   cache_file  File used for remembering the results of the last run
#   mail        Command for sending mail; it must read the mail body from
#               stdin. The placeholders $subject and $to are replaced
#               when a mail is sent.
#   df          Command to determine disk usage; must print to stdout
#   dfi         Command to determine inode usage; must print to stdout
#   uptime      Command to determine the load; must print to stdout
#   use_locks   True if the cache file is protected with flock()
my %SETTINGS_BY_OS = (
    'linux' => {
        'cache_file' => "/var/log/watchdog.log",
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -vk',
        'dfi'        => '/bin/df -i',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 1,
    },
    'sco3.2v5.0' => {
        'cache_file' => '/var/adm/watchdog.log',
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -Bk',
        'dfi'        => '/bin/df -I',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 0,
    },
);

# Refuse to run on a platform we have no settings for.
my $OS_SETTINGS = $SETTINGS_BY_OS{$^O};
die "Unknown OS: $^O" unless $OS_SETTINGS;

my $CACHE_FILE = $OS_SETTINGS->{'cache_file'};
my $MAIL       = $OS_SETTINGS->{'mail'};
my $DF         = $OS_SETTINGS->{'df'};
my $DFI        = $OS_SETTINGS->{'dfi'};
my $UPTIME     = $OS_SETTINGS->{'uptime'};
my $USE_LOCKS  = $OS_SETTINGS->{'use_locks'};


# If you want to specify a different capacity limit for some device, you
# can do it here; the keys are device names as printed by the df command.
# Devices not listed here are checked against $MAX_DISC.

my %CAPACITIES = (
    # Example:
    # '/dev/hda1' => 75,
    # '/dev/hda2' => 70
);


# Set by the command line options --debug and --verbose.
use vars qw($debug $verbose);


############################################################################
#
#   We use *no* external modules here.
#   This command will be executed from within cron, thus it should be
#   small and not use many resources.
#
############################################################################

# Private copy of gensym() from the "Symbol" module (see "perldoc Symbol"),
# so that the module itself need not be loaded.
package Symbol;

{
    # Name of the package whose symbol table hosts the generated globs,
    # and the running number used for building unique glob names.
    my $stash_name = "Symbol::";
    my $counter    = 0;

    # gensym()
    #
    # Returns a reference to a fresh, anonymous typeglob that can be used
    # as a file handle. The glob is removed from the symbol table right
    # after it is created, so the returned reference is the only thing
    # keeping it alive.
    sub gensym () {
        no strict 'refs';
        my $glob_name = "GEN" . $counter;
        $counter++;
        my $glob_ref = \*{$stash_name . $glob_name};
        delete ${$stash_name}{$glob_name};
        return $glob_ref;
    }
}


# This code is similar to Data::Dumper. We save a complex hash ref
#   { var1 => 'val1',
#     var2 => 'val2',
#     var3 => { var4 => 'val4',
#               var5 => 'val5' }
#   }
# into the following format:
#     var1=val1
#     var2=val2
#     var3__var4=val4
#     var3__var5=val5
#
# Restrictions of the format: keys must consist of word characters only
# and must not contain "__" themselves (Read() would not be able to parse
# them back), values must not contain newlines, and nested values must be
# hash references.
#
package Dump;

# $proto->_new($ref, $prefix)
#
# Recursive worker of new(): returns the text representation of the hash
# referenced by $ref. $prefix is the "__"-joined path of the keys leading
# to $ref; it is empty for the top level hash.
sub _new {
    my($proto, $ref, $prefix) = @_;
    $prefix = '' unless defined $prefix;
    my $dump = '';
    # Sorted keys make the output reproducible between runs.
    foreach my $var (sort keys %$ref) {
        my $val = $ref->{$var};
        # Top level keys get no separator; otherwise a scalar stored
        # directly in the top level hash would be written as "__var=val"
        # and be read back below an empty key.
        my $path = ($prefix ne '') ? $prefix . '__' . $var : $var;
        if (ref($val)) {
            $dump .= $proto->_new($val, $path);
        } else {
            # Undefined values are stored as empty strings.
            $val = '' unless defined $val;
            $dump .= "$path=$val\n";
        }
    }
    $dump;
}

# Dump->new($ref)
#
# Creates a Dump object holding the text representation of the (possibly
# nested) hash referenced by $ref. The object is a blessed reference to
# the text.
sub new {
    my($proto, $ref) = @_;
    my $dump = $proto->_new($ref, '');
    my $self = \$dump;
    bless($self, (ref($proto) || $proto));
}

# $dump->Dump()
#
# Returns the text representation stored in the object.
sub Dump { my $self = shift; $$self }

# Dump->Read($file)
#
# Reads a file written in the format described above and returns the
# corresponding hash reference. Empty lines and lines starting with "#"
# are skipped; other lines that cannot be parsed are reported on STDERR
# and ignored. If the file cannot be opened, an error is printed to
# STDERR and a reference to an empty hash is returned.
sub Read {
    my($proto, $file) = @_;
    my $ref = {};
    my $num = 0;
    my $fh = Symbol::gensym();
    if (!open($fh, "<$file")) {
        print STDERR "Failed to open file $file: $!\n";
        return $ref;
    }
    while (defined(my $line = <$fh>)) {
        ++$num;
        # Ignore comments and empty lines
        next if $line =~ /^\s*$/ || $line =~ /^\s*\#/;
        if (my($var, $val) = ($line =~ /^(\w+)=(.*)/)) {
            my $r = $ref;
            my @vars = split(/__/, $var);
            print "Dump::Read: Setting ", join("->", @vars), " to $val.\n"
                if $main::verbose;
            while (defined(my $v = shift @vars)) {
                if (@vars) {
                    # Descend, creating the intermediate hash if required.
                    # A scalar that an earlier line stored under the same
                    # key is replaced, as it cannot be dereferenced.
                    $r->{$v} = {} unless ref($r->{$v}) eq 'HASH';
                    $r = $r->{$v};
                } else {
                    $r->{$v} = $val;
                }
            }
        } else {
            print STDERR "Invalid line $num in file $file.\n";
        }
    }
    close($fh);
    $ref;
}


package main;

# Mail($subject, $body [, $to])
#
# Sends a mail with the given subject and body by piping the body into
# the command $MAIL. The placeholders $subject and $to in $MAIL are
# replaced with the backslash-escaped (quotemeta) subject and recipient;
# $to defaults to $ADMIN.
#
# In verbose mode the command and the body are printed first. In debug
# mode nothing is sent and an empty value is returned. Otherwise returns
# 1 if the mail command was started, accepted the body and exited with
# status 0, and 0 (after printing a message to STDERR) if not.
sub Mail ($$;$) {
    my($subject, $body, $to) = @_;
    # $to defaults to $ADMIN
    $to ||= $ADMIN;

    # Replace both placeholders in a single pass, so that text inserted
    # for one placeholder is never scanned for the other one.
    my %value_for = ('subject' => $subject, 'to' => $to);
    my $command = $MAIL;
    $command =~ s/\$(subject|to)/quotemeta($value_for{$1})/eg;

    if ($verbose) {
        print "Sending mail via command: $command\n$body\n";
    }
    return if $debug;

    # Without this a mail program that exits early would kill the whole
    # script with SIGPIPE; we prefer a failing print or close.
    local $SIG{'PIPE'} = 'IGNORE';

    my $pipe = Symbol::gensym();
    if (!open($pipe, "| $command")) {
        print STDERR "Failed to start mail command $command: $!\n";
        return 0;
    }
    if (!(print $pipe $body)) {
        print STDERR "Failed to write to mail command $command: $!\n";
        close($pipe);
        return 0;
    }
    if (!close($pipe)) {
        # For a pipe, close() also fails if the command exited with a
        # non-zero status; $! is 0 in that case.
        print STDERR "Mail command $command failed: ",
            ($! ? "$!" : "exit status $?"), "\n";
        return 0;
    }
    return 1;
}

# DiskUsage($DF, $MAX_DISC, $MAX_DISC_INC, $cache, $key)
#
# Runs the command $DF and inspects each line of its output that looks
# like "device total used available capacity% mountpoint". For every such
# line mail is sent (via Mail) if
#
#   - the capacity exceeds the limit configured for the device in
#     %CAPACITIES, or $MAX_DISC if the device is not listed there;
#   - the capacity has grown by at least $MAX_DISC_INC since the value
#     remembered in the cache. A false $MAX_DISC_INC (0, empty or
#     undefined) disables this check.
#
# $cache is the hash ref holding the cached state; this sub reads and
# updates $cache->{'diskusage'}->{<name>}, where <name> is built from
# $key and the device name. The flags 'mail_sent' and 'inc_mail_sent'
# stored there make sure that mail is sent only once until the condition
# disappears. $key keeps the entries of different commands (blocks and
# inodes) apart.
#
# Returns 0 on success and 1 if the command could not be started.
sub DiskUsage ($$$$$) {
    my($DF, $MAX_DISC, $MAX_DISC_INC, $cache, $key) = @_;

    my $ph = Symbol::gensym();
    if (!open($ph, "$DF |")) {
        print STDERR "Failed to open pipe to command $DF: $!\n";
        return 1;
    }

    # Some df implementations wrap the line after a long device name and
    # print the numbers on the following, indented line. A line holding
    # nothing but a single word is remembered here and glued to its
    # continuation line.
    my $pending_device = '';
    while (defined(my $line = <$ph>)) {
        if ($line =~ /^(\S+)\s*$/) {
            $pending_device = $1;
            next;
        }
        if ($pending_device ne '') {
            $line = "$pending_device $line" if $line =~ /^\s/;
            $pending_device = '';
        }

        my @fields =
            ($line =~ /^(\S+)                   # Device
                         \s+(\d+)               # Blocks total
                         \s+(\d+)               # Blocks used
                         \s+(\d+)               # Blocks available
                         \s+(\d+(?:\.\d+)?)\%   # Capacity (in percent)
                         \s+(\S.*)/x);          # Mount point
        next unless @fields;    # Header or otherwise uninteresting line
        my($device, $total, $used, $avail, $capacity, $mount) = @fields;

        # Build the cache key. The cache file format accepts only word
        # characters in keys and uses "__" as separator, so every run of
        # other characters becomes a single "_" and neither "__" nor a
        # trailing "_" may remain. For plain names like /dev/hda1 this
        # yields the same key as replacing "/" with "_".
        my $name = $key . $device;
        $name =~ s/\W+/_/g;
        $name =~ s/_{2,}/_/g;
        $name =~ s/_+$//;

        my $entry = ($cache->{'diskusage'}->{$name} ||= {});
        my $old_capacity = $entry->{'capacity'};
        $entry->{'capacity'} = $capacity;

        print "Capacity of $mount ($device) is $capacity\%.\n" if $verbose;
        my $cap = exists($CAPACITIES{$device}) ?
            $CAPACITIES{$device} : $MAX_DISC;

        #
        #   Check 1: Absolute capacity
        #
        if ($capacity > $cap) {
            print "=> Exceeds allowed capacity of $cap\%.\n" if $verbose;
            if ($entry->{'mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity exceeded on $HOST", <<"EOF");

Drive $mount ($device) exceeds its allowed disk capacity of $cap \%.
The current parameters are:

       Mount point:    $mount
       Device:         $device
       Blocks total:   $total
              used:    $used
              avail:   $avail
       Capacity:       $capacity %
       Allowed:        $cap %

This message is generated by the script $0.
EOF
                $entry->{'mail_sent'} = 1;
            }
        } else {
            $entry->{'mail_sent'} = 0;
            print "=> Doesn't exceed allowed capacity of $cap\%.\n"
                if $verbose;
        }

        #
        #   Check 2: Increment since the last run
        #
        printf("Checking increment for $device: $capacity <=> %s.\n",
               defined($old_capacity) ? $old_capacity : "undef")
            if $verbose;
        # Nothing to compare with on the first run for a device, and
        # nothing to check if no increment limit is set.
        my $raised = (defined($old_capacity)  &&  $MAX_DISC_INC  &&
                      $capacity >= $old_capacity + $MAX_DISC_INC);
        if ($raised) {
            print "=> Exceeds allowed increment $MAX_DISC_INC\%.\n"
                if $verbose;
            if ($entry->{'inc_mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity raised on $HOST", <<"EOF");

Drive $mount ($device) has raised its capacity from $old_capacity
to $capacity since the last check. The current parameters are:

       Mount point:    $mount
       Device:         $device
       Blocks total:   $total
              used:    $used
              avail:   $avail
       Capacity:       $capacity %
       Allowed:        $cap %
       Old capacity:   $old_capacity %

This message is generated by the script $0.
EOF
                $entry->{'inc_mail_sent'} = 1;
            }
        } else {
            print "=> Doesn't exceed allowed increment of $MAX_DISC_INC.\n"
                if $verbose;
            $entry->{'inc_mail_sent'} = 0;
        }
    }
    # The exit status of the command is deliberately not checked.
    close($ph);
    return 0;
}

# Uptime($MAX_LOAD, $cache)
#
# Runs the command $UPTIME, extracts the 1, 5 and 15 minute load averages
# from the first line of its output and sends mail (via Mail) if one of
# them exceeds $MAX_LOAD. The flag $cache->{'uptime'}->{'mail_sent'}
# makes sure that mail is sent only once until the load is back below the
# limit.
#
# Load averages may be written with a decimal point or, as some locales
# do, with a decimal comma.
#
# Returns 0 on success and 1 if the command could not be started or its
# output could not be parsed.
sub Uptime ($$) {
    my($MAX_LOAD, $cache) = @_;

    my $ph = Symbol::gensym();
    if (!open($ph, "$UPTIME |")) {
        print STDERR "Failed to open pipe to load command $UPTIME: $!\n";
        return 1;
    }
    my $line = <$ph>;
    close($ph);    # Only the first line is of interest
    if (!$line) {
        print STDERR "Uptime command $UPTIME returned empty output.\n";
        return 1;
    }

    my @loads =
        ($line =~ /load\s+average\:\s+
                        (\d+(?:[.,]\d+)?)       # 1 Minute average
                        ,\s+(\d+(?:[.,]\d+)?)   # 5 Minutes average
                        ,\s+(\d+(?:[.,]\d+)?)   # 15 Minutes average
                  \s*$/x);
    if (!@loads) {
        # This is not a system error, so $! has nothing to tell; show
        # the offending line instead.
        my $shown = $line;
        chomp($shown);
        print STDERR
            "Failed to parse output of Uptime command $UPTIME: $shown\n";
        return 1;
    }
    # Turn a decimal comma into a decimal point, so that the values can
    # be compared numerically.
    foreach my $load (@loads) {
        $load =~ tr/,/./;
    }
    my($one, $five, $fifteen) = @loads;

    printf("Detected average loads %s, %s, %s (1, 5 and 15 minutes)\n",
           $one, $five, $fifteen) if $verbose;

    if ($one > $MAX_LOAD  ||  $five > $MAX_LOAD  ||  $fifteen > $MAX_LOAD) {
        print "=> Exceeds allowed maximum of $MAX_LOAD.\n" if $verbose;
        if ($cache->{'uptime'}->{'mail_sent'}) {
            print "Mail already sent, ignoring.\n" if $verbose;
        } else {
            Mail("Maximum load exceeded on $HOST", <<"EOF");

The maximum load of $MAX_LOAD is exceeded on host $HOST. The average loads
are:

   Last minute:        $one
   Last 5 minutes:     $five
   Last 15 minutes:    $fifteen

This message is generated by the script $0.
EOF
            $cache->{'uptime'}->{'mail_sent'} = 1;
        }
    } else {
        print "=> Doesn't exceed allowed maximum of $MAX_LOAD.\n"
            if $verbose;
        $cache->{'uptime'}->{'mail_sent'} = 0;
    }
    return 0;
}


# Usage()
#
# Prints a description of the command line options, including the
# built-in defaults, to STDERR and terminates the script with exit
# status 1. This sub never returns.
sub Usage {
    my $help_text = <<"EOF";
Usage: $0 [options]

Possible options are:

 --cache-file=<file>  Store results in the given file; will be used for
                      watching increments and to prevent sending more than
                      one mail for a certain event. Defaults to
                      $CACHE_FILE.
 --max-load=<num>     Set maximum load; defaults to $MAX_LOAD
 --max-disc=<num>     Set maximum disk capacity in percent; defaults to
                      $MAX_DISC.
 --max-disc-inc=<num> Set the maximum increment of the disk capacity
                      between two checks. Defaults to $MAX_DISC_INC.
 --debug              Enable debugging mode (implies --verbose)
 --verbose            Enable verbose mode
 --help               Print this message
EOF
    print STDERR $help_text;
    exit 1;
}


# LoadCacheFile($file, $ref)
#
# Replaces the contents of the hash referenced by $ref with the data
# stored in the cache file $file. Nothing is loaded, and the hash is left
# untouched, if $file is empty, is the word "none" or is not an existing
# plain file.
#
# If $USE_LOCKS is set, a shared lock on "$file.lock" is held while the
# file is being read.
#
# Returns 0 on success (including the case that nothing was loaded) and
# 1 if the lock could not be obtained.
sub LoadCacheFile ($$) {
    my($file, $ref) = @_;
    return 0 unless ($file and $file ne "none" and -f $file);
    print "Loading cache file $file.\n" if $verbose;

    # The lock lasts only as long as this handle exists, so it must live
    # until the sub returns and not just until the end of the "if" block.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        if (!open($lfh, ">>$file.lock")  ||  !flock($lfh, 1)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }
    %$ref = %{Dump->Read($file)};
    return 0;
}

# SaveCacheFile($file, $ref)
#
# Writes the hash referenced by $ref to the cache file $file. The data is
# written to "$file.new" first; then the previous cache file is kept as
# "$file.bak" and the new file is renamed to $file.
#
# Nothing is saved if $file is empty or the word "none", matching what
# LoadCacheFile accepts as "no cache file". In debug mode the data is
# printed, but no file is written.
#
# If $USE_LOCKS is set, an exclusive lock on "$file.lock" is held until
# the sub returns.
#
# Returns 0 on success and 1 on failure, after printing a message to
# STDERR.
sub SaveCacheFile ($$) {
    my($file, $ref) = @_;
    if (!$file  or  $file eq "none") {
        print "No cache file configured, not saving cache.\n" if $verbose;
        return 0;
    }

    # The lock lasts only as long as this handle exists, so it must live
    # until the sub returns and not just until the end of the "if" block.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        if (!open($lfh, ">>$file.lock")  ||  !flock($lfh, 2)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }

    my $dump = Dump->new($ref)->Dump();
    my $time = localtime();
    $dump = <<"EOF";
#
# Automatically generated by $0 at $time.
# Do not edit.
#
$dump
EOF
    print "Saving cache:\n$dump\n" if $verbose;
    return 0 if $debug;

    # Write the new contents to a separate file first, so that a failure
    # does not destroy the existing cache file.
    my $cfh = Symbol::gensym();
    if (!open($cfh, ">$file.new")) {
        print STDERR "Failed to open cache file $file.new: $!\n";
        return 1;
    }
    if (!(print $cfh $dump)) {
        print STDERR "Failed to write cache file $file.new: $!\n";
        close($cfh);
        return 1;
    }
    if (!close($cfh)) {
        print STDERR "Failed to close cache file $file.new: $!\n";
        return 1;
    }

    if (-f "$file.bak"  &&  !unlink "$file.bak") {
        print STDERR "Failed to remove backup file $file.bak: $!\n";
        return 1;
    }
    if (-f $file  &&  !rename $file, "$file.bak") {
        print STDERR "Failed to rename $file to $file.bak: $!\n";
        return 1;
    }
    if (!rename "$file.new", $file) {
        print STDERR "Failed to rename $file.new to $file: $!\n";
        return 1;
    }
    return 0;
}


############################################################################
#
#   This is main().
#
############################################################################

# Parse the command line, then run the checks. Every step returns a true
# value on failure; as soon as one step has failed, the remaining steps
# are skipped and the script exits with that status.
{
    my $max_load = $MAX_LOAD;
    my $max_disc = $MAX_DISC;
    my $max_disc_inc = $MAX_DISC_INC;
    my $cache_file = $CACHE_FILE;

    # Returns the value of an option: either the part following "=" or,
    # if the option was given without "=", the next command line
    # argument. Prints the usage message and exits if there is none.
    my $option_value = sub {
        my($inline) = @_;
        my $value = defined($inline) ? $inline : shift(@ARGV);
        Usage() unless defined $value;
        $value;
    };

    # Like $option_value, but additionally insists on a non-negative
    # number, as the value is used in numeric comparisons later.
    my $numeric_value = sub {
        my($option, $inline) = @_;
        my $value = $option_value->($inline);
        if ($value !~ /^(?:\d+(?:\.\d*)?|\.\d+)$/) {
            print STDERR "Option $option requires a non-negative number,",
                " not '$value'.\n\n";
            Usage();
        }
        $value;
    };

    while (defined(my $arg = shift @ARGV)) {
        if ($arg =~ /^\-\-cache-file(?:=(.*))?$/) {
            $cache_file = $option_value->($1);
        } elsif ($arg =~ /^\-\-max\-load(?:=(.*))?$/) {
            $max_load = $numeric_value->('--max-load', $1);
        } elsif ($arg =~ /^\-\-max\-disc-inc(?:=(.*))?$/) {
            $max_disc_inc = $numeric_value->('--max-disc-inc', $1);
        } elsif ($arg =~ /^\-\-max\-disc(?:=(.*))?$/) {
            $max_disc = $numeric_value->('--max-disc', $1);
        } elsif ($arg eq "--debug") {
            $debug = $verbose = 1;
        } elsif ($arg eq "--verbose") {
            $verbose = 1;
        } elsif ($arg eq "--help") {
            Usage();
        } else {
            # Unknown option
            Usage();
        }
    }

    my %cache;
    my $status = 0;
    $status ||= LoadCacheFile($cache_file, \%cache);
    # "f" and "i" keep the cache entries for blocks and inodes apart.
    $status ||= DiskUsage($DF, $max_disc, $max_disc_inc, \%cache, "f");
    $status ||= DiskUsage($DFI, $max_disc, $max_disc_inc, \%cache, "i");
    $status ||= Uptime($max_load, \%cache);
    $status ||= SaveCacheFile($cache_file, \%cache);
    exit $status;
}