#!/usr/bin/perl -w
#
# watchdog - Check disk space and load, send mails if specified numbers
# are exceeded.
#
# Author: Jochen Wiedmann
# Am Eisteich 9
# 72555 Metzingen
# Germany
#
# E-Mail: [email protected]
#
############################################################################
use strict;
#
# Configurable section
#
# Default load average; a mail is sent when a measured average is above it.
my $MAX_LOAD = 3;

# Default usage limit of a partition in percent; a mail is sent when the
# usage is above it.
my $MAX_DISC = 80;

# Default limit for the rise of a partition's usage (in percentage points)
# between two consecutive checks.
my $MAX_DISC_INC = 10;

# Default recipient of all mails.
my $ADMIN = 'root';

# Local host name; appears in the mail subjects and bodies.
my $HOST = 'monitor.wuestenrot.de';
############################################################################
#
# Features:
#
# - Mail is sent only once for a given event; no further mail is sent
# for that event until the condition has cleared.
#
# These features depend on the existence of the following cache file:
#
############################################################################
# Everything that differs between the supported operating systems is kept
# in one table, keyed by the value of $^O:
#
#   cache_file  Default location of the cache file
#   mail        Command used for sending mail; must read the mail body
#               from stdin. $subject and $to are placeholders.
#   df          Command to determine disk usage; must print to stdout
#   dfi         Command to determine inode usage; must print to stdout
#   uptime      Command to determine the load; must print to stdout
#   use_locks   True if the cache file is to be protected with flock()
my %OS_SETTINGS = (
    'linux' => {
        'cache_file' => "/var/log/watchdog.log",
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -vk',
        'dfi'        => '/bin/df -i',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 1,
    },
    'sco3.2v5.0' => {
        'cache_file' => '/var/adm/watchdog.log',
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -Bk',
        'dfi'        => '/bin/df -I',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 0,
    },
);
die "Unknown OS: $^O" unless exists $OS_SETTINGS{$^O};
my $OS_CURRENT = $OS_SETTINGS{$^O};

my $CACHE_FILE = $OS_CURRENT->{'cache_file'};
my $MAIL       = $OS_CURRENT->{'mail'};
my $DF         = $OS_CURRENT->{'df'};
my $DFI        = $OS_CURRENT->{'dfi'};
my $UPTIME     = $OS_CURRENT->{'uptime'};
my $USE_LOCKS  = $OS_CURRENT->{'use_locks'};
# Per-device capacity limits (in percent), keyed by the device name as
# printed by the df command. Devices not listed here use $MAX_DISC.
#
# Example entries:
#     '/dev/hda1' => 75,
#     '/dev/hda2' => 70
my %CAPACITIES = ();
use vars qw($debug $verbose);
############################################################################
#
# We use *no* external modules here.
# This command will be executed from within cron, thus it should be
# small and not use much resources.
#
############################################################################
# This code stolen from the "Symbol" module. See "perldoc Symbol".
package Symbol;
{
    # Both variables are private to gensym(): the package that temporarily
    # holds the generated globs and a running number for unique names.
    my $glob_count   = 0;
    my $glob_package = "Symbol::";

    # Create and return a reference to a fresh, anonymous typeglob that
    # can be used as a file handle.
    sub gensym () {
        no strict 'refs';
        my $glob_name = "GEN" . $glob_count++;
        my $glob_ref  = \*{$glob_package . $glob_name};
        # Remove the symbol table entry again, so that the returned
        # reference is the only thing keeping the glob alive.
        delete $$glob_package{$glob_name};
        return $glob_ref;
    }
}
# This code similar to Data::Dumper. We save a complex hash ref
# { var1 => 'val1',
# var2 => 'val2',
# var3 => { var4 => 'val4',
# var5 => 'val5' }
# }
# into the following format:
# var1=val1
# var2=val2
# var3__var4=val4
# var3__var5=val5
#
package Dump;
# Serialize the (possibly nested) hash ref $ref into "name=value" lines
# and return them as one string.
#
# Arguments:
#   $proto   class name or object; only used for the recursive call
#   $ref     hash ref to serialize; values are scalars or further hash refs
#   $prefix  name of the enclosing keys, already joined with "__"; empty
#            (or undef) for the top level
#
# Keys of nested hashes are joined with "__". Top level scalars are written
# as plain "var=val", without a leading "__". Keys are emitted in sorted
# order so that the same data always produces the same text.
sub _new {
    my($proto, $ref, $prefix) = @_;
    $prefix = '' unless defined($prefix);
    my $dump = '';
    foreach my $var (sort keys %$ref) {
        my $val = $ref->{$var};
        # Test the length, not the truth, so that a key "0" is a valid prefix
        my $name = length($prefix) ? $prefix . '__' . $var : $var;
        if (ref($val)) {
            $dump .= $proto->_new($val, $name);
        } else {
            # An undefined value is stored as the empty string
            $val = '' unless defined($val);
            $dump .= "$name=$val\n";
        }
    }
    return $dump;
}
# Constructor: serialize the hash ref $ref and return an object which is
# a blessed reference to the resulting string. May be called as a class
# or as an instance method.
sub new {
    my($proto, $ref) = @_;
    my $class = ref($proto) || $proto;
    my $text = $proto->_new($ref, '');
    return bless(\$text, $class);
}
# Return the serialized text held by the object (a reference to a string).
sub Dump {
    my($self) = @_;
    return ${$self};
}
# Read a file in the "name=value" format and rebuild the nested hash.
#
# Arguments:
#   $proto  class name or object (not used)
#   $file   name of the file to read
#
# Returns a hash ref. If the file cannot be opened, a message is printed
# to STDERR and an empty hash ref is returned. Empty lines and lines
# starting with "#" are skipped; other lines that cannot be parsed are
# reported on STDERR and skipped.
#
# A name is everything up to the first "=" and may contain any character
# except whitespace and "=". Accepting only \w here would reject names
# derived from devices like /dev/mapper/vg-root or host:/export.
sub Read {
    my($proto, $file) = @_;
    my $ref = {};
    my $num = 0;
    my $fh = Symbol::gensym();
    if (!open($fh, "<$file")) {
        print STDERR "Failed to open file $file: $!\n";
        return $ref;
    }
    while (defined(my $line = <$fh>)) {
        ++$num;
        # Ignore comments and empty lines
        next if $line =~ /^\s*$/ || $line =~ /^\s*\#/;
        my($var, $val) = ($line =~ /^([^\s=]+)=(.*)/);
        my @vars = defined($var) ? split(/__/, $var) : ();
        if (!@vars) {
            print STDERR "Invalid line $num in file $file.\n";
            next;
        }
        print "Dump::Read: Setting ", join("->", @vars), " to $val.\n"
            if $main::verbose;
        # All but the last component name nested hashes, the last one
        # names the value itself.
        my $leaf = pop(@vars);
        my $r = $ref;
        foreach my $v (@vars) {
            # A scalar stored earlier under this name is replaced rather
            # than dereferenced, which would die under "use strict".
            $r->{$v} = {} unless ref($r->{$v}) eq 'HASH';
            $r = $r->{$v};
        }
        $r->{$leaf} = $val;
    }
    close($fh);
    return $ref;
}
package main;
# Send a mail by piping $body into the command configured in $MAIL.
#
# Arguments:
#   $subject  subject line
#   $body     text of the mail
#   $to       recipient; optional, defaults to $ADMIN
#
# In verbose mode the command and the body are printed to stdout. In debug
# mode nothing is sent and an empty value is returned. Otherwise returns 1
# if the mail command accepted the mail and 0 if it failed; failures are
# reported on STDERR.
sub Mail ($$;$) {
    my($subject, $body, $to) = @_;
    # $to defaults to $ADMIN
    $to ||= $ADMIN;
    # Replace both placeholders in a single pass, so that text inserted
    # for one of them is never scanned for the other one.
    my %value = ('subject' => $subject, 'to' => $to);
    my $command = $MAIL;
    $command =~ s/\$(subject|to)/quotemeta($value{$1})/eg;
    if ($verbose) {
        print "Sending mail via command: $command\n$body\n";
    }
    return if $debug;
    my $ph = Symbol::gensym();
    if (!open($ph, "| $command")) {
        print STDERR "Failed to open pipe to mail command $command: $!\n";
        return 0;
    }
    my $error;
    $error = "write failed: $!" unless print {$ph} $body;
    # Always close the pipe, even after a failed write. For a piped open
    # close() also fails if the command exits with a non-zero status; $!
    # is 0 in that case and the status is in $?.
    if (!close($ph)) {
        $error ||= $! ? "close failed: $!" : "exit status $?";
    }
    if ($error) {
        print STDERR "Failed to send mail via command $command: $error\n";
        return 0;
    }
    return 1;
}
# Run a df-like command and check every file system it reports.
#
# Arguments:
#   $DF            command to run; must print to stdout
#   $MAX_DISC      default allowed capacity in percent; overridden per
#                  device by %CAPACITIES
#   $MAX_DISC_INC  allowed rise of the capacity (in percentage points)
#                  since the previous check; 0 disables this check
#   $cache         hash ref with the results of the previous run; updated
#                  in place below the key 'diskusage'
#   $key           prefix for the cache entry names, so that several
#                  commands (blocks, inodes) can share one cache
#
# Two conditions are checked per file system: the capacity is above its
# limit, or it has risen by at least $MAX_DISC_INC since the last check.
# For each one a mail is sent once; the 'mail_sent' / 'inc_mail_sent'
# flags in the cache suppress repeated mails until the condition clears.
#
# Returns 0 on success and 1 if the command could not be started.
sub DiskUsage ($$$$$) {
    my($DF, $MAX_DISC, $MAX_DISC_INC, $cache, $key) = @_;
    my $ph = Symbol::gensym();
    if (!open($ph, "$DF |")) {
        print STDERR "Failed to open pipe to command $DF: $!\n";
        return 1;
    }
    # Some df implementations put a long device name on a line of its own
    # and continue with the numbers on the next, indented line. Such a
    # name is kept here and glued to the front of the following line.
    my $pending = '';
    while (defined(my $line = <$ph>)) {
        if ($pending ne '') {
            $line = $pending . $line if $line =~ /^\s+\S/;
            $pending = '';
        }
        if ($line =~ /^(\S+)\s*$/) {
            $pending = $1;
            next;
        }
        my($device, $total, $used, $avail, $capacity, $mount) =
            ($line =~ /^(\S+)                 # Device
                       \s+(\d+)               # Blocks total
                       \s+(\d+)               # Blocks used
                       \s+(\d+)               # Blocks available
                       \s+(\d+(?:\.\d+)?)\%   # Capacity (in percent)
                       \s+(\S.*)/x);          # Mount point
        # Header lines and anything else that is not a file system
        next unless defined($device);

        # Name of the cache entry: the prefix plus the device name with
        # all slashes replaced by underscores
        my $name = $key . $device;
        $name =~ s/\//_/g;
        my $old_capacity = $cache->{'diskusage'}->{$name}->{'capacity'};
        $cache->{'diskusage'}->{$name}->{'capacity'} = $capacity;
        my $entry = $cache->{'diskusage'}->{$name};
        print "Capacity of $mount ($device) is $capacity\%.\n" if $verbose;

        #
        # First check: absolute capacity
        #
        my $cap = exists($CAPACITIES{$device}) ?
            $CAPACITIES{$device} : $MAX_DISC;
        if (!($capacity <= $cap)) {
            print "=> Exceeds allowed capacity of $cap\%.\n" if $verbose;
            if ($entry->{'mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity exceeded on $HOST", <<"EOF");
Drive $mount ($device) exceeds its allowed disk capacity of $cap \%.
The current parameters are:
Mount point: $mount
Device: $device
Blocks total: $total
used: $used
avail: $avail
Capacity: $capacity %
Allowed: $cap %
This message is generated by the script $0.
EOF
                $entry->{'mail_sent'} = 1;
            }
        } else {
            $entry->{'mail_sent'} = 0;
            print "=> Doesn't exceed allowed capacity of $cap\%.\n"
                if $verbose;
        }

        #
        # Second check: rise since the previous run. Needs a previous
        # value and is skipped completely if $MAX_DISC_INC is 0; without
        # that, a limit of 0 would report every file system on every run.
        #
        printf("Checking increment for $device: $capacity <=> %s.\n",
               defined($old_capacity) ? $old_capacity : "undef")
            if $verbose;
        my $raised = (defined($old_capacity) && $MAX_DISC_INC &&
                      ($capacity >= $old_capacity + $MAX_DISC_INC));
        if ($raised) {
            print "=> Exceeds allowed increment $MAX_DISC_INC\%.\n"
                if $verbose;
            if ($entry->{'inc_mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity raised on $HOST", <<"EOF");
Drive $mount ($device) has raised its capacity from $old_capacity
to $capacity since the last check. The current parameters are:
Mount point: $mount
Device: $device
Blocks total: $total
used: $used
avail: $avail
Capacity: $capacity %
Allowed: $cap %
Old capacity: $old_capacity %
This message is generated by the script $0.
EOF
                $entry->{'inc_mail_sent'} = 1;
            }
        } else {
            print "=> Doesn't exceed allowed increment of $MAX_DISC_INC.\n"
                if $verbose;
            $entry->{'inc_mail_sent'} = 0;
        }
    }
    close($ph);
    return 0;
}
# Run the command in $UPTIME and check the load averages it reports.
#
# Arguments:
#   $MAX_LOAD  allowed load
#   $cache     hash ref with the results of the previous run; updated in
#              place below the key 'uptime'
#
# A mail is sent if any of the 1, 5 and 15 minute averages is above
# $MAX_LOAD. The 'mail_sent' flag in the cache suppresses repeated mails
# until all three averages are back within the limit.
#
# Returns 0 on success and 1 if the command could not be started, printed
# nothing, or printed something that could not be parsed.
sub Uptime ($$) {
    my($MAX_LOAD, $cache) = @_;
    my $ph = Symbol::gensym();
    if (!open($ph, "$UPTIME |")) {
        print STDERR "Failed to open pipe to load command $UPTIME: $!\n";
        return 1;
    }
    # Only the first line of the output is of interest
    my $line = <$ph>;
    close($ph);
    if (!$line) {
        print STDERR "Uptime command $UPTIME returned empty output.\n";
        return 1;
    }
    my($one, $five, $fifteen) =
        ($line =~ /load\s+average\:\s+
                   (\d+(?:\.\d+)?)       # 1 Minute average
                   ,\s+(\d+(?:\.\d+)?)   # 5 Minutes average
                   ,\s+(\d+(?:\.\d+)?)   # 15 Minutes average
                   \s*$/x);
    if (!defined($one)) {
        # Show what could not be parsed; $! has no meaning at this point.
        chomp($line);
        print STDERR "Failed to parse output of Uptime command $UPTIME: $line\n";
        return 1;
    }
    printf("Detected average loads %s, %s, %s (1, 5 and 15 minutes)\n",
           $one, $five, $fifteen) if $verbose;
    if (!($one <= $MAX_LOAD && $five <= $MAX_LOAD &&
          $fifteen <= $MAX_LOAD)) {
        print "=> Exceeds allowed maximum of $MAX_LOAD.\n" if $verbose;
        if ($cache->{'uptime'}->{'mail_sent'}) {
            print "Mail already sent, ignoring.\n" if $verbose;
        } else {
            Mail("Maximum load exceeded on $HOST", <<"EOF");
The maximum load of $MAX_LOAD is exceeded on host $HOST. The average loads
are:
Last minute: $one
Last 5 minutes: $five
Last 15 minutes: $fifteen
This message is generated by the script $0.
EOF
            $cache->{'uptime'}->{'mail_sent'} = 1;
        }
    } else {
        print "=> Doesn't exceed allowed maximum of $MAX_LOAD.\n"
            if $verbose;
        $cache->{'uptime'}->{'mail_sent'} = 0;
    }
    return 0;
}
# Print the list of command line options, including the current default
# values, to STDERR and terminate the script with exit status 1.
sub Usage {
    my $help = <<"EOF";
Usage: $0 [options]
Possible options are:
--cache-file=<file> Store results in the given file; will be used for
watching increments and to prevent sending more than
one mail for a certain event. Defaults to
$CACHE_FILE.
--max-load=<num> Set maximum load; defaults to $MAX_LOAD
--max-disc=<num> Set maximum disk capacity in percent; defaults to
$MAX_DISC.
--max-disc-inc=<num> Set the maximum increment of the disk capacity
between two checks. Defaults to $MAX_DISC_INC.
--debug Enable debugging mode (implies --verbose)
--verbose Enable verbose mode
--help Print this message
EOF
    print STDERR $help;
    exit 1;
}
# Load the results of the previous run.
#
# Arguments:
#   $file  name of the cache file; nothing is loaded if it is empty,
#          "none" or not an existing plain file
#   $ref   hash ref which receives the contents of the cache file
#
# If $USE_LOCKS is set, a shared lock on "$file.lock" is held while the
# file is read.
#
# Returns a false value on success (or if there is nothing to load) and 1
# if the lock could not be obtained.
sub LoadCacheFile ($$) {
    my($file, $ref) = @_;
    return unless ($file and $file ne "none" and -f $file);
    print "Loading cache file $file.\n" if $verbose;
    # The handle is declared here and not inside the "if" block: the lock
    # is gone as soon as the handle goes out of scope, and it must still
    # be held while the file is read below.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        # 1 = shared lock (LOCK_SH)
        if (!open($lfh, ">>$file.lock") || !flock($lfh, 1)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }
    %$ref = %{Dump->Read($file)};
    close($lfh) if $lfh;
    return 0;
}
# Save the results of this run for the next one.
#
# Arguments:
#   $file  name of the cache file; nothing is written if it is empty or
#          "none" (the same rule LoadCacheFile applies)
#   $ref   hash ref with the data to save
#
# The data is written to "$file.new" first. Then the previous cache file
# is kept as "$file.bak" and the new file is renamed to $file. If
# $USE_LOCKS is set, an exclusive lock on "$file.lock" is held while the
# files are written and renamed.
#
# In verbose mode the data is printed to stdout; in debug mode no file is
# touched.
#
# Returns 0 on success and 1 on failure; failures are reported on STDERR.
sub SaveCacheFile ($$) {
    my($file, $ref) = @_;
    my $dump = Dump->new($ref)->Dump();
    my $time = localtime();
    $dump = <<"EOF";
#
# Automatically generated by $0 at $time.
# Do not edit.
#
$dump
EOF
    print "Saving cache:\n$dump\n" if $verbose;
    return 0 if $debug;
    return 0 unless ($file and $file ne "none");

    # The handle is declared here and not inside the "if" block: the lock
    # is gone as soon as the handle goes out of scope, and it must be held
    # until this function returns.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        # 2 = exclusive lock (LOCK_EX)
        if (!open($lfh, ">>$file.lock") || !flock($lfh, 2)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }

    my $cfh = Symbol::gensym();
    if (!open($cfh, ">$file.new")) {
        print STDERR "Failed to open cache file $file.new: $!\n";
        return 1;
    }
    # Errors of buffered writes may show up only when the file is closed
    if (!(print {$cfh} $dump) or !close($cfh)) {
        print STDERR "Failed to write cache file $file.new: $!\n";
        return 1;
    }
    if (-f "$file.bak" && !unlink("$file.bak")) {
        print STDERR "Failed to remove backup file $file.bak: $!\n";
        return 1;
    }
    if (-f $file && !rename($file, "$file.bak")) {
        print STDERR "Failed to rename $file to $file.bak: $!\n";
        return 1;
    }
    if (!rename("$file.new", $file)) {
        print STDERR "Failed to rename $file.new to $file: $!\n";
        return 1;
    }
    return 0;
}
############################################################################
#
# This is main().
#
############################################################################
{
    # Start with the defaults of the configurable section; all of them
    # can be overridden on the command line.
    my $max_load = $MAX_LOAD;
    my $max_disc = $MAX_DISC;
    my $max_disc_inc = $MAX_DISC_INC;
    my $cache_file = $CACHE_FILE;

    # The limits are used in numeric comparisons. Anything that is not a
    # plain non-negative number would silently count as 0 there, so it is
    # rejected here.
    my $is_number = sub {
        my $value = shift;
        return (defined($value) && $value =~ /^\d+(?:\.\d+)?$/);
    };

    # Every option that takes a value accepts both "--option=value" and
    # "--option value". Anything unknown, including --help, prints the
    # usage message.
    while (defined(my $arg = shift @ARGV)) {
        if ($arg =~ /^\-\-cache-file(?:=(.*))?$/) {
            $cache_file = defined $1 ? $1 : shift(@ARGV);
            Usage() unless defined $cache_file;
        } elsif ($arg =~ /^\-\-max\-load(?:=(.*))?$/) {
            $max_load = defined $1 ? $1 : shift(@ARGV);
            Usage() unless $is_number->($max_load);
        } elsif ($arg =~ /^\-\-max\-disc-inc(?:=(.*))?$/) {
            $max_disc_inc = defined $1 ? $1 : shift(@ARGV);
            Usage() unless $is_number->($max_disc_inc);
        } elsif ($arg =~ /^\-\-max\-disc(?:=(.*))?$/) {
            $max_disc = defined $1 ? $1 : shift(@ARGV);
            Usage() unless $is_number->($max_disc);
        } elsif ($arg eq "--debug") {
            $debug = $verbose = 1;
        } elsif ($arg eq "--verbose") {
            $verbose = 1;
        } else {
            Usage();
        }
    }

    # Run the steps in order. As soon as one of them fails, the remaining
    # ones are skipped and its status becomes the exit status.
    my %cache;
    my $status = 0;
    $status ||= LoadCacheFile($cache_file, \%cache);
    $status ||= DiskUsage($DF, $max_disc, $max_disc_inc, \%cache, "f");
    $status ||= DiskUsage($DFI, $max_disc, $max_disc_inc, \%cache, "i");
    $status ||= Uptime($max_load, \%cache);
    $status ||= SaveCacheFile($cache_file, \%cache);
    exit $status;
}