@@ -1,111 +1,164 @@
#!/usr/bin/perl
#
-# The following line disables the Nagios embedded Perl interpreter:
-# nagios: -epn
-#
# Nagios plugin
#
# Monitor Dell server hardware status using Dell OpenManage Server
# Administrator, either locally via NRPE, or remotely via SNMP.
#
-# $Id: check_openmanage 13896 2009-04-28 09:43:53Z trondham $
+# $Id: check_openmanage 16302 2010-01-22 10:09:58Z trondham $
+#
+# Copyright (C) 2010 Trond H. Amundsen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+require 5.006; # Perl v5.6.0 or newer is required
use strict;
use warnings;
-use POSIX qw(isatty);
+use POSIX qw(isatty ceil);
use Getopt::Long qw(:config no_ignore_case);
-use Pod::Usage;
-use File::Basename qw(basename);
+
+# Global (package) variables used throughout the code
+use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
+ $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE
+ $snmp_session $snmp_error $omreport $globalstatus $global
+ $linebreak $omopt_chassis $omopt_system $blade
+ $exit_code $snmp $original_sigwarn
+ %check %opt %perfdata %reverse_exitcode %status2nagios
+ %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo
+ %blacklist %nagios_alert_count %count
+ @perl_warnings @controllers @enclosures
+ @report_storage @report_chassis @report_other
+ );
#---------------------------------------------------------------------
# Initialization and global variables
#---------------------------------------------------------------------
-# Version and similar info
-my $NAME = 'check_openmanage';
-my $VERSION = '3.3.1';
-my $AUTHOR = 'Trond H. Amundsen';
-my $CONTACT = 't.h.amundsen@usit.uio.no';
-
-# Exit codes
-my $OK = 0;
-my $WARNING = 1;
-my $CRITICAL = 2;
-my $UNKNOWN = 3;
+# Small subroutine to collect any perl warnings during execution
+sub collect_perl_warning {
+    push @perl_warnings, [@_];    # store a copy of the handler arguments as one entry
+}
-# SNMP session variables
-my $snmp_session = undef;
-my $snmp_error = undef;
+# Set the WARN signal to use our collect subroutine above
+$original_sigwarn = $SIG{__WARN__};
+$SIG{__WARN__} = \&collect_perl_warning;
-# Firmware update lock file
-my $firmware_lock = '/var/lock/.spsetup'; # FIXME: location on Windows?
+# Version and similar info
+$NAME = 'check_openmanage';
+$VERSION = '3.5.5';
+$AUTHOR = 'Trond H. Amundsen';
+$CONTACT = 't.h.amundsen@usit.uio.no';
-# The omreport command
-my $omreport = undef;
+# Exit codes
+$E_OK = 0;
+$E_WARNING = 1;
+$E_CRITICAL = 2;
+$E_UNKNOWN = 3;
+
+# Firmware update lock file [FIXME: location on Windows?]
+$FW_LOCK = '/var/lock/.spsetup'; # default on Linux
+
+# Usage text
+$USAGE = <<"END_USAGE";
+Usage: $NAME [OPTION]...
+END_USAGE
+
+# Help text
+$HELP = <<'END_HELP';
+
+GENERAL OPTIONS:
+
+ -p, --perfdata Output performance data
+ -t, --timeout Plugin timeout in seconds
+ -c, --critical Customise temperature critical limits
+ -w, --warning Customise temperature warning limits
+ -d, --debug Debug output, reports everything
+ -h, --help Display this help text
+ -V, --version Display version info
+
+SNMP OPTIONS:
+
+ -H, --hostname Hostname or IP of the server (needed for SNMP)
+ -C, --community SNMP community string
+ -P, --protocol SNMP protocol version
+ --port SNMP port number
+
+OUTPUT OPTIONS:
+
+ -i, --info Prefix any alerts with the service tag
+ -e, --extinfo Append system info to alerts
+ -s, --state Prefix alerts with alert state
+ --short-state Prefix alerts with alert state (abbreviated)
+ -o, --okinfo Verbosity when check result is OK
+ --htmlinfo HTML output with clickable links
+
+CHECK CONTROL AND BLACKLISTING:
+
+ -a, --all Check everything, even log content
+ -b, --blacklist Blacklist missing and/or failed components
+ --only Only check a certain component or alert type
+ --check Fine-tune which components are checked
+
+For more information and advanced options, see the manual page or URL:
+ http://folk.uio.no/trondham/software/check_openmanage.html
+END_HELP
-# Check flags, override available with the --check option
-my %check
- = (
- 'storage' => 1, # check storage subsystem
- 'memory' => 1, # check memory (dimms)
- 'fans' => 1, # check fan status
- 'power' => 1, # check power supplies
- 'temperature' => 1, # check temperature
- 'cpu' => 1, # check processors
- 'voltage' => 1, # check voltage
- 'batteries' => 1, # check battery probes
- 'pwrmonitor' => 1, # check power consumption
- 'intrusion' => 0, # check intrusion detection
- 'alertlog' => 0, # check the alert log
- 'esmlog' => 0, # check the ESM log (hardware log)
- 'esmhealth' => 1, # check the ESM log overall health
- );
+# Version and license text
+$LICENSE = <<"END_LICENSE";
+$NAME $VERSION
+Copyright (C) 2010 $AUTHOR
+License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
-# If we were called with alternate basename, check to see which
-# component should be checked
-my $self = basename($0);
-my $component = undef;
-if ($self =~ m/\A ${NAME}_(.+?)(\.exe)? \z/xms) { # matches "$NAME_foo" and "$NAME_foo.exe"
- $component = $1;
- if (!exists $check{$component}) {
- print "CONFIGURATION ERROR: Unknown component '$component'. Check plugin filename\n";
- exit $UNKNOWN;
- }
-}
+Written by $AUTHOR <$CONTACT>
+END_LICENSE
# Options with default values
-my %opt = ( 'blacklist' => [],
- 'check' => [],
- 'critical' => [],
- 'warning' => [],
- 'timeout' => 30, # default timeout is 30 seconds
- 'verbose' => 0,
- 'help' => 0,
- 'man' => 0,
- 'perfdata' => undef,
- 'info' => 0,
- 'extinfo' => 0,
- 'postmsg' => undef,
- 'state' => 0,
- 'short-state' => 0,
- 'okinfo' => 0, # default "ok" output level
- 'linebreak' => undef,
- 'version' => 0,
- 'global' => 0,
- 'snmp' => 0,
- 'port' => 161, # default port is the well-known SNMP port 161
- 'hostname' => 'localhost',
- 'community' => 'public', # SMNP v1 or v2c
- 'protocol' => 2,
- 'username' => undef, # SMNP v3
- 'authpassword' => undef, # SMNP v3
- 'authkey' => undef, # SMNP v3
- 'authprotocol' => undef, # SMNP v3
- 'privpassword' => undef, # SMNP v3
- 'privkey' => undef, # SMNP v3
- 'privprotocol' => undef, # SMNP v3
- );
+%opt = ( 'blacklist'    => [],
+         'check'        => [],
+         'critical'     => [],
+         'warning'      => [],
+         'timeout'      => 30,  # default timeout is 30 seconds
+         'debug'        => 0,
+         'help'         => 0,
+         'perfdata'     => undef,
+         'info'         => 0,
+         'extinfo'      => 0,
+         'htmlinfo'     => undef,
+         'postmsg'      => undef,
+         'state'        => 0,
+         'short-state'  => 0,
+         'okinfo'       => 0,   # default "ok" output level
+         'linebreak'    => undef,
+         'version'      => 0,
+         'all'          => 0,
+         'only'         => undef,
+         'omreport'     => undef,
+         'port'         => 161, # default SNMP port
+         'hostname'     => undef, # SNMP mode is used only when a hostname is given
+         'community'    => 'public', # SNMP v1 or v2c
+         'protocol'     => 2,   # SNMP protocol version
+         'username'     => undef, # SNMP v3
+         'authpassword' => undef, # SNMP v3
+         'authkey'      => undef, # SNMP v3
+         'authprotocol' => undef, # SNMP v3
+         'privpassword' => undef, # SNMP v3
+         'privkey'      => undef, # SNMP v3
+         'privprotocol' => undef, # SNMP v3
+       );
# Get options
GetOptions('b|blacklist=s' => \@{ $opt{blacklist} },
@@ -113,20 +166,21 @@
'c|critical=s' => \@{ $opt{critical} },
'w|warning=s' => \@{ $opt{warning} },
't|timeout=i' => \$opt{timeout},
- 'v|verbose' => \$opt{verbose},
+ 'd|debug' => \$opt{debug},
'h|help' => \$opt{help},
- 'm|man' => \$opt{man},
'V|version' => \$opt{version},
'p|perfdata:s' => \$opt{perfdata},
'i|info' => \$opt{info},
'e|extinfo' => \$opt{extinfo},
+ 'htmlinfo:s' => \$opt{htmlinfo},
'postmsg=s' => \$opt{postmsg},
- 'state' => \$opt{state},
+ 's|state' => \$opt{state},
'short-state' => \$opt{shortstate},
'o|ok-info=i' => \$opt{okinfo},
'l|linebreak=s' => \$opt{linebreak},
- 'g|global' => \$opt{global},
- 's|snmp' => \$opt{snmp},
+ 'a|all' => \$opt{all},
+ 'only=s' => \$opt{only},
+ 'omreport=s' => \$opt{omreport},
'port=i' => \$opt{port},
'H|hostname=s' => \$opt{hostname},
'C|community=s' => \$opt{community},
@@ -138,46 +192,55 @@
'privpassword=s' => \$opt{privpassword},
'privkey=s' => \$opt{privkey},
'privprotocol=s' => \$opt{privprotocol},
- ) or pod2usage(-exitstatus => $UNKNOWN, -verbose => 0);
+ ) or do { print $USAGE; exit $E_UNKNOWN };
# If user requested help
-if ($opt{'help'}) {
- pod2usage(-exitstatus => $OK, -verbose => 1);
-}
-
-# If user requested man page
-if ($opt{'man'}) {
- pod2usage(-exitstatus => $OK, -verbose => 2);
+if ($opt{help}) {
+ print $USAGE, $HELP;
+ exit $E_OK;
}
# If user requested version info
-if ($opt{'version'}) {
- print <<"END_VERSION";
-$NAME $VERSION
-This Nagios plugin comes with ABSOLUTELY NO WARRANTY.
-You may redistribute copies of this plugin under the terms of
-the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
-
-Written by $AUTHOR <$CONTACT>
-END_VERSION
- exit $OK;
-}
-
-# If user has specified the '--global' option, which implies that
-# everything should be checked.
-if ($opt{global}) {
- $check{intrusion} = 1; # turn on chassis intrusion check
+if ($opt{version}) {
+ print $LICENSE;
+ exit $E_OK;
}
# Setting timeout
$SIG{ALRM} = sub {
- print "$NAME timed out after $opt{timeout} seconds\n";
- exit $UNKNOWN;
+ print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
+ exit $E_UNKNOWN;
};
alarm $opt{timeout};
+# If we're using SNMP
+$snmp = defined $opt{hostname} ? 1 : 0;
+
+# SNMP session variables
+$snmp_session = undef;
+$snmp_error = undef;
+
+# The omreport command
+$omreport = undef;
+
+# Check flags, override available with the --check option
+%check = ( 'storage' => 1, # check storage subsystem
+ 'memory' => 1, # check memory (dimms)
+ 'fans' => 1, # check fan status
+ 'power' => 1, # check power supplies
+ 'temp' => 1, # check temperature
+ 'cpu' => 1, # check processors
+ 'voltage' => 1, # check voltage
+ 'batteries' => 1, # check battery probes
+ 'amperage' => 1, # check power consumption
+ 'intrusion' => 1, # check intrusion detection
+ 'alertlog' => 0, # check the alert log
+ 'esmlog' => 0, # check the ESM log (hardware log)
+ 'esmhealth' => 1, # check the ESM log overall health
+ );
+
# Default line break
-my $linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
+$linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
# Line break from option
if (defined $opt{linebreak}) {
@@ -193,44 +256,75 @@
}
# Exit with status=UNKNOWN if there is firmware upgrade in progress
-if (!$opt{'snmp'} && -f $firmware_lock) {
- print "MONITORING DISABLED - Firmware update in progress ($firmware_lock exists)\n";
- exit $UNKNOWN;
-}
+if (!$snmp && -f $FW_LOCK) {
+ print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
+ exit $E_UNKNOWN;
+}
+
+# List of controllers and enclosures
+@controllers = (); # controllers
+@enclosures = (); # enclosures
+
+# Messages
+@report_storage = (); # messages with associated nagios level (storage)
+@report_chassis = (); # messages with associated nagios level (chassis)
+@report_other = (); # messages with associated nagios level (other)
+
+# Counters for everything
+%count
+ = (
+ 'pdisk' => 0, # number of physical disks
+ 'vdisk' => 0, # number of logical drives (virtual disks)
+ 'temp' => 0, # number of temperature probes
+ 'volt' => 0, # number of voltage probes
+ 'amp' => 0, # number of amperage probes
+ 'intr' => 0, # number of intrusion probes
+ 'dimm' => 0, # number of memory modules
+ 'fan' => 0, # number of fan probes
+ 'cpu' => 0, # number of CPUs
+ 'bat' => 0, # number of batteries
+ 'power' => 0, # number of power supplies
+ 'esm' => {
+ 'Critical' => 0, # critical entries in ESM log
+ 'Non-Critical' => 0, # warning entries in ESM log
+ 'Ok' => 0, # ok entries in ESM log
+ },
+ 'alert' => {
+ 'Critical' => 0, # critical entries in alert log
+ 'Non-Critical' => 0, # warning entries in alert log
+ 'Ok' => 0, # ok entries in alert log
+ },
+ );
+
+# Performance data
+%perfdata = ();
-# Global variables used throughout the code
-my @controllers = (); # list of controllers
-my %enclosure = (); # list of enclosure (id,name) pairs
-my @report_storage = (); # messages with associated nagios level (storage)
-my @report_chassis = (); # messages with associated nagios level (chassis)
-my @report_other = (); # messages with associated nagios level (other)
-my $no_of_pdisks = 0; # counts number of physical disks
-my $no_of_vdisks = 0; # counts number of logical drives (virtual disks)
-my %perfdata = (); # performance data
-my $globalstatus = $OK; # global health status
+# Global health status
+$global = 1; # default is to check global status
+$globalstatus = $E_OK; # default global health status is "OK"
# Nagios error levels reversed
-my %ERRORCODE
+%reverse_exitcode
= (
- 0 => 'OK',
- 1 => 'WARNING',
- 2 => 'CRITICAL',
- 3 => 'UNKNOWN',
+ $E_OK => 'OK',
+ $E_WARNING => 'WARNING',
+ $E_CRITICAL => 'CRITICAL',
+ $E_UNKNOWN => 'UNKNOWN',
);
# OpenManage (omreport) and SNMP error levels
-my %status2nagios
+%status2nagios
= (
- 'Unknown' => $CRITICAL,
- 'Critical' => $CRITICAL,
- 'Non-Critical' => $WARNING,
- 'Ok' => $OK,
- 'Non-Recoverable' => $CRITICAL,
- 'Other' => $CRITICAL,
+ 'Unknown' => $E_CRITICAL,
+ 'Critical' => $E_CRITICAL,
+ 'Non-Critical' => $E_WARNING,
+ 'Ok' => $E_OK,
+ 'Non-Recoverable' => $E_CRITICAL,
+ 'Other' => $E_CRITICAL,
);
# Status via SNMP
-my %snmp_status
+%snmp_status
= (
1 => 'Other',
2 => 'Unknown',
@@ -241,37 +335,37 @@
);
# Probe Status via SNMP
-my %snmp_probestatus
+%snmp_probestatus
= (
- 1 => 'Other', # -- probe status is not one of the following:
- 2 => 'Unknown', # -- probe status is unknown (not known or monitored)
- 3 => 'Ok', # -- probe is reporting a value within the thresholds
- 4 => 'nonCriticalUpper', # -- probe has crossed upper noncritical threshold
- 5 => 'criticalUpper', # -- probe has crossed upper critical threshold
- 6 => 'nonRecoverableUpper', # -- probe has crossed upper non-recoverable threshold
- 7 => 'nonCriticalLower', # -- probe has crossed lower noncritical threshold
- 8 => 'criticalLower', # -- probe has crossed lower critical threshold
- 9 => 'nonRecoverableLower', # -- probe has crossed lower non-recoverable threshold
- 10 => 'failed', # -- probe is not functional
+ 1 => 'Other', # probe status is not one of the following:
+ 2 => 'Unknown', # probe status is unknown (not known or monitored)
+ 3 => 'Ok', # probe is reporting a value within the thresholds
+ 4 => 'nonCriticalUpper', # probe has crossed upper noncritical threshold
+ 5 => 'criticalUpper', # probe has crossed upper critical threshold
+ 6 => 'nonRecoverableUpper', # probe has crossed upper non-recoverable threshold
+ 7 => 'nonCriticalLower', # probe has crossed lower noncritical threshold
+ 8 => 'criticalLower', # probe has crossed lower critical threshold
+ 9 => 'nonRecoverableLower', # probe has crossed lower non-recoverable threshold
+ 10 => 'failed', # probe is not functional
);
# Probe status translated to Nagios alarm levels
-my %probestatus2nagios
+%probestatus2nagios
= (
- 'Other' => $CRITICAL,
- 'Unknown' => $CRITICAL,
- 'Ok' => $OK,
- 'nonCriticalUpper' => $WARNING,
- 'criticalUpper' => $CRITICAL,
- 'nonRecoverableUpper' => $CRITICAL,
- 'nonCriticalLower' => $WARNING,
- 'criticalLower' => $CRITICAL,
- 'nonRecoverableLower' => $CRITICAL,
- 'failed' => $CRITICAL,
+ 'Other' => $E_CRITICAL,
+ 'Unknown' => $E_CRITICAL,
+ 'Ok' => $E_OK,
+ 'nonCriticalUpper' => $E_WARNING,
+ 'criticalUpper' => $E_CRITICAL,
+ 'nonRecoverableUpper' => $E_CRITICAL,
+ 'nonCriticalLower' => $E_WARNING,
+ 'criticalLower' => $E_CRITICAL,
+ 'nonRecoverableLower' => $E_CRITICAL,
+ 'failed' => $E_CRITICAL,
);
# System information gathered
-my %sysinfo
+%sysinfo
= (
'bios' => 'N/A', # BIOS version
'biosdate' => 'N/A', # BIOS release date
@@ -291,15 +385,20 @@
adjust_checks() if defined $opt{check};
# Blacklisted components
-my %blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : ();
+%blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : ();
+
+# If blacklisting is in effect, don't check global health status
+if (scalar keys %blacklist > 0) {
+ $global = 0;
+}
# Take into account new hardware and blades
-my $omopt_chassis = 'chassis'; # default "chassis" option to omreport
-my $omopt_system = 'system'; # default "system" option to omreport
-my $blade = 0; # if this is a blade system
+$omopt_chassis = 'chassis'; # default "chassis" option to omreport
+$omopt_system = 'system'; # default "system" option to omreport
+$blade = 0; # if this is a blade system
# Some initializations and checking before we begin
-if ($opt{snmp}) {
+if ($snmp) {
snmp_initialize(); # initialize SNMP
snmp_check(); # check that SNMP works
snmp_detect_blade(); # detect blade via SNMP
@@ -314,10 +413,62 @@
#---------------------------------------------------------------------
-# Functions
+# Helper functions
#---------------------------------------------------------------------
#
+# Store a message in one of the message arrays
+#
+sub report {
+    # Args: type (storage|chassis|other), message, nagios level, optional id
+    my ($type, $msg, $exval, $id) = @_;
+    $id = q{} if !defined $id;
+
+    my %array_for = ( 'storage' => \@report_storage,
+                      'chassis' => \@report_chassis,
+                      'other'   => \@report_other, );
+
+    # An unknown type must not swallow the message: file it under 'other'
+    my $target = (defined $type && exists $array_for{$type}) ? $array_for{$type} : \@report_other;
+    return push @{$target}, [ $msg, $exval, $id ];
+}
+
+
+#
+# Run command, put resulting output lines in an array and return a
+# pointer to that array
+#
+sub run_command {
+    my $command = shift;    # complete command line, handed to a piped open
+    my @lines = ();
+    if (open my $CMD, '-|', $command) {
+        @lines = <$CMD>;
+        # A piped close() fails with $! == 0 if the command merely exited non-zero
+        close $CMD or report('other', "Couldn't close filehandle for command '$command': "
+                             . ($! + 0 ? "$!" : "wait status $?"), $E_UNKNOWN);
+    }
+    else { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN) }
+    return \@lines;    # reference to the (possibly empty) list of output lines
+}
+
+#
+# Run command, put resulting output in a string variable and return it
+#
+sub slurp_command {
+    my $command = shift;    # complete command line, handed to a piped open
+    my $CMD;
+    if (!open($CMD, '-|', $command)) {
+        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
+        return;
+    }
+    # Read all output in one go. The return value of close() is deliberately
+    # not checked, since omreport does something weird sometimes.
+    my $rawtext = do { local $/ = undef; <$CMD> };
+    close $CMD;
+    return $rawtext;
+}
+
+#
# Initialize SNMP
#
sub snmp_initialize {
@@ -333,8 +484,8 @@
'-version' => $opt{protocol},
);
+ # Parameters for SNMP v3
if ($opt{protocol} == 3) {
- # Parameters for SNMP v3
# Username is mandatory
if (defined $opt{username}) {
@@ -342,7 +493,7 @@
}
else {
print "SNMP ERROR: With SNMPv3 the username must be specified\n";
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
# Authpassword is optional
@@ -371,8 +522,9 @@
$param{'-privprotocol'} = $opt{privprotocol};
}
else {
- print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', must be one of [des|aes|aes128|3des|3desde]\n";
- exit $UNKNOWN;
+ print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
+ . "must be one of [des|aes|aes128|3des|3desde]\n";
+ exit $E_UNKNOWN;
}
}
@@ -382,18 +534,19 @@
$param{'-authprotocol'} = $opt{authprotocol};
}
else {
- print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', must be one of [md5|sha]\n";
- exit $UNKNOWN;
+ print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
+ . "must be one of [md5|sha]\n";
+ exit $E_UNKNOWN;
}
}
}
+ # Parameters for SNMP v2c or v1
elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
- # Parameters for SNMP v2c or v1
$param{'-community'} = $opt{community};
}
else {
print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
# Try to initialize the SNMP session
@@ -401,12 +554,12 @@
($snmp_session, $snmp_error) = Net::SNMP->session( %param );
if (!defined $snmp_session) {
printf "SNMP: %s\n", $snmp_error;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
}
else {
print "You need perl module Net::SNMP to run $NAME in SNMP mode\n";
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
return;
}
@@ -422,13 +575,13 @@
# Typically if remote host isn't responding
if (!defined $result) {
printf "SNMP CRITICAL: %s\n", $snmp_session->error;
- exit $CRITICAL;
+ exit $E_CRITICAL;
}
# If OpenManage isn't installed or is not working
if ($result->{$chassisModelName} =~ m{\A noSuch (Instance|Object) \z}xms) {
- print "(SNMP) OpenManage is not installed or is not working correctly\n";
- exit $UNKNOWN;
+ print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
+ exit $E_UNKNOWN;
}
return;
}
@@ -443,7 +596,7 @@
# Identify blade. Older models (4th and 5th gen models) and/or old
# OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
# similar, we assume that this isn't a blade
- if ($result->{$DellBaseBoardType} eq '3') {
+ if (exists $result->{$DellBaseBoardType} && $result->{$DellBaseBoardType} eq '3') {
$blade = 1;
}
return;
@@ -453,21 +606,30 @@
# Locate the omreport binary
#
sub find_omreport {
+ # If user has specified path to omreport
+ if (defined $opt{omreport} and -x $opt{omreport}) {
+ $omreport = qq{"$opt{omreport}"};
+ return;
+ }
+
# Possible full paths for omreport
my @omreport_paths
= (
'/usr/bin/omreport', # default on Linux
+ '/opt/dell/srvadmin/bin/omreport', # default on Linux with OMSA 6.2.0
'/opt/dell/srvadmin/oma/bin/omreport.sh', # alternate on Linux
'/opt/dell/srvadmin/oma/bin/omreport', # alternate on Linux
- 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # default on Windows
- 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # default on Windows x64
+ 'C:\Program Files (x86)\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x64
+ 'C:\Program Files\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x32
+ 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x32
+ 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x64
);
# Find the one to use
OMREPORT_PATH:
foreach my $bin (@omreport_paths) {
if (-x $bin) {
- $omreport = $bin;
+ $omreport = qq{"$bin"};
last OMREPORT_PATH;
}
}
@@ -476,7 +638,7 @@
# have permission to execute the binary
if (!defined $omreport) {
print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
return;
}
@@ -487,10 +649,7 @@
# (on newer hardware), as well as blade servers.
#
sub check_omreport_options {
- open my $OMCHECK, '-|', "$omreport -? 2>&1"
- or ( push @report_other, [ "Couldn't run '$omreport -?': $!",
- $UNKNOWN, q{} ] and return );
- while (<$OMCHECK>) {
+ foreach (@{ run_command("$omreport -? 2>&1") }) {
if (m/\A servermodule /xms) {
# If "servermodule" argument to omreport exists, use it
# instead of argument "system"
@@ -507,7 +666,6 @@
$blade = 1;
}
}
- close $OMCHECK;
return;
}
@@ -524,8 +682,8 @@
my $tmp = q{};
if (-f $black) {
open my $BL, '<', $black
- or ( push @report_other, [ "Couldn't open blacklist file $black: $!",
- $UNKNOWN, q{} ] and return {} );
+ or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN)
+ and return {} };
$tmp = <$BL>;
close $BL;
chomp $tmp;
@@ -560,13 +718,58 @@
sub adjust_checks {
my @cl = ();
+    # Adjust checking based on the '--all' option
+    if ($opt{all}) {
+        # Check option usage: with "--all", "--only" may only name an alert type
+        if (defined $opt{only} and $opt{only} !~ m{\A (?:critical|warning) \z}xms) {
+            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
+            exit $E_UNKNOWN;
+        }
+        if (scalar @{ $opt{check} } > 0) {
+            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
+            exit $E_UNKNOWN;
+        }
+
+        # set the check hash to check everything
+        $check{$_} = 1 foreach keys %check;
+
+        return;
+    }
+
+    # Adjust checking based on the '--only' option (alternation grouped so both anchors apply)
+    if (defined $opt{only} and $opt{only} !~ m{\A (?:critical|warning) \z}xms) {
+        # Check option usage
+        if (scalar @{ $opt{check} } > 0) {
+            print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
+            exit $E_UNKNOWN;
+        }
+        if (! exists $check{$opt{only}} && $opt{only} ne 'chassis') {
+            print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
+            exit $E_UNKNOWN;
+        }
+
+        # reset the check hash
+        $check{$_} = 0 foreach keys %check;
+
+        # adjust the check hash
+        if ($opt{only} eq 'chassis') {
+            $check{$_} = 1 foreach qw(memory fans power temp cpu voltage
+                                      batteries amperage intrusion esmhealth);
+        }
+        else {
+            $check{$opt{only}} = 1;
+        }
+
+        return;
+    }
+
+ # Adjust checking based on the '--check' option
if (scalar @{ $opt{check} } >= 0) {
foreach my $check (@{ $opt{check} }) {
my $tmp = q{};
if (-f $check) {
open my $CL, '<', $check
- or ( push @report_other, [ "Couldn't open check file $check: $!",
- $UNKNOWN, q{} ] and return );
+ or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN) and return };
$tmp = <$CL>;
close $CL;
}
@@ -585,14 +788,22 @@
foreach my $c (@checks) {
next if $c !~ m/=/xms;
my ($key, $val) = split /=/xms, $c;
- if ($opt{global} and $key !~ m/ esmlog | alertlog /xms) {
- # If the '--global' switch is specified, you're only
- # allowed to mess with the log stuff
- next;
- }
$check{$key} = $val;
}
}
+
+ # Check if we should check global health status
+ CHECK_KEY:
+ foreach (keys %check) {
+ next CHECK_KEY if $_ eq 'esmlog'; # not part of global status
+ next CHECK_KEY if $_ eq 'alertlog'; # not part of global status
+
+ if ($check{$_} == 0) { # found something with checking turned off
+ $global = 0;
+ last CHECK_KEY;
+ }
+ }
+
return;
}
@@ -616,6 +827,7 @@
| No\scontrollers\sfound # No RAID controller
| No\sbattery\sprobes\sfound\son\sthis\ssystem # No battery probes
| Invalid\scommand:\spwrmonitoring # Older OMSAs lack this command(?)
+# | Current\sprobes\snot\sfound # No power monitoring capability
}xms;
# Errors that are OK on blade servers
@@ -625,27 +837,27 @@
}xms;
# Run omreport and fetch output
- open my $CMD, '-|', "$omreport $command -fmt ssv 2>&1"
- or ( push @report_other, [ "Couldn't run $omreport: $!",
- $UNKNOWN, q{} ] and return @output );
- my $rawtext = do { local $/ = undef; <$CMD> }; # slurping
- close $CMD;
+ my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
+ return [] if !defined $rawtext;
# Workaround for Openmanage BUG introduced in OMSA 5.5.0
- $rawtext =~ s/\n;/;/gxms if $command eq 'storage controller';
+ $rawtext =~ s{\n;}{;}gxms if $command eq 'storage controller';
+
+ # Openmanage sometimes puts a linebreak between "Error" and the
+ # actual error text
+ $rawtext =~ s{^Error\s*\n}{Error: }xms;
# Parse output, store in array
- for ((split /\n/xms, $rawtext)) {
- if (m/\A Error/xms) {
+ for ((split m{\n}xms, $rawtext)) {
+ if (m{\AError}xms) {
next if m{$ok_errors}xms;
next if ($blade and m{$ok_blade_errors}xms);
- push @report_other, [ "Problem running 'omreport $command': $_",
- $UNKNOWN, q{} ];
+ report('other', "Problem running 'omreport $command': $_", $E_UNKNOWN);
}
next if !m/(.*?;){2}/xms; # ignore lines with less than 3 fields
my @vals = split /;/xms;
- if ($vals[0] =~ m/\A (Index|ID|Severity) \z/xms) {
+ if ($vals[0] =~ m/\A (Index|ID|Severity|Processor|Current\sSpeed) \z/xms) {
@keys = @vals;
}
else {
@@ -673,7 +885,7 @@
if (defined $blacklist{$name}) {
foreach my $comp (@{ $blacklist{$name} }) {
- if (defined $id and $comp eq $id) {
+ if (defined $id and ($comp eq $id or uc($comp) eq 'ALL')) {
$ret = 1;
}
}
@@ -703,8 +915,8 @@
my $tmp = q{};
if (-f $t) {
open my $F, '<', $t
- or ( push @report_other, [ "Couldn't open temperature threshold file $t: $!",
- $UNKNOWN, q{} ] and return {} );
+ or do { report('other', "Couldn't open temperature threshold file $t: $!",
+ $E_UNKNOWN) and return {} };
$tmp = <$F>;
close $F;
}
@@ -739,17 +951,26 @@
# Gets the output from SNMP result according to the OIDs checked
sub get_snmp_output {
my ($result,$oidref) = @_;
+ my @temp = ();
my @output = ();
foreach my $oid (keys %{ $result }) {
- my @dummy = split /\./xms, $oid;
- my $id = pop @dummy;
- --$id;
- my $foo = join q{.}, @dummy;
- if (exists $oidref->{$foo}) {
- $output[$id]{$oidref->{$foo}} = $result->{$oid};
+ my $short = $oid;
+ $short =~ s{\s}{}gxms; # remove whitespace
+ $short =~ s{\A (.+) \. (\d+) \z}{$1}xms; # remove last number
+ my $id = $2;
+ if (exists $oidref->{$short}) {
+ $temp[$id]{$oidref->{$short}} = $result->{$oid};
+ }
+ }
+
+ # Remove any empty indexes
+ foreach my $out (@temp) {
+ if (defined $out) {
+ push @output, $out;
}
}
+
return \@output;
}
@@ -760,28 +981,106 @@
foreach my $lst (@{ $list }) {
if (!exists $lst->{$key}) {
- $lst->{$key} = $val
+ $lst->{$key} = $val;
}
}
return;
}
+# Return the URL for official Dell documentation for a specific
+# PowerEdge server
+sub documentation_url {
+    my $model = shift;    # full model name, e.g. "PowerEdge R710"
+
+    # Short form of the model: drop the "PowerEdge " prefix, lowercase the rest
+    if ($model =~ m{\A PowerEdge \s (.+?) \z}xms) {
+        $model = lc $1;
+    }
+    # Blades (e.g. M600, M710) share common documentation under plain "m"
+    $model = 'm' if $model =~ m{\A m\d+ \z}xms;
+
+    return "http://support.dell.com/support/edocs/systems/pe$model/";
+}
+
+# Return the URL for warranty information for a server with a given
+# serial number (servicetag)
+sub warranty_url {
+    my $tag = shift;    # service tag, appended verbatim to the chosen URL prefix
+
+    # Dell support sites for different parts of the world
+    my %supportsite
+      = (
+         'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/',
+         'ap'   => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?',
+         'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?',
+        );
+
+    # warranty URLs for different country codes (keys are lower case, except the fallback)
+    my %url
+      = (
+         # EMEA
+         'at' => $supportsite{emea} . 'de/details?c=at&l=de&ServiceTag=',  # Austria
+         'be' => $supportsite{emea} . 'nl/details?c=be&l=nl&ServiceTag=',  # Belgium
+         'cz' => $supportsite{emea} . 'cs/details?c=cz&l=cs&ServiceTag=',  # Czech Republic
+         'de' => $supportsite{emea} . 'de/details?c=de&l=de&ServiceTag=',  # Germany
+         'dk' => $supportsite{emea} . 'da/details?c=dk&l=da&ServiceTag=',  # Denmark
+         'es' => $supportsite{emea} . 'es/details?c=es&l=es&ServiceTag=',  # Spain
+         'fi' => $supportsite{emea} . 'fi/details?c=fi&l=fi&ServiceTag=',  # Finland
+         'fr' => $supportsite{emea} . 'fr/details?c=fr&l=fr&ServiceTag=',  # France
+         'gr' => $supportsite{emea} . 'en/details?c=gr&l=el&ServiceTag=',  # Greece
+         'it' => $supportsite{emea} . 'it/details?c=it&l=it&ServiceTag=',  # Italy
+         'il' => $supportsite{emea} . 'en/details?c=il&l=en&ServiceTag=',  # Israel
+         'me' => $supportsite{emea} . 'en/details?c=me&l=en&ServiceTag=',  # Middle East
+         'no' => $supportsite{emea} . 'no/details?c=no&l=no&ServiceTag=',  # Norway
+         'nl' => $supportsite{emea} . 'nl/details?c=nl&l=nl&ServiceTag=',  # The Netherlands
+         'pl' => $supportsite{emea} . 'pl/details?c=pl&l=pl&ServiceTag=',  # Poland
+         'pt' => $supportsite{emea} . 'en/details?c=pt&l=pt&ServiceTag=',  # Portugal
+         'ru' => $supportsite{emea} . 'ru/details?c=ru&l=ru&ServiceTag=',  # Russia
+         'se' => $supportsite{emea} . 'sv/details?c=se&l=sv&ServiceTag=',  # Sweden
+         'uk' => $supportsite{emea} . 'en/details?c=uk&l=en&ServiceTag=',  # United Kingdom
+         'za' => $supportsite{emea} . 'en/details?c=za&l=en&ServiceTag=',  # South Africa
+         # America
+         'br' => $supportsite{glob} . 'c=br&l=pt&ServiceTag=',  # Brazil
+         'ca' => $supportsite{glob} . 'c=ca&l=en&ServiceTag=',  # Canada
+         'mx' => $supportsite{glob} . 'c=mx&l=es&ServiceTag=',  # Mexico
+         'us' => $supportsite{glob} . 'c=us&l=en&ServiceTag=',  # USA
+         # Asia/Pacific
+         'au' => $supportsite{ap} . 'c=au&l=en&ServiceTag=',  # Australia
+         'cn' => $supportsite{ap} . 'c=cn&l=zh&ServiceTag=',  # China
+         'in' => $supportsite{ap} . 'c=in&l=en&ServiceTag=',  # India
+         # default fallback
+         'XX' => $supportsite{glob} . 'ServiceTag=',  # default
+        );
+
+    # The country code from --htmlinfo is matched case-insensitively; a
+    # missing or unknown code falls back to the global support site
+    my $country = defined $opt{htmlinfo} ? lc $opt{htmlinfo} : q{};
+    my $prefix  = exists $url{$country} ? $url{$country} : $url{XX};
+
+    return $prefix . $tag;
+}
+
+
+
+#---------------------------------------------------------------------
+# Check functions
+#---------------------------------------------------------------------
#-----------------------------------------
# Check global health status
#-----------------------------------------
sub check_global {
- my $health = $OK;
+ my $health = $E_OK;
- if ($opt{snmp}) {
+ if ($snmp) {
#
# Checks global status, i.e. both storage and chassis
#
my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
if (!defined $result) {
- printf "SNMP [systemStateGlobalSystemStatus]: %s\n", $snmp_error;
- exit $UNKNOWN;
+            printf "SNMP ERROR [global]: %s\n", $snmp_session->error;  # $snmp_error is only assigned by Net::SNMP->session()
+            exit $E_UNKNOWN;
}
$health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
}
@@ -789,10 +1088,7 @@
#
# NB! This does not check storage, only chassis...
#
- open my $CMD, '-|', "$omreport $omopt_system -fmt ssv"
- or ( push @report_other, [ sprintf("Couldn't run $omreport $omopt_system: $!"),
- $UNKNOWN, q{} ] and return $OK );
- while (<$CMD>) {
+ foreach (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
next if !m/;/xms;
next if m/\A SEVERITY;COMPONENT/xms;
if (m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
@@ -800,7 +1096,6 @@
last;
}
}
- close $CMD;
}
# Return the status
@@ -821,9 +1116,11 @@
my $mindr = undef;
my $firmware = undef;
my $driver = undef;
+ my $minstdr = undef; # Minimum required Storport driver version (presumably Windows-only; TODO confirm)
+ my $stdr = undef; # Storport driver version (presumably Windows-only; TODO confirm)
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %ctrl_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.1.1.1' => 'controllerNumber',
@@ -835,8 +1132,15 @@
'1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
'1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
'1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
+ '1.3.6.1.4.1.674.10893.1.20.130.1.1.55' => 'FIXME_StorportDriverVersion',
+ '1.3.6.1.4.1.674.10893.1.20.130.1.1.56' => 'FIXME_StorportMinDriverVersion',
);
- my $result = $snmp_session->get_entries(-columns => [keys %ctrl_oid]);
+
+ # We use get_table() here for the odd case where a server has
+ # two or more controllers, and where some OIDs are missing on
+ # one of the controllers.
+ my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
+ my $result = $snmp_session->get_table(-baseoid => $controllerTable);
# No controllers is OK
return if !defined $result;
@@ -859,19 +1163,23 @@
CTRL:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
- $id = $out->{'controllerNumber'} - 1;
- $name = $out->{'controllerName'};
- $state = $ctrl_state{$out->{'controllerState'}};
- $status = $snmp_status{$out->{'controllerComponentStatus'}};
- $minfw = exists $out->{'controllerMinFWVersion'}
- ? $out->{'controllerMinFWVersion'} : undef;
- $mindr = exists $out->{'controllerMinDriverVersion'}
- ? $out->{'controllerMinDriverVersion'} : undef;
+ if ($snmp) {
+ $id = $out->{controllerNumber} - 1;
+ $name = $out->{controllerName};
+ $state = $ctrl_state{$out->{controllerState}};
+ $status = $snmp_status{$out->{controllerComponentStatus}};
+ $minfw = exists $out->{controllerMinFWVersion}
+ ? $out->{controllerMinFWVersion} : undef;
+ $mindr = exists $out->{controllerMinDriverVersion}
+ ? $out->{controllerMinDriverVersion} : undef;
$firmware = exists $out->{controllerFWVersion}
? $out->{controllerFWVersion} : 'N/A';
$driver = exists $out->{controllerDriverVersion}
? $out->{controllerDriverVersion} : 'N/A';
+ $minstdr = exists $out->{'FIXME_StorportMinDriverVersion'}
+ ? $out->{FIXME_StorportMinDriverVersion} : undef;
+ $stdr = exists $out->{FIXME_StorportDriverVersion}
+ ? $out->{FIXME_StorportDriverVersion} : undef;
$nexus = convert_nexus($out->{controllerNexusID});
}
else {
@@ -887,6 +1195,12 @@
? $out->{'Firmware Version'} : 'N/A';
$driver = $out->{'Driver Version'} ne 'Not Applicable'
? $out->{'Driver Version'} : 'N/A';
+ $minstdr = (exists $out->{'Minimum Required Storport Driver Version'}
+ and $out->{'Minimum Required Storport Driver Version'} ne 'Not Applicable')
+ ? $out->{'Minimum Required Storport Driver Version'} : undef;
+ $stdr = (exists $out->{'Storport Driver Version'}
+ and $out->{'Storport Driver Version'} ne 'Not Applicable')
+ ? $out->{'Storport Driver Version'} : undef;
$nexus = $id;
}
@@ -898,35 +1212,43 @@
$sysinfo{'controller'}{$id}{'name'} = $name;
$sysinfo{'controller'}{$id}{'driver'} = $driver;
$sysinfo{'controller'}{$id}{'firmware'} = $firmware;
+ $sysinfo{'controller'}{$id}{'storport'} = $stdr;
next CTRL if blacklisted('ctrl', $nexus);
# Special case: old firmware
if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
chomp $firmware;
- push @report_storage, [ sprintf('Controller %d (%s): Firmware is out of date (%s)',
- $id, $name, $firmware),
- $WARNING, $nexus ];
+ my $msg = sprintf q{Controller %d [%s]: Firmware '%s' is out of date},
+ $id, $name, $firmware;
+ report('storage', $msg, $E_WARNING, $nexus);
}
# Special case: old driver
if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
chomp $driver;
- push @report_storage, [ sprintf('Controller %d (%s): Driver is out of date (%s)',
- $id, $name, $driver),
- $WARNING, $nexus ];
+ my $msg = sprintf q{Controller %d [%s]: Driver '%s' is out of date},
+ $id, $name, $driver;
+ report('storage', $msg, $E_WARNING, $nexus);
+ }
+ # Special case: old storport driver
+ if (!blacklisted('ctrl_stdr', $id) && defined $minstdr) {
+ chomp $stdr;
+ my $msg = sprintf q{Controller %d [%s]: Storport driver '%s' is out of date},
+ $id, $name, $stdr;
+ report('storage', $msg, $E_WARNING, $nexus);
}
# Ok
if ($status eq 'Ok' or ($status eq 'Non-Critical'
- and (defined $minfw or defined $mindr))) {
- push @report_storage, [ sprintf('Controller %d (%s) is %s',
- $id, $name, $state),
- $OK, $nexus ];
+ and (defined $minfw or defined $mindr or defined $minstdr))) {
+ my $msg = sprintf 'Controller %d [%s] is %s',
+ $id, $name, $state;
+ report('storage', $msg, $E_OK, $nexus);
}
# Default
else {
- push @report_storage, [ sprintf('Controller %d (%s) needs attention (%s)',
- $id, $name, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf 'Controller %d [%s] needs attention: %s',
+ $id, $name, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
}
return;
@@ -939,38 +1261,44 @@
sub check_physical_disks {
return if $#controllers == -1;
- my $id = undef;
- my $nexus = undef;
- my $name = undef;
- my $state = undef;
- my $status = undef;
- my $fpred = undef;
- my $progr = undef;
- my $ctrl = undef;
- my @output = ();
+ my $id = undef;
+ my $nexus = undef;
+ my $name = undef;
+ my $state = undef;
+ my $status = undef;
+ my $fpred = undef;
+ my $progr = undef;
+ my $ctrl = undef;
+ my $vendor = undef; # disk vendor
+ my $product = undef; # product ID
+ my $capacity = undef; # disk length (size) in bytes
+ my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %pdisk_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.4.1.1' => 'arrayDiskNumber',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.2' => 'arrayDiskName',
+ '1.3.6.1.4.1.674.10893.1.20.130.4.1.3' => 'arrayDiskVendor',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.4' => 'arrayDiskState',
+ '1.3.6.1.4.1.674.10893.1.20.130.4.1.6' => 'arrayDiskProductID',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.9' => 'arrayDiskEnclosureID',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
+ '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
'1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
- '1.3.6.1.4.1.674.10893.1.20.130.5.1.5' => 'arrayDiskEnclosureConnectionEnclosureNumber',
'1.3.6.1.4.1.674.10893.1.20.130.5.1.7' => 'arrayDiskEnclosureConnectionControllerNumber',
+ '1.3.6.1.4.1.674.10893.1.20.130.6.1.7' => 'arrayDiskChannelConnectionControllerNumber',
);
my $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);
if (!defined $result) {
- printf "SNMP [storage / pdisk]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
@output = @{ get_snmp_output($result, \%pdisk_oid) };
@@ -1008,61 +1336,91 @@
# Check physical disks on each of the controllers
PDISK:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$name = $out->{arrayDiskName};
- if ($name =~ m{.*\d+:\d+:\d+\z}xms) {
+ if (exists $out->{arrayDiskEnclosureID}) {
$id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskEnclosureID},
- $out->{arrayDiskTargetID});
+ $out->{arrayDiskTargetID});
}
else {
$id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskTargetID});
}
- $state = $pdisk_state{$out->{arrayDiskState}};
- $status = $snmp_status{$out->{arrayDiskComponentStatus}};
- $fpred = $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
- $progr = q{};
- $ctrl = exists $out->{arrayDiskEnclosureConnectionControllerNumber}
- ? $out->{arrayDiskEnclosureConnectionControllerNumber} - 1
- : -1;
- $nexus = convert_nexus($out->{arrayDiskNexusID});
+ $state = $pdisk_state{$out->{arrayDiskState}};
+ $status = $snmp_status{$out->{arrayDiskComponentStatus}};
+ $fpred = $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
+ $progr = q{};
+ $nexus = convert_nexus($out->{arrayDiskNexusID});
+ $vendor = $out->{arrayDiskVendor};
+ $product = $out->{arrayDiskProductID};
+ $capacity = $out->{arrayDiskLengthInMB} * 1024**2;
+ if (exists $out->{arrayDiskEnclosureConnectionControllerNumber}) {
+ $ctrl = $out->{arrayDiskEnclosureConnectionControllerNumber} - 1;
+ }
+ elsif (exists $out->{arrayDiskChannelConnectionControllerNumber}) {
+ $ctrl = $out->{arrayDiskChannelConnectionControllerNumber} - 1;
+ }
+ else {
+ $ctrl = -1;
+ }
}
else {
- $id = $out->{'ID'};
- $name = $out->{'Name'};
- $state = $out->{'State'};
- $status = $out->{'Status'};
- $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
- $progr = ' [' . $out->{'Progress'} . ']';
- $ctrl = $out->{'ctrl'};
- $nexus = join q{:}, $out->{ctrl}, $id;
+ $id = $out->{'ID'};
+ $name = $out->{'Name'};
+ $state = $out->{'State'};
+ $status = $out->{'Status'};
+ $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
+ $progr = ' [' . $out->{'Progress'} . ']';
+ $ctrl = $out->{'ctrl'};
+ $nexus = join q{:}, $out->{ctrl}, $id;
+ $vendor = $out->{'Vendor ID'};
+ $product = $out->{'Product ID'};
+ $capacity = $out->{'Capacity'};
+ $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
}
next PDISK if blacklisted('pdisk', $nexus);
- $no_of_pdisks++;
+ $count{pdisk}++;
+
+ $vendor =~ s{\s+\z}{}xms; # remove trailing whitespace
+ $product =~ s{\s+\z}{}xms; # remove trailing whitespace
+
+ # Calculate human readable capacity
+ $capacity = ceil($capacity / 1000**3) >= 1000
+ ? sprintf '%.1fTB', ($capacity / 1000**4)
+ : sprintf '%.0fGB', ($capacity / 1000**3);
+ $capacity = '450GB' if $capacity eq '449GB'; # quick fix for 450GB disks
+ $capacity = '300GB' if $capacity eq '299GB'; # quick fix for 300GB disks
+ $capacity = '146GB' if $capacity eq '147GB'; # quick fix for 146GB disks
+
+ # Lowercase everything after the first letter of the vendor name (e.g. SEAGATE -> Seagate)
+ $vendor = (substr $vendor, 0, 1) . lc (substr $vendor, 1, length $vendor);
+
+ # Remove unnecessary trademark rubbish from vendor name
+ $vendor =~ s{\(tm\)\z}{}xms;
# Special case: Failure predicted
if ($status eq 'Non-Critical' and $fpred) {
- push @report_storage, [ sprintf('%s on controller %d needs attention (Failure Predicted)',
- $name, $ctrl),
- $WARNING, $nexus ];
+ my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted',
+ $name, $vendor, $product, $capacity, $ctrl;
+ report('storage', $msg, $E_WARNING, $nexus);
}
# Special case: Rebuilding
elsif ($state eq 'Rebuilding') {
- push @report_storage, [ sprintf('%s on controller %d is %s%s',
- $name, $ctrl, $state, $progr),
- $WARNING, $nexus ];
+ my $msg = sprintf '%s [%s] on ctrl %d is %s%s',
+ $name, $capacity, $ctrl, $state, $progr;
+ report('storage', $msg, $E_WARNING, $nexus);
}
# Default
elsif ($status ne 'Ok') {
- push @report_storage, [ sprintf('%s on controller %d needs attention (%s)',
- $name, $ctrl, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s',
+ $name, $vendor, $product, $capacity, $ctrl, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('%s on controller %d is %s',
- $name, $ctrl, $state),
- $OK, $nexus ];
+ my $msg = sprintf '%s [%s] on ctrl %d is %s',
+ $name, $capacity, $ctrl, $state;
+ report('storage', $msg, $E_OK, $nexus);
}
}
return;
@@ -1076,6 +1434,7 @@
return if $#controllers == -1;
my $id = undef;
+ my $name = undef;
my $nexus = undef;
my $dev = undef;
my $state = undef;
@@ -1083,17 +1442,17 @@
my $layout = undef;
my $size = undef;
my $progr = undef;
+ my $ctrl = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %vdisk_oid
= (
- '1.3.6.1.4.1.674.10893.1.20.140.1.1.1' => 'virtualDiskNumber',
- '1.3.6.1.4.1.674.10893.1.20.140.1.1.2' => 'virtualDiskName',
'1.3.6.1.4.1.674.10893.1.20.140.1.1.3' => 'virtualDiskDeviceName',
'1.3.6.1.4.1.674.10893.1.20.140.1.1.4' => 'virtualDiskState',
'1.3.6.1.4.1.674.10893.1.20.140.1.1.6' => 'virtualDiskLengthInMB',
'1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
+ '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID',
'1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
'1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
);
@@ -1147,15 +1506,17 @@
# Check virtual disks on each of the controllers
VDISK:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
- $id = $out->{virtualDiskNumber} - 1;
+ if ($snmp) {
+ $id = $out->{virtualDiskTargetID};
$dev = $out->{virtualDiskDeviceName};
$state = $vdisk_state{$out->{virtualDiskState}};
$status = $snmp_status{$out->{virtualDiskComponentStatus}};
$layout = $vdisk_layout{$out->{virtualDiskLayout}};
$size = sprintf '%.2f GB', $out->{virtualDiskLengthInMB} / 1024;
- $progr = q{};
+ $progr = q{}; # can't get this from SNMP(?)
$nexus = convert_nexus($out->{virtualDiskNexusID});
+ $ctrl = $nexus; # We use the nexus id to get the controller id
+ $ctrl =~ s{\A (\d+):\d+ \z}{$1}xms;
}
else {
$id = $out->{ID};
@@ -1167,28 +1528,32 @@
$progr = ' [' . $out->{Progress} . ']';
$size =~ s{\A (.*GB).* \z}{$1}xms;
$nexus = join q{:}, $out->{ctrl}, $id;
+ $ctrl = $out->{ctrl};
}
next VDISK if blacklisted('vdisk', $nexus);
- $no_of_vdisks++;
+ $count{vdisk}++;
+
+ # The device name is undefined sometimes
+ $dev = q{} if !defined $dev;
# Special case: Regenerating
if ($state eq 'Regenerating') {
- push @report_storage, [ sprintf('Logical drive %s (%s - %s) is %s%s',
- $dev, $layout, $size, $state, $progr),
- $WARNING, $nexus ];
+ my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s%s},
+ $id, $dev, $layout, $size, $ctrl, $state, $progr;
+ report('storage', $msg, $E_WARNING, $nexus);
}
# Default
elsif ($status ne 'Ok') {
- push @report_storage, [ sprintf('Logical drive %s (%s - %s) needs attention (%s)',
- $dev, $layout, $size, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d needs attention: %s},
+ $id, $dev, $layout, $size, $ctrl, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('Logical drive %d %s (%s - %s) is %s',
- $id, $dev, $layout, $size, $state),
- $OK, $nexus ];
+ my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s},
+ $id, $dev, $layout, $size, $ctrl, $state;
+ report('storage', $msg, $E_OK, $nexus);
}
}
return;
@@ -1210,11 +1575,9 @@
my $pred = undef; # battery's ability to be charged
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %bat_oid
= (
- '1.3.6.1.4.1.674.10893.1.20.130.15.1.1' => 'batteryNumber',
- '1.3.6.1.4.1.674.10893.1.20.130.15.1.2' => 'batteryName',
'1.3.6.1.4.1.674.10893.1.20.130.15.1.4' => 'batteryState',
'1.3.6.1.4.1.674.10893.1.20.130.15.1.6' => 'batteryComponentStatus',
'1.3.6.1.4.1.674.10893.1.20.130.15.1.9' => 'batteryNexusID',
@@ -1250,6 +1613,7 @@
36 => 'Learning',
);
+ # Specifies the learn state activity of the battery
my %bat_learn_state
= (
1 => 'Failed',
@@ -1259,18 +1623,19 @@
16 => 'Idle',
);
+ # This property displays the battery's ability to be charged
my %bat_pred_cap
= (
- 1 => 'Failed',
- 2 => 'Ready',
- 4 => 'Unknown',
+ 1 => 'Failed', # The battery cannot be charged and needs to be replaced
+ 2 => 'Ready', # The battery can be charged to full capacity
+ 4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
+ # battery cannot be determined until the Learn cycle is complete
);
# Check battery on each of the controllers
BATTERY:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
- $id = $out->{batteryNumber} - 1;
+ if ($snmp) {
$state = $bat_state{$out->{batteryState}};
$status = $snmp_status{$out->{batteryComponentStatus}};
$learn = exists $out->{batteryLearnState}
@@ -1279,6 +1644,8 @@
? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef;
$ctrl = $out->{batteryConnectionControllerNumber} - 1;
$nexus = convert_nexus($out->{batteryNexusID});
+ $id = $nexus;
+ $id =~ s{\A \d+:(\d+) \z}{$1}xms;
}
else {
$id = $out->{'ID'};
@@ -1294,33 +1661,57 @@
# Special case: Charging
if ($state eq 'Charging') {
- push @report_storage, [ sprintf('Cache battery %d in controller %d is %s (%s) [probably harmless]',
- $id, $ctrl, $state, $pred),
- $WARNING, $nexus ];
+ if ($pred eq 'Failed') {
+ my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [replace battery]',
+ $id, $ctrl, $state, $pred;
+ report('storage', $msg, $E_CRITICAL, $nexus);
+ }
+ else {
+ next BATTERY if blacklisted('bat_charge', $nexus);
+ my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
+ $id, $ctrl, $state, $pred;
+ report('storage', $msg, $E_WARNING, $nexus);
+ }
}
- # Special case: Learning (whats this?)
+ # Special case: Learning (battery learns its capacity)
elsif ($state eq 'Learning') {
- push @report_storage, [ sprintf('Cache battery %d in controller %d is %s (%s) [probably harmless]',
- $id, $ctrl, $state, $learn),
- $WARNING, $nexus ];
+ if ($learn eq 'Failed') {
+ my $msg = sprintf 'Cache battery %d in controller %d is %s (%s)',
+ $id, $ctrl, $state, $learn;
+ report('storage', $msg, $E_CRITICAL, $nexus);
+ }
+ else {
+ next BATTERY if blacklisted('bat_charge', $nexus);
+ my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
+ $id, $ctrl, $state, $learn;
+ report('storage', $msg, $E_WARNING, $nexus);
+ }
}
- # Special case: Power Low (part of recharge cycle?)
+ # Special case: Power Low (first part of recharge cycle)
elsif ($state eq 'Power Low') {
- push @report_storage, [ sprintf('Cache battery %d in controller %d is %s [probably harmless]',
- $id, $ctrl, $state),
- $WARNING, $nexus ];
+ next BATTERY if blacklisted('bat_charge', $nexus);
+ my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]',
+ $id, $ctrl, $state;
+ report('storage', $msg, $E_WARNING, $nexus);
+ }
+ # Special case: Degraded and Non-Critical (usually part of recharge cycle)
+ elsif ($state eq 'Degraded' && $status eq 'Non-Critical') {
+ next BATTERY if blacklisted('bat_charge', $nexus);
+ my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
+ $id, $ctrl, $state, $status;
+ report('storage', $msg, $E_WARNING, $nexus);
}
# Default
elsif ($status ne 'Ok') {
- push @report_storage, [ sprintf('Cache battery %d in controller %d needs attention (%s / %s)',
- $id, $ctrl, $state, $status),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)',
+ $id, $ctrl, $state, $status;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('Cache battery %d in controller %d is %s',
- $id, $ctrl, $state),
- $OK, $nexus ];
+ my $msg = sprintf 'Cache battery %d in controller %d is %s',
+ $id, $ctrl, $state;
+ report('storage', $msg, $E_OK, $nexus);
}
}
return;
@@ -1342,7 +1733,7 @@
my $ctrl = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %conn_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.2.1.1' => 'channelNumber',
@@ -1355,9 +1746,9 @@
my $result = $snmp_session->get_entries(-columns => [keys %conn_oid]);
if (!defined $result) {
- printf "SNMP [storage / channel]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
@output = @{ get_snmp_output($result, \%conn_oid) };
@@ -1393,7 +1784,7 @@
# Check connectors on each of the controllers
CHANNEL:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$id = $out->{channelNumber} - 1;
$name = $out->{channelName};
$state = $conn_state{$out->{channelState}};
@@ -1415,9 +1806,9 @@
next CHANNEL if blacklisted('conn', $nexus);
- push @report_storage, [ sprintf('%s (%s) on controller %d is %s',
- $name, $type, $ctrl, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf '%s [%s] on controller %d is %s',
+ $name, $type, $ctrl, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
return;
}
@@ -1433,9 +1824,10 @@
my $state = undef;
my $status = undef;
my $firmware = undef;
+ my $ctrl = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %encl_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.3.1.1' => 'enclosureNumber',
@@ -1472,7 +1864,7 @@
ENCLOSURE:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$id = $out->{'enclosureNumber'} - 1;
$name = $out->{'enclosureName'};
$state = $encl_state{$out->{'enclosureState'}};
@@ -1480,6 +1872,8 @@
$firmware = exists $out->{enclosureFirmwareVersion}
? $out->{enclosureFirmwareVersion} : 'N/A';
$nexus = convert_nexus($out->{enclosureNexusID});
+ $ctrl = $nexus;
+ $ctrl =~ s{\A (\d+):.* \z}{$1}xms;
}
else {
$id = $out->{ID};
@@ -1489,23 +1883,27 @@
$firmware = $out->{'Firmware Version'} ne 'Not Applicable'
? $out->{'Firmware Version'} : 'N/A';
$nexus = join q{:}, $out->{ctrl}, $id;
+ $ctrl = $out->{ctrl};
}
$name =~ s{\s+\z}{}xms; # remove trailing whitespace
$firmware =~ s{\s+\z}{}xms; # remove trailing whitespace
- $enclosure{$id} = $name;
+ # store enclosure data for future use
+ push @enclosures, { 'id' => $id,
+ 'ctrl' => $ctrl, # set for both SNMP and omreport above; $out->{ctrl} is only mapped for omreport
+ 'name' => $name };
# Collecting some storage info
- $sysinfo{'enclosure'}{$id}{'id'} = $nexus;
- $sysinfo{'enclosure'}{$id}{'name'} = $name;
- $sysinfo{'enclosure'}{$id}{'firmware'} = $firmware;
+ $sysinfo{'enclosure'}{$nexus}{'id'} = $nexus;
+ $sysinfo{'enclosure'}{$nexus}{'name'} = $name;
+ $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;
next ENCLOSURE if blacklisted('encl', $nexus);
- push @report_storage, [ sprintf('Enclosure %s (%s) is %s',
- $id, $name, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf 'Enclosure %s [%s] on controller %d is %s',
+ $nexus, $name, $ctrl, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
return;
}
@@ -1527,7 +1925,7 @@
my $encl_name = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %fan_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.7.1.1' => 'fanNumber',
@@ -1548,12 +1946,11 @@
@output = @{ get_snmp_output($result, \%fan_oid) };
}
else {
- foreach my $c (@controllers) {
- foreach my $e (keys %enclosure) {
- push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=fans") };
- map_item('ctrl', $c, \@output);
- map_item('encl_id', $e, \@output);
- }
+ foreach my $enc (@enclosures) {
+ push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans") };
+ map_item('ctrl', $enc->{ctrl}, \@output);
+ map_item('encl_id', $enc->{id}, \@output);
+ map_item('encl_name', $enc->{name}, \@output);
}
}
@@ -1571,7 +1968,7 @@
# Check fans on each of the enclosures
FAN:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$id = $out->{fanNumber} - 1;
$name = $out->{fanName};
$state = $fan_state{$out->{fanState}};
@@ -1587,24 +1984,24 @@
$state = $out->{'State'};
$status = $out->{'Status'};
$speed = $out->{'Speed'};
- $encl_id = $out->{'encl_id'};
- $encl_name = $enclosure{$encl_id};
- $nexus = join q{:}, $out->{ctrl}, $encl_id, $id;
+ $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
+ $encl_name = $out->{encl_name};
+ $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
}
next FAN if blacklisted('encl_fan', $nexus);
# Default
if ($status ne 'Ok') {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) needs attention (%s)',
- $name, $encl_id, $encl_name, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
+ $name, $encl_id, $encl_name, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s (speed=%s)',
- $name, $encl_id, $encl_name, $state, $speed),
- $OK, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] is %s (speed=%s)',
+ $name, $encl_id, $encl_name, $state, $speed;
+ report('storage', $msg, $E_OK, $nexus);
}
}
return;
@@ -1626,7 +2023,7 @@
my $encl_name = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %ps_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.9.1.1' => 'powerSupplyNumber',
@@ -1645,12 +2042,11 @@
@output = @{ get_snmp_output($result, \%ps_oid) };
}
else {
- foreach my $c (@controllers) {
- foreach my $e (keys %enclosure) {
- push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=pwrsupplies") };
- map_item('ctrl', $c, \@output);
- map_item('encl_id', $e, \@output);
- }
+ foreach my $enc (@enclosures) {
+ push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies") };
+ map_item('ctrl', $enc->{ctrl}, \@output);
+ map_item('encl_id', $enc->{id}, \@output);
+ map_item('encl_name', $enc->{name}, \@output);
}
}
@@ -1668,7 +2064,7 @@
# Check power supplies on each of the enclosures
PS:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$id = $out->{powerSupplyNumber};
$name = $out->{powerSupplyName};
$state = $ps_state{$out->{powerSupplyState}};
@@ -1682,24 +2078,24 @@
$name = $out->{'Name'};
$state = $out->{'State'};
$status = $out->{'Status'};
- $encl_id = $out->{'encl_id'};
- $encl_name = $enclosure{$encl_id};
- $nexus = join q{:}, $out->{ctrl}, $encl_id, $id;
+ $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
+ $encl_name = $out->{encl_name};
+ $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
}
next PS if blacklisted('encl_ps', $nexus);
# Default
if ($status ne 'Ok') {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) needs attention (%s)',
- $name, $encl_id, $encl_name, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
+ $name, $encl_id, $encl_name, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s',
- $name, $encl_id, $encl_name, $state),
- $OK, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] is %s',
+ $name, $encl_id, $encl_name, $state;
+ report('storage', $msg, $E_OK, $nexus);
}
}
return;
@@ -1725,7 +2121,7 @@
my $encl_name = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %temp_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.11.1.1' => 'temperatureProbeNumber',
@@ -1748,12 +2144,11 @@
@output = @{ get_snmp_output($result, \%temp_oid) };
}
else {
- foreach my $c (@controllers) {
- foreach my $e (keys %enclosure) {
- push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=temps") };
- map_item('ctrl', $c, \@output);
- map_item('encl_id', $e, \@output);
- }
+ foreach my $enc (@enclosures) {
+ push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
+ map_item('ctrl', $enc->{ctrl}, \@output);
+ map_item('encl_id', $enc->{id}, \@output);
+ map_item('encl_name', $enc->{name}, \@output);
}
}
@@ -1771,7 +2166,7 @@
# Check temperature probes on each of the enclosures
TEMP:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$id = $out->{temperatureProbeNumber} - 1;
$name = $out->{temperatureProbeName};
$state = $temp_state{$out->{temperatureProbeState}};
@@ -1793,26 +2188,32 @@
$reading = $out->{'Reading'}; $reading =~ s{\s*C}{}xms;
$max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\s*C}{}xms;
$max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\s*C}{}xms;
- $encl_id = $out->{'encl_id'};
- $encl_name = $enclosure{$encl_id};
- $nexus = join q{:}, $out->{ctrl}, $encl_id, $id;
+ $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
+ $encl_name = $out->{encl_name};
+ $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
}
next TEMP if blacklisted('encl_temp', $nexus);
# Default
if ($status ne 'Ok') {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s at %s (%s max)',
- $name, $encl_id, $encl_name,
- $state, $reading, $max_crit),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] is %s at %s C (%s max)',
+ $name, $encl_id, $encl_name, $state, $reading, $max_crit; # unit 'C' belongs to $reading, not $state
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('%s in enclosure %s (%s): %s (%s max)',
- $name, $encl_id, $encl_name,
- $reading, $max_crit),
- $OK, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s]: %s C (%s max)',
+ $name, $encl_id, $encl_name, $reading, $max_crit;
+ report('storage', $msg, $E_OK, $nexus);
+ }
+
+ # Collect performance data
+ if (defined $opt{perfdata}) {
+ $name =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
+ my $pkey = "enclosure_${encl_id}_${name}";
+ my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
+ $perfdata{$pkey} = $pval;
}
}
return;
@@ -1834,7 +2235,7 @@
my $encl_name = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %emms_oid
= (
'1.3.6.1.4.1.674.10893.1.20.130.13.1.1' => 'enclosureManagementModuleNumber',
@@ -1853,12 +2254,11 @@
@output = @{ get_snmp_output($result, \%emms_oid) };
}
else {
- foreach my $c (@controllers) {
- foreach my $e (keys %enclosure) {
- push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=emms") };
- map_item('ctrl', $c, \@output);
- map_item('encl_id', $e, \@output);
- }
+ foreach my $enc (@enclosures) {
+ push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms") };
+ map_item('ctrl', $enc->{ctrl}, \@output);
+ map_item('encl_id', $enc->{id}, \@output);
+ map_item('encl_name', $enc->{name}, \@output);
}
}
@@ -1877,7 +2277,7 @@
# Check temperature probes on each of the enclosures
EMM:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$id = $out->{enclosureManagementModuleNumber} - 1;
$name = $out->{enclosureManagementModuleName};
$state = $emms_state{$out->{enclosureManagementModuleState}};
@@ -1891,24 +2291,24 @@
$name = $out->{'Name'};
$state = $out->{'State'};
$status = $out->{'Status'};
- $encl_id = $out->{'encl_id'};
- $encl_name = $enclosure{$encl_id};
- $nexus = join q{:}, $out->{ctrl}, $encl_id, $id;
+ $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
+ $encl_name = $out->{encl_name};
+ $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
}
next EMM if blacklisted('encl_emm', $nexus);
# Default
if ($status ne 'Ok') {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) needs attention (%s)',
- $name, $encl_id, $encl_name, $state),
- $status2nagios{$status}, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
+ $name, $encl_id, $encl_name, $state;
+ report('storage', $msg, $status2nagios{$status}, $nexus);
}
# Ok
else {
- push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s',
- $name, $encl_id, $encl_name, $state),
- $OK, $nexus ];
+ my $msg = sprintf '%s in enclosure %s [%s] is %s',
+ $name, $encl_id, $encl_name, $state;
+ report('storage', $msg, $E_OK, $nexus);
}
}
return;
@@ -1923,22 +2323,25 @@
my $status = undef;
my $location = undef;
my $size = undef;
- my @output = ();
+ my $modes = undef;
+ my @failures = ();
+ my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %dimm_oid
= (
'1.3.6.1.4.1.674.10892.1.1100.50.1.2.1' => 'memoryDeviceIndex',
'1.3.6.1.4.1.674.10892.1.1100.50.1.5.1' => 'memoryDeviceStatus',
'1.3.6.1.4.1.674.10892.1.1100.50.1.8.1' => 'memoryDeviceLocationName',
'1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
+ '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
);
my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);
if (!defined $result) {
- printf "SNMP [memory]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
@output = @{ get_snmp_output($result, \%dimm_oid) };
@@ -1947,15 +2350,31 @@
@output = @{ run_omreport("$omopt_chassis memory") };
}
- my $count_dimms = 0;
+ # Note: These values are bit masks, so combination values are
+ # possible. If value is 0 (zero), memory device has no faults.
+ my %failure_mode
+ = (
+ 1 => 'ECC single bit correction warning rate exceeded',
+ 2 => 'ECC single bit correction failure rate exceeded',
+ 4 => 'ECC multibit fault encountered',
+ 8 => 'ECC single bit correction logging disabled',
+ 16 => 'device disabled because of spare activation',
+ );
DIMM:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ @failures = (); # Initialize
+ if ($snmp) {
$index = $out->{memoryDeviceIndex};
$status = $snmp_status{$out->{memoryDeviceStatus}};
$location = $out->{memoryDeviceLocationName};
$size = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
+ $modes = $out->{memoryDeviceFailureModes};
+ if (defined $modes && $modes > 0) { # nothing to decode when the value is absent or zero
+ foreach my $mask (sort { $a <=> $b } keys %failure_mode) { # numeric order: 1, 2, 4, 8, 16
+ if (($modes & $mask) != 0) { push @failures, $failure_mode{$mask}; }
+ }
+ }
}
else {
$index = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'};
@@ -1965,6 +2384,15 @@
if (defined $size) {
$size =~ s{\s\s}{ }gxms;
}
+ # Run 'omreport chassis memory index=X' to get the failures
+ if ($status ne 'Ok' && defined $index) {
+ foreach (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
+ if (m/\A Failures; (.+?) \z/xms) {
+ chop(my $fail = $1);
+ push @failures, split m{\.}xms, $fail;
+ }
+ }
+ }
}
$location =~ s{\A \s*(.*?)\s* \z}{$1}xms;
@@ -1972,21 +2400,29 @@
# Ignore empty memory slots
next DIMM if !defined $index;
- $count_dimms++;
+ $count{dimm}++;
if ($status ne 'Ok') {
- push @report_chassis, [ sprintf('Memory module %d (%s - %s) needs attention (%s)',
- $index, $location, $size, $status),
- $status2nagios{$status}, $index ];
+ my $msg = undef;
+ if (scalar @failures == 0) {
+ $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)',
+ $index, $location, $size, $status;
+ }
+ else {
+ $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s',
+ $index, $location, $size, (join q{, }, @failures);
+ }
+
+ report('chassis', $msg, $status2nagios{$status}, $index);
}
# Ok
else {
- push @report_chassis, [ sprintf('Memory module %d (%s - %s) is %s',
- $index, $location, $size, $status),
- $OK, $index ];
+ my $msg = sprintf 'Memory module %d [%s, %s] is %s',
+ $index, $location, $size, $status;
+ report('chassis', $msg, $E_OK, $index);
}
}
- return $count_dimms;
+ return;
}
@@ -2002,7 +2438,7 @@
my $max_warn = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %cool_oid
= (
'1.3.6.1.4.1.674.10892.1.700.12.1.2.1' => 'coolingDeviceIndex',
@@ -2018,9 +2454,9 @@
return 0;
}
elsif (!$blade && !defined $result) {
- printf "SNMP [cooling]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
@output = @{ get_snmp_output($result, \%cool_oid) };
@@ -2029,13 +2465,11 @@
@output = @{ run_omreport("$omopt_chassis fans") };
}
- my $count_fans = 0;
-
FAN:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$index = $out->{coolingDeviceIndex};
- $status = $snmp_status{$out->{coolingDeviceStatus}};
+ $status = $snmp_probestatus{$out->{coolingDeviceStatus}};
$reading = $out->{coolingDeviceReading};
$location = $out->{coolingDeviceLocationName};
$max_crit = exists $out->{coolingDeviceUpperCriticalThreshold}
@@ -2058,21 +2492,22 @@
}
next FAN if blacklisted('fan', $index);
- $count_fans++;
+ $count{fan}++;
if ($status ne 'Ok') {
- push @report_chassis, [ sprintf('Chassis fan %d (%s) needs attention (%s)',
- $index, $location, $status),
- $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s',
+ $index, $location, $status;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
else {
- push @report_chassis, [ sprintf('Chassis fan %d (%s): %s',
- $index, $location, $reading),
- $OK, $index ];
+ my $msg = sprintf 'Chassis fan %d [%s]: %s',
+ $index, $location, $reading;
+ report('chassis', $msg, $E_OK, $index);
}
# Collect performance data
- if (defined $opt{'perfdata'}) {
+ if (defined $opt{perfdata}) {
my $pname = lc $location;
$pname =~ s{\s}{_}gxms;
$pname =~ s{proc_}{cpu#}xms;
@@ -2081,7 +2516,7 @@
$perfdata{$pkey} = $pval;
}
}
- return $count_fans;
+ return;
}
@@ -2097,7 +2532,7 @@
my @states = ();
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %ps_oid
= (
'1.3.6.1.4.1.674.10892.1.600.12.1.2.1' => 'powerSupplyIndex',
@@ -2150,12 +2585,10 @@
3 => 'Processor missing',
);
- my $count_psus = 0;
-
PS:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
- my @states = (); # contains states for the PS
+ if ($snmp) {
+ @states = (); # contains states for the PS
$index = $out->{powerSupplyIndex} - 1;
$status = $snmp_status{$out->{powerSupplyStatus}};
@@ -2164,7 +2597,7 @@
? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;
# get the combined state from the StatusReading OID
- foreach my $mask (keys %ps_state) {
+ foreach my $mask (sort { $a <=> $b } keys %ps_state) { # keys are bit masks: order numerically, not as strings
if (($out->{powerSupplySensorState} & $mask) != 0) {
push @states, $ps_state{$mask};
}
@@ -2186,13 +2619,20 @@
}
next PS if blacklisted('ps', $index);
- $count_psus++;
+ $count{power}++;
- push @report_chassis, [ sprintf('Power Supply %d (%s): %s',
- $index, $type, $state),
- $status2nagios{$status}, $index ];
+ if ($status ne 'Ok') {
+ my $msg = sprintf 'Power Supply %d [%s] needs attention: %s',
+ $index, $type, $state;
+ report('chassis', $msg, $status2nagios{$status}, $index);
+ }
+ else {
+ my $msg = sprintf 'Power Supply %d [%s]: %s',
+ $index, $type, $state;
+ report('chassis', $msg, $E_OK, $index);
+ }
}
- return $count_psus;
+ return;
}
@@ -2216,7 +2656,7 @@
my %warn_threshold = %{ custom_temperature_thresholds('w') };
my %crit_threshold = %{ custom_temperature_thresholds('c') };
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %temp_oid
= (
'1.3.6.1.4.1.674.10892.1.700.20.1.2.1' => 'temperatureProbeIndex',
@@ -2238,9 +2678,9 @@
my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);
if (!defined $result) {
- printf "SNMP [temperatures]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
@output = @{ get_snmp_output($result, \%temp_oid) };
@@ -2251,17 +2691,15 @@
my %probe_type
= (
- 1 => 'Other', # -- type is other than following values
- 2 => 'Unknown', # -- type is unknown
- 3 => 'AmbientESM', # -- type is Ambient Embedded Systems Management temperature probe
- 16 => 'Discrete', #- - type is temperature probe with discrete reading
+ 1 => 'Other', # type is other than following values
+ 2 => 'Unknown', # type is unknown
+ 3 => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
+ 16 => 'Discrete', # type is temperature probe with discrete reading
);
- my $count_temps = 0;
-
TEMP:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$index = $out->{temperatureProbeIndex} - 1;
$status = $snmp_probestatus{$out->{temperatureProbeStatus}};
$reading = $out->{temperatureProbeReading} / 10;
@@ -2290,69 +2728,81 @@
}
next TEMP if blacklisted('temp', $index);
- $count_temps++;
+ $count{temp}++;
if ($type eq 'Discrete') {
- push @report_chassis, [ sprintf('Temperature probe %d (%s): is %s',
- $index, $location, $discrete),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Temperature probe %d (%s): is %s',
+ $index, $location, $discrete;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
else {
# First check according to custom thresholds
if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
# Custom critical MAX
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom max=%d)',
- $index, $location, $reading, $crit_threshold{$index}{max}),
- $CRITICAL, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
+ $index, $location, $reading, $crit_threshold{$index}{max};
+ report('chassis', $msg, $E_CRITICAL, $index);
}
elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
# Custom warning MAX
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom max=%d)',
- $index, $location, $reading, $warn_threshold{$index}{max}),
- $WARNING, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
+ $index, $location, $reading, $warn_threshold{$index}{max};
+ report('chassis', $msg, $E_WARNING, $index);
}
elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
# Custom critical MIN
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom min=%d)',
- $index, $location, $reading, $crit_threshold{$index}{min}),
- $CRITICAL, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
+ $index, $location, $reading, $crit_threshold{$index}{min};
+ report('chassis', $msg, $E_CRITICAL, $index);
}
elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
# Custom warning MIN
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom min=%d)',
- $index, $location, $reading, $warn_threshold{$index}{min}),
- $WARNING, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
+ $index, $location, $reading, $warn_threshold{$index}{min};
+ report('chassis', $msg, $E_WARNING, $index);
}
elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) is critically high at %d C',
- $index, $location, $reading),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C',
+ $index, $location, $reading;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) is too high at %d C',
- $index, $location, $reading),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C',
+ $index, $location, $reading;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) is critically low at %d C',
- $index, $location, $reading),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C',
+ $index, $location, $reading;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) is too low at %d C',
- $index, $location, $reading),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C',
+ $index, $location, $reading;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
# Ok
else {
- push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (min=%s/%s, max=%s/%s)',
- $index, $location, $reading,
- $min_warn, $min_crit, $max_warn, $max_crit),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Temperature Probe %d [%s] reads %d C',
+ $index, $location, $reading;
+ if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') {
+ $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
+ }
+ else {
+ $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
+ $min_warn, $min_crit, $max_warn, $max_crit;
+ }
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
# Collect performance data
- if (defined $opt{'perfdata'}) {
+ if (defined $opt{perfdata}) {
my $pname = lc $location;
$pname =~ s{\s}{_}gxms;
$pname =~ s{_temp\z}{}xms;
@@ -2363,7 +2813,7 @@
}
}
}
- return $count_temps;
+ return;
}
@@ -2374,49 +2824,41 @@
my $index = undef;
my $status = undef;
my $state = undef;
- my $oid_ver = 'new';
- my @output = ();
+ my $brand = undef;
+ my $family = undef;
+ my $man = undef;
+ my $speed = undef;
+ my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
# NOTE: For some reason, older models don't have the
- # "Processor Device Status" OIDs. We first check the newer
- # (preferred) OIDs, and if that doesn't work, check the "old"
- # OIDs.
-
- my %cpu_oid_new # for newer models
- = (
- '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
- '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
- '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
- );
+ # "Processor Device Status" OIDs. We check both the newer
+ # (preferred) OIDs and the old ones.
- my %cpu_oid_old # for older models
+ my %cpu_oid
= (
- '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
- '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
- '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.8.1' => 'processorDeviceManufacturerName',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.10.1' => 'processorDeviceFamily',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.12.1' => 'processorDeviceCurrentSpeed',
+ '1.3.6.1.4.1.674.10892.1.1100.30.1.23.1' => 'processorDeviceBrandName',
+ '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
+ '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
+ '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
);
- my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_new]);
-
- if (!defined $result) {
- $oid_ver = 'old';
- $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_old]);
- }
+ my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid]);
if (!defined $result) {
- printf "SNMP [processors]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
- if ($oid_ver eq 'new') {
- @output = @{ get_snmp_output($result, \%cpu_oid_new) };
- }
- else {
- @output = @{ get_snmp_output($result, \%cpu_oid_old) };
- }
+ @output = @{ get_snmp_output($result, \%cpu_oid) };
}
else {
@output = @{ run_omreport("$omopt_chassis processors") };
@@ -2424,74 +2866,113 @@
my %cpu_state
= (
- 1 => 'Other', # -- other than following values
- 2 => 'Unknown', # -- unknown
- 3 => 'Enabled', # -- enabled
- 4 => 'User Disabled', # -- disabled by user via BIOS setup
- 5 => 'BIOS Disabled', # -- disabled by BIOS (POST error)
- 6 => 'Idle', # -- idle
+ 1 => 'Other', # other than following values
+ 2 => 'Unknown', # unknown
+ 3 => 'Enabled', # enabled
+ 4 => 'User Disabled', # disabled by user via BIOS setup
+ 5 => 'BIOS Disabled', # disabled by BIOS (POST error)
+ 6 => 'Idle', # idle
);
my %cpu_reading
= (
- 1 => 'Internal Error', # -- Internal Error
- 2 => 'Thermal Trip', # -- Thermal Trip
- 32 => 'Configuration Error', # -- Configuration Error
- 128 => 'Present', # -- Processor Present
- 256 => 'Disabled', # -- Processor Disabled
- 512 => 'Terminator Present', # -- Terminator Present
- 1024 => 'Throttled', # -- Processor Throttled
+ 1 => 'Internal Error', # Internal Error
+ 2 => 'Thermal Trip', # Thermal Trip
+ 32 => 'Configuration Error', # Configuration Error
+ 128 => 'Present', # Processor Present
+ 256 => 'Disabled', # Processor Disabled
+ 512 => 'Terminator Present', # Terminator Present
+ 1024 => 'Throttled', # Processor Throttled
);
-# my %cpu_reading_reverse
-# = (
-# 'Present' => 128,
-# 'Throttled' => 1024,
-# );
-
- my $count_cpus = 0;
+ # Mapping between family numbers from SNMP and actual CPU family
+ my %cpu_family
+ = (
+ 1 => 'Other', 2 => 'Unknown', 3 => '8086',
+ 4 => '80286', 5 => '386', 6 => '486',
+ 7 => '8087', 8 => '80287', 9 => '80387',
+ 10 => '80487', 11 => 'Pentium', 12 => 'Pentium Pro',
+ 13 => 'Pentium II', 14 => 'Pentium with MMX', 15 => 'Celeron',
+ 16 => 'Pentium II Xeon', 17 => 'Pentium III', 18 => 'Pentium III Xeon',
+ 19 => 'Pentium III', 20 => 'Itanium', 21 => 'Xeon',
+ 22 => 'Pentium 4', 23 => 'Xeon MP', 24 => 'Itanium 2',
+ 25 => 'K5', 26 => 'K6', 27 => 'K6-2',
+ 28 => 'K6-3', 29 => 'Athlon', 30 => 'AMD2900',
+ 31 => 'K6-2+', 32 => 'Power PC', 33 => 'Power PC 601',
+ 34 => 'Power PC 603', 35 => 'Power PC 603+', 36 => 'Power PC 604',
+ 37 => 'Power PC 620', 38 => 'Power PC x704', 39 => 'Power PC 750',
+ 48 => 'Alpha', 49 => 'Alpha 21064', 50 => 'Alpha 21066',
+ 51 => 'Alpha 21164', 52 => 'Alpha 21164PC', 53 => 'Alpha 21164a',
+ 54 => 'Alpha 21264', 55 => 'Alpha 21364', 64 => 'MIPS',
+ 65 => 'MIPS R4000', 66 => 'MIPS R4200', 67 => 'MIPS R4400',
+ 68 => 'MIPS R4600', 69 => 'MIPS R10000', 80 => 'SPARC',
+ 81 => 'SuperSPARC', 82 => 'microSPARC II', 83 => 'microSPARC IIep',
+ 84 => 'UltraSPARC', 85 => 'UltraSPARC II', 86 => 'UltraSPARC IIi',
+ 87 => 'UltraSPARC III', 88 => 'UltraSPARC IIIi', 96 => '68040',
+ 97 => '68xxx', 98 => '68000', 99 => '68010',
+ 100 => '68020', 101 => '68030', 112 => 'Hobbit',
+ 120 => 'Crusoe TM5000', 121 => 'Crusoe TM3000', 122 => 'Efficeon TM8000',
+ 128 => 'Weitek', 131 => 'Athlon 64', 132 => 'Opteron',
+ 133 => 'Sempron', 134 => 'Turion 64 Mobile', 135 => 'Dual-Core Opteron',
+ 136 => 'Athlon 64 X2 DC', 137 => 'Turion 64 X2 M', 138 => 'Quad-Core Opteron',
+ 139 => '3rd gen Opteron', 144 => 'PA-RISC', 145 => 'PA-RISC 8500',
+ 146 => 'PA-RISC 8000', 147 => 'PA-RISC 7300LC', 148 => 'PA-RISC 7200',
+ 149 => 'PA-RISC 7100LC', 150 => 'PA-RISC 7100', 160 => 'V30',
+ 171 => 'Dual-Core Xeon 5200', 172 => 'Dual-Core Xeon 7200', 173 => 'Quad-Core Xeon 7300',
+ 174 => 'Quad-Core Xeon 7400', 175 => 'Multi-Core Xeon 7400', 176 => 'M1',
+ 177 => 'M2', 180 => 'AS400', 182 => 'Athlon XP',
+ 183 => 'Athlon MP', 184 => 'Duron', 185 => 'Pentium M',
+ 186 => 'Celeron D', 187 => 'Pentium D', 188 => 'Pentium Extreme',
+ 189 => 'Core Solo', 190 => 'Core2', 191 => 'Core2 Duo',
+ 198 => 'Core i7', 199 => 'Dual-Core Celeron', 200 => 'IBM390',
+ 201 => 'G4', 202 => 'G5', 203 => 'ESA/390 G6',
+ 204 => 'z/Architecture', 210 => 'C7-M', 211 => 'C7-D',
+ 212 => 'C7', 213 => 'Eden', 214 => 'Multi-Core Xeon',
+ 215 => 'Dual-Core Xeon 3xxx', 216 => 'Quad-Core Xeon 3xxx', 218 => 'Dual-Core Xeon 5xxx',
+ 219 => 'Quad-Core Xeon 5xxx', 221 => 'Dual-Core Xeon 7xxx', 222 => 'Quad-Core Xeon 7xxx',
+ 223 => 'Multi-Core Xeon 7xxx', 250 => 'i860', 251 => 'i960',
+ );
CPU:
foreach my $out (@output) {
-# my $throttled = 0; # flags if the CPU is throttled
-
- if ($opt{'snmp'}) {
- if ($oid_ver eq 'new') {
+ if ($snmp) {
+ $index = exists $out->{processorDeviceStatusIndex}
+ ? $out->{processorDeviceStatusIndex} - 1
+ : $out->{processorDeviceIndex} - 1;
+ $status = exists $out->{processorDeviceStatusStatus}
+ ? $snmp_status{$out->{processorDeviceStatusStatus}}
+ : $snmp_status{$out->{processorDeviceStatus}};
+ if (exists $out->{processorDeviceStatusReading}) {
my @states = (); # contains states for the CPU
- $index = $out->{processorDeviceStatusIndex} - 1;
- $status = $snmp_status{$out->{processorDeviceStatusStatus}};
# get the combined state from the StatusReading OID
- foreach my $mask (keys %cpu_reading) {
+ foreach my $mask (sort { $a <=> $b } keys %cpu_reading) { # numeric order keeps e.g. 128 before 1024
if (($out->{processorDeviceStatusReading} & $mask) != 0) {
push @states, $cpu_reading{$mask};
}
}
# Finally, create the state string
- $state = join q{,}, @states;
-
- # flag the CPU as throttled
-# if ($out->{processorDeviceStatusReading} == ($cpu_reading_reverse{Present}
-# + $cpu_reading_reverse{Throttled})) {
-# $throttled = 1;
-# }
+ $state = join q{, }, @states;
}
else {
- $index = $out->{processorDeviceIndex} - 1;
- $status = $snmp_status{$out->{processorDeviceStatus}};
$state = $cpu_state{$out->{processorDeviceStatusState}};
}
+ $man = $out->{processorDeviceManufacturerName};
+ $family = (exists $out->{processorDeviceFamily}
+ and exists $cpu_family{$out->{processorDeviceFamily}})
+ ? $cpu_family{$out->{processorDeviceFamily}} : undef;
+ $speed = $out->{processorDeviceCurrentSpeed};
+ $brand = $out->{processorDeviceBrandName};
}
else {
$index = $out->{'Index'};
$status = $out->{'Status'};
$state = $out->{'State'};
-
- # flag the CPU as throttled
-# if (defined $state and $state eq 'CPU Throttled') {
-# $throttled = 1;
-# }
+ $brand = exists $out->{'Processor Brand'} ? $out->{'Processor Brand'} : undef;
+ $family = exists $out->{'Processor Family'} ? $out->{'Processor Family'} : undef;
+ $man = exists $out->{'Processor Manufacturer'} ? $out->{'Processor Manufacturer'} : undef;
+ $speed = exists $out->{'Current Speed'} ? $out->{'Current Speed'} : undef;
}
next CPU if blacklisted('cpu', $index);
@@ -2502,33 +2983,41 @@
or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');
# Ignore unoccupied CPU slots (snmp)
- if ($opt{snmp} and exists $out->{processorDeviceStatusReading}
+ if ($snmp and exists $out->{processorDeviceStatusReading}
and $out->{processorDeviceStatusReading} == 0) {
next CPU;
}
- $count_cpus++;
+ $count{cpu}++;
+
+ if (defined $brand) {
+ $brand =~ s{\s\s+}{ }gxms;
+ $brand =~ s{\((R|tm)\)}{}gxms;
+ $brand =~ s{\s(CPU|Processor)}{}xms;
+ $brand =~ s{\s\@}{}xms;
+ }
+ elsif (defined $family and defined $man and defined $speed) {
+ $speed =~ s{\A (\d+) .*}{$1}xms;
+ $brand = sprintf '%s %s %.2fGHz', $man, $family, $speed / 1000;
+ }
+ else {
+ $brand = "unknown";
+ }
- # Special case: Ignore non-critical warning about throttled
- # CPUs. Ondemand CPU throttling is normal behaviour in most
- # Linux distros and makes perfect sense in a server
-# if ($status eq 'Non-Critical' and $throttled) {
-# push @report_chassis, [ sprintf('CPU %d is %s', $index, $state),
-# $OK, $index ];
-# }
# Default
-# elsif ($status ne 'Ok') {
if ($status ne 'Ok') {
- push @report_chassis, [ sprintf('CPU %d needs attention (%s)', $index, $state),
- $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Processor %d [%s] needs attention: %s',
+ $index, $brand, $state;
+ report('chassis', $msg, $status2nagios{$status}, $index);
}
# Ok
else {
- push @report_chassis, [ sprintf('CPU %d is %s', $index, $state),
- $OK, $index ];
+ my $msg = sprintf 'Processor %d [%s] is %s',
+ $index, $brand, $state;
+ report('chassis', $msg, $E_OK, $index);
}
}
- return $count_cpus;
+ return;
}
@@ -2542,7 +3031,7 @@
my $location = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %volt_oid
= (
'1.3.6.1.4.1.674.10892.1.600.20.1.2.1' => 'voltageProbeIndex',
@@ -2554,12 +3043,11 @@
my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);
- #my $result = $snmp_session->get_entries(-columns => [keys %volt_oid]);
if (!defined $result) {
- printf "SNMP [voltage probes]: %s.\n", $snmp_session->error;
+ printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error;
$snmp_session->close;
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
@output = @{ get_snmp_output($result, \%volt_oid) };
@@ -2574,13 +3062,11 @@
2 => 'Bad',
);
- my $count_volts = 0;
-
VOLT:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$index = $out->{voltageProbeIndex} - 1;
- $status = $snmp_status{$out->{voltageProbeStatus}};
+ $status = $snmp_probestatus{$out->{voltageProbeStatus}};
$reading = exists $out->{voltageProbeReading}
? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
: $volt_discrete_reading{$out->{voltageProbeDiscreteReading}};
@@ -2594,13 +3080,14 @@
}
next VOLT if blacklisted('volt', $index);
- $count_volts++;
+ $count{volt}++;
- push @report_chassis, [ sprintf('Voltage sensor %d (%s) is %s',
- $index, $location, $reading),
- $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Voltage sensor %d [%s] is %s',
+ $index, $location, $reading;
+ my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
+ report('chassis', $msg, $err, $index);
}
- return $count_volts;
+ return;
}
@@ -2614,7 +3101,7 @@
my $location = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %bat_oid
= (
'1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
@@ -2640,11 +3127,9 @@
4 => 'Presence Detected',
);
- my $count_bats = 0;
-
BATTERY:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$index = $out->{batteryIndex} - 1;
$status = $snmp_status{$out->{batteryStatus}};
$reading = $bat_reading{$out->{batteryReading}};
@@ -2658,13 +3143,13 @@
}
next BATTERY if blacklisted('bp', $index);
- $count_bats++;
+ $count{bat}++;
- push @report_chassis, [ sprintf('Battery probe %d (%s) is %s',
- $index, $location, $reading),
- $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Battery probe %d [%s] is %s',
+ $index, $location, $reading;
+ report('chassis', $msg, $status2nagios{$status}, $index);
}
- return $count_bats;
+ return;
}
@@ -2681,7 +3166,7 @@
my $unit = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %amp_oid
= (
'1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex',
@@ -2743,11 +3228,9 @@
'amperageProbeTypeIsDiscrete' => q{}, # discrete reading, no unit
);
- my $count_pwr = 0;
-
AMP:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$index = $out->{amperageProbeIndex} - 1;
$status = $snmp_status{$out->{amperageProbeStatus}};
$reading = $amp_type{$out->{amperageProbeType}} eq 'amperageProbeTypeIsDiscrete'
@@ -2769,7 +3252,7 @@
}
else {
$index = $out->{'Index'};
- next if $index !~ m/^\d+$/x;
+ next AMP if (!defined $index || $index !~ m/^\d+$/x);
$status = $out->{'Status'};
$reading = $out->{'Reading'};
$location = $out->{'Probe Name'};
@@ -2783,16 +3266,16 @@
$max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
}
- next AMP if blacklisted('pm', $index);
+ next AMP if blacklisted('amp', $index);
next AMP if $index !~ m{\A \d+ \z}xms;
- $count_pwr++;
+ $count{amp}++;
- push @report_chassis, [ sprintf('Amperage probe %d (%s) reads %s %s',
- $index, $location, $reading, $unit, $status),
- $status2nagios{$status}, $index ];
+ my $msg = sprintf 'Amperage probe %d [%s] reads %s %s',
+ $index, $location, $reading, $unit; # exactly one argument per conversion
+ report('chassis', $msg, $status2nagios{$status}, $index);
# Collect performance data
- if (defined $opt{'perfdata'}) {
+ if (defined $opt{perfdata}) {
next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number)
my $pname = lc $location;
$pname =~ s{\s}{_}gxms;
@@ -2801,7 +3284,48 @@
$perfdata{$pkey} = $pval;
}
}
- return $count_pwr;
+
+ # Collect EXTRA performance data not found at first run. This is a
+ # rather ugly hack
+ if (defined $opt{perfdata} && !$snmp) {
+ my $found = 0;
+ my $index = 0;
+ my %used = ();
+
+ # find used indexes
+ foreach (keys %perfdata) {
+ if (m/\A pwr_mon_(\d+)/xms) {
+ $used{$1} = 1;
+ }
+ }
+
+ AMP2:
+ foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) {
+ chop $line;
+ if ($line eq 'Location;Reading') {
+ $found = 1;
+ next AMP2;
+ }
+ if ($line eq q{}) {
+ $found = 0;
+ next AMP2;
+ }
+ if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s ([AW]) \z/xms) {
+ my $aname = lc $1;
+ my $aval = $2;
+ my $aunit = $3;
+ $aname =~ s{\s}{_}gxms;
+
+ # don't use an existing index
+ while (exists $used{$index}) { ++$index; }
+
+ $perfdata{"pwr_mon_${index}_${aname}"} = "$aval$aunit;0;0";
+ ++$index;
+ }
+ }
+ }
+
+ return;
}
@@ -2814,7 +3338,7 @@
my $reading = undef;
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %int_oid
= (
'1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
@@ -2840,11 +3364,9 @@
4 => 'Breach Sensor Failure', # intrusion sensor has failed
);
- my $count_intr = 0;
-
INTRUSION:
foreach my $out (@output) {
- if ($opt{'snmp'}) {
+ if ($snmp) {
$index = $out->{intrusionIndex} - 1;
$status = $snmp_status{$out->{intrusionStatus}};
$reading = $int_reading{$out->{intrusionReading}};
@@ -2856,21 +3378,21 @@
}
next INTRUSION if blacklisted('intr', $index);
- $count_intr++;
+ $count{intr}++;
if ($status ne 'Ok') {
- push @report_chassis, [ sprintf('Chassis intrusion %d detected: %s',
- $index, $reading),
- $WARNING, $index ];
+ my $msg = sprintf 'Chassis intrusion %d detected: %s',
+ $index, $reading;
+ report('chassis', $msg, $E_WARNING, $index);
}
# Ok
else {
- push @report_chassis, [ sprintf('Chassis intrusion %d detection: %s (%s)',
- $index, $status, $reading),
- $OK, $index ];
+ my $msg = sprintf 'Chassis intrusion %d detection: %s (%s)',
+ $index, $status, $reading;
+ report('chassis', $msg, $E_OK, $index);
}
}
- return $count_intr;
+ return;
}
@@ -2878,29 +3400,23 @@
# CHASSIS: Check alert log
#-----------------------------------------
sub check_alertlog {
- my %count = (
- 'Ok' => 0,
- 'Non-Critical' => 0,
- 'Critical' => 0,
- );
-
- return \%count if $opt{'snmp'}; # Not supported with SNMP
+ return if $snmp; # Not supported with SNMP
my @output = @{ run_omreport("$omopt_system alertlog") };
foreach my $out (@output) {
- ++$count{$out->{Severity}};
+ ++$count{alert}{$out->{Severity}};
}
# Create error messages and set exit value if appropriate
- my $nagios_level = 0;
- if ($count{'Critical'} > 0) { $nagios_level = $CRITICAL; }
- elsif ($count{'Non-Critical'} > 0) { $nagios_level = $WARNING; }
-
- push @report_other, [ sprintf('Alert log content: %d critical, %d non-critical, %d ok',
- $count{'Critical'}, $count{'Non-Critical'}, $count{'Ok'}),
- $nagios_level, q{} ];
+ my $err = $E_OK; # default level; raised below if critical/non-critical entries exist
+ if ($count{alert}{'Critical'} > 0) { $err = $E_CRITICAL; }
+ elsif ($count{alert}{'Non-Critical'} > 0) { $err = $E_WARNING; }
+
+ my $msg = sprintf 'Alert log content: %d critical, %d non-critical, %d ok',
+ $count{alert}{'Critical'}, $count{alert}{'Non-Critical'}, $count{alert}{'Ok'};
+ report('other', $msg, $err);
- return \%count;
+ return;
}
#-----------------------------------------
@@ -2909,44 +3425,40 @@
sub check_esmlog_health {
my $health = 'Ok';
- if ($opt{snmp}) {
+ if ($snmp) {
my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
if (!defined $result) {
- push @report_other, [ sprintf('SNMP ERROR getting systemStateEventLogStatus OID: %s',
- $snmp_session->error),
- $UNKNOWN, q{} ];
+ my $msg = sprintf 'SNMP ERROR [esmhealth]: %s', $snmp_session->error;
+ report('other', $msg, $E_UNKNOWN);
+ return; # no reply to read a status from; do not fall through to the fill-level report
}
$health = $snmp_status{$result->{$systemStateEventLogStatus}};
}
else {
- open my $CMD, '-|', "$omreport $omopt_system esmlog -fmt ssv"
- or ( push @report_other, [ sprintf("Couldn't run $omreport $omopt_system esmlog: $!"),
- $UNKNOWN, q{} ] and return $OK );
- while (<$CMD>) {
+ foreach (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
if (m/\A Health;(.+) \z/xms) {
$health = $1;
chop $health;
last;
}
}
- close $CMD;
}
# If the overall health of the ESM log is other than "Ok", the
# fill grade of the log is more than 80% and the log should be
# cleared
if ($health eq 'Ok') {
- push @report_other, [ sprintf('ESM log is health is OK (less than 80%% full)'),
- $OK, q{} ];
+ my $msg = sprintf 'ESM log health is Ok (less than 80%% full)';
+ report('other', $msg, $E_OK);
}
elsif ($health eq 'Critical') {
- push @report_other, [ sprintf('ESM log is 100%% full!'),
- $status2nagios{$health}, q{} ];
+ my $msg = sprintf 'ESM log is 100%% full';
+ report('other', $msg, $status2nagios{$health});
}
else {
- push @report_other, [ sprintf('ESM log is more than 80%% full'),
- $status2nagios{$health}, q{} ];
+ my $msg = sprintf 'ESM log is more than 80%% full';
+ report('other', $msg, $status2nagios{$health});
}
return;
@@ -2956,14 +3468,9 @@
# CHASSIS: Check ESM log
#-----------------------------------------
sub check_esmlog {
- my %count = (
- 'Ok' => 0,
- 'Non-Critical' => 0,
- 'Critical' => 0,
- );
my @output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %esm_oid
= (
'1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
@@ -2971,113 +3478,141 @@
my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]);
# No entries is OK
- return 0 if !defined $result;
+ return if !defined $result;
@output = @{ get_snmp_output($result, \%esm_oid) };
foreach my $out (@output) {
- ++$count{$snmp_status{$out->{eventLogSeverityStatus}}};
+ ++$count{esm}{$snmp_status{$out->{eventLogSeverityStatus}}};
}
}
else {
@output = @{ run_omreport("$omopt_system esmlog") };
foreach my $out (@output) {
- ++$count{$out->{Severity}};
+ ++$count{esm}{$out->{Severity}};
}
}
# Create error messages and set exit value if appropriate
- my $nagios_level = 0;
- if ($count{'Critical'} > 0) { $nagios_level = $CRITICAL; }
- elsif ($count{'Non-Critical'} > 0) { $nagios_level = $WARNING; }
+ my $err = 0;
+ if ($count{esm}{'Critical'} > 0) { $err = $E_CRITICAL; }
+ elsif ($count{esm}{'Non-Critical'} > 0) { $err = $E_WARNING; }
+
+ my $msg = sprintf 'ESM log content: %d critical, %d non-critical, %d ok',
+ $count{esm}{'Critical'}, $count{esm}{'Non-Critical'}, $count{esm}{'Ok'};
+ report('other', $msg, $err);
- push @report_other, [ sprintf('ESM log content: %d critical, %d non-critical, %d ok',
- $count{'Critical'}, $count{'Non-Critical'}, $count{'Ok'}),
- $nagios_level, q{} ];
- return \%count;
+ return;
}
-
#
-# Collects some information about the system via omreport
+# Handy function for checking all storage components
#
-sub get_omreport_sysinfo
-{
- # Get system model and serial number
- if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") {
- while (<$INFO>) {
- next if !m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;
- my ($key, $val) = split /;/xms;
- $key =~ s{\s+\z}{}xms; # remove trailing whitespace
- $val =~ s{\s+\z}{}xms; # remove trailing whitespace
- if ($key eq 'Chassis Model' or $key eq 'Model') {
- $sysinfo{model} = $val;
+sub check_storage {
+ check_controllers();
+ check_physical_disks();
+ check_virtual_disks();
+ check_cache_battery();
+ check_connectors();
+ check_enclosures();
+ check_enclosure_fans();
+ check_enclosure_pwr();
+ check_enclosure_temp();
+ check_enclosure_emms();
+ return;
+}
+
+
+
+#---------------------------------------------------------------------
+# Info functions
+#---------------------------------------------------------------------
+
+#
+# Fetch output from 'omreport chassis info', put in sysinfo hash
+#
+sub get_omreport_chassis_info {
+ if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") {
+ my @lines = <$INFO>;
+ close $INFO;
+ foreach (@lines) {
+ next if !m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;
+ my ($key, $val) = split /;/xms;
+ $key =~ s{\s+\z}{}xms; # remove trailing whitespace
+ $val =~ s{\s+\z}{}xms; # remove trailing whitespace
+ if ($key eq 'Chassis Model' or $key eq 'Model') {
+ $sysinfo{model} = $val;
}
if ($key eq 'Chassis Service Tag' or $key eq 'Service Tag') {
$sysinfo{serial} = $val;
}
}
- close $INFO;
}
+ return;
+}
- # Get BIOS information. Only if needed
- if (($opt{okinfo} >= 1) or ($opt{verbose}) or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms)) {
- if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") {
- while (<$BIOS>) {
- next if !m/;/xms;
- my ($key, $val) = split /;/xms;
- $key =~ s{\s+\z}{}xms; # remove trailing whitespace
- $val =~ s{\s+\z}{}xms; # remove trailing whitespace
- $sysinfo{bios} = $val if $key eq 'Version';
- $sysinfo{biosdate} = $val if $key eq 'Release Date';
- }
- close $BIOS;
+#
+# Fetch output from 'omreport chassis bios', put in sysinfo hash
+#
+sub get_omreport_chassis_bios {
+ if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") {
+ my @lines = <$BIOS>;
+ close $BIOS;
+ foreach (@lines) {
+ next if !m/;/xms;
+ my ($key, $val) = split /;/xms;
+ $key =~ s{\s+\z}{}xms; # remove trailing whitespace
+ $val =~ s{\s+\z}{}xms; # remove trailing whitespace
+ $sysinfo{bios} = $val if $key eq 'Version';
+ $sysinfo{biosdate} = $val if $key eq 'Release Date';
}
}
+ return;
+}
- # Return now if verbose
- return if $opt{verbose};
-
- # Get OS information. Only if needed
- if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
- if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") {
- while (<$VER>) {
- next if !m/;/xms;
- my ($key, $val) = split /;/xms;
- $key =~ s{\s+\z}{}xms; # remove trailing whitespace
- $val =~ s{\s+\z}{}xms; # remove trailing whitespace
- if ($key eq 'Operating System') {
- $sysinfo{osname} = $val;
- }
- elsif ($key eq 'Operating System Version') {
- $sysinfo{osver} = $val;
- }
+#
+# Fetch output from 'omreport system operatingsystem', put in sysinfo hash
+#
+sub get_omreport_system_operatingsystem {
+ if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") {
+ my @lines = <$VER>;
+ close $VER;
+ foreach (@lines) {
+ next if !m/;/xms;
+ my ($key, $val) = split /;/xms;
+ $key =~ s{\s+\z}{}xms; # remove trailing whitespace
+ $val =~ s{\s+\z}{}xms; # remove trailing whitespace
+ if ($key eq 'Operating System') {
+ $sysinfo{osname} = $val;
+ }
+ elsif ($key eq 'Operating System Version') {
+ $sysinfo{osver} = $val;
}
- close $VER;
}
}
+ return;
+}
- # Get OMSA information. Only if needed
- if ($opt{okinfo} >= 3) {
- if (open my $OM, '-|', "$omreport about -fmt ssv") {
- while (<$OM>) {
- if (m/\A Version;(.+) \z/xms) {
- $sysinfo{om} = $1;
- chomp $sysinfo{om};
- }
+#
+# Fetch output from 'omreport about', put in sysinfo hash
+#
+sub get_omreport_about {
+ if (open my $OM, '-|', "$omreport about -fmt ssv") {
+ my @lines = <$OM>;
+ close $OM;
+ foreach (@lines) {
+ if (m/\A Version;(.+) \z/xms) {
+ $sysinfo{om} = $1;
+ chomp $sysinfo{om};
}
}
}
-
return;
}
-
#
-# Collects some information about the system via SNMP
+# Fetch chassis info via SNMP, put in sysinfo hash
#
-sub get_snmp_sysinfo
-{
- # Get system model and serial number
+sub get_snmp_chassis_info {
my %chassis_oid
= (
'1.3.6.1.4.1.674.10892.1.300.10.1.9.1' => 'chassisModelName',
@@ -3099,96 +3634,133 @@
}
}
else {
- push @report_other, [ sprintf('SNMP ERROR getting chassis info: %s',
- $snmp_session->error),
- $UNKNOWN, q{} ];
+ my $msg = sprintf 'SNMP ERROR getting chassis info: %s',
+ $snmp_session->error;
+ report('other', $msg, $E_UNKNOWN);
}
+ return;
+}
- # Get BIOS information. Only if needed
- if (($opt{okinfo} >= 1) or ($opt{verbose}) or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms)) {
- my %bios_oid
- = (
- '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName',
- '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName',
- );
+#
+# Fetch BIOS info via SNMP, put in sysinfo hash
+#
+sub get_snmp_chassis_bios {
+ my %bios_oid
+ = (
+ '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName',
+ '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName',
+ );
- my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
- $result = $snmp_session->get_table(-baseoid => $systemBIOSTable);
+ my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
+ my $result = $snmp_session->get_table(-baseoid => $systemBIOSTable);
- if (defined $result) {
- foreach my $oid (keys %{ $result }) {
- if (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSReleaseDateName') {
- $sysinfo{biosdate} = $result->{$oid};
- $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
- }
- elsif (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSVersionName') {
- $sysinfo{bios} = $result->{$oid};
- }
+ if (defined $result) {
+ foreach my $oid (keys %{ $result }) {
+ if (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSReleaseDateName') {
+ $sysinfo{biosdate} = $result->{$oid};
+ $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
+ }
+ elsif (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSVersionName') {
+ $sysinfo{bios} = $result->{$oid};
}
- }
- else {
- push @report_other, [ sprintf('SNMP ERROR getting BIOS info: %s',
- $snmp_session->error),
- $UNKNOWN, q{} ];
}
}
+ else {
+ my $msg = sprintf 'SNMP ERROR getting BIOS info: %s',
+ $snmp_session->error;
+ report('other', $msg, $E_UNKNOWN);
+ }
+ return;
+}
- # Return now if verbose
- return if $opt{verbose};
-
- # Get OS information. Only if needed
- if (($opt{okinfo} >= 3) or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms)) {
- my %os_oid
- = (
- '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName',
- '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName',
- );
+#
+# Fetch OS info via SNMP, put in sysinfo hash
+#
+sub get_snmp_system_operatingsystem {
+ my %os_oid
+ = (
+ '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName',
+ '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName',
+ );
- my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
- $result = $snmp_session->get_table(-baseoid => $operatingSystemTable);
+ my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
+ my $result = $snmp_session->get_table(-baseoid => $operatingSystemTable);
- if (defined $result) {
- foreach my $oid (keys %{ $result }) {
- if (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemName') {
- $sysinfo{osname} = ($result->{$oid});
- }
- elsif (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemVersionName') {
- $sysinfo{osver} = $result->{$oid};
- }
+ if (defined $result) {
+ foreach my $oid (keys %{ $result }) {
+ if (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemName') {
+ $sysinfo{osname} = ($result->{$oid});
+ }
+ elsif (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemVersionName') {
+ $sysinfo{osver} = $result->{$oid};
}
}
- else {
- push @report_other, [ sprintf('SNMP ERROR getting OS info: %s',
- $snmp_session->error),
- $UNKNOWN, q{} ];
- }
}
+ else {
+ my $msg = sprintf 'SNMP ERROR getting OS info: %s',
+ $snmp_session->error;
+ report('other', $msg, $E_UNKNOWN);
+ }
+ return;
+}
- # Get OMSA version. Only if needed
- if ($opt{okinfo} >= 3) {
- my %omsa_oid
- = (
- '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName',
- );
- my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100';
- $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);
- if (defined $result) {
- foreach my $oid (keys %{ $result }) {
- if (exists $omsa_oid{$oid} and $omsa_oid{$oid} eq 'systemManagementSoftwareGlobalVersionName') {
- $sysinfo{om} = ($result->{$oid});
- }
+#
+# Fetch OMSA version via SNMP, put in sysinfo hash
+#
+sub get_snmp_about {
+ my %omsa_oid
+ = (
+ '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName',
+ );
+ my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100';
+ my $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);
+ if (defined $result) {
+ foreach my $oid (keys %{ $result }) {
+ if (exists $omsa_oid{$oid} and $omsa_oid{$oid} eq 'systemManagementSoftwareGlobalVersionName') {
+ $sysinfo{om} = ($result->{$oid});
}
}
- else {
- push @report_other, [ sprintf('SNMP ERROR getting OMSA info: %s',
- $snmp_session->error),
- $UNKNOWN, q{} ];
- }
+ }
+ else {
+ my $msg = sprintf 'SNMP ERROR getting OMSA info: %s',
+ $snmp_session->error;
+ report('other', $msg, $E_UNKNOWN);
+ }
+ return;
+}
+
+#
+# Collects some information about the system
+#
+sub get_sysinfo
+{
+ # Get system model and serial number
+ $snmp ? get_snmp_chassis_info() : get_omreport_chassis_info();
+
+ # Get BIOS information. Only if needed
+ if ( $opt{okinfo} >= 1
+ or $opt{debug}
+ or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms) ) {
+ $snmp ? get_snmp_chassis_bios() : get_omreport_chassis_bios();
+ }
+
+ # Get OMSA information. Only if needed
+ if ($opt{okinfo} >= 3 or $opt{debug}) {
+ $snmp ? get_snmp_about() : get_omreport_about();
+ }
+
+ # Return now if debug
+ return if $opt{debug};
+
+ # Get OS information. Only if needed
+ if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
+ $snmp ? get_snmp_system_operatingsystem() : get_omreport_system_operatingsystem();
}
return;
}
+
# Helper function for running omreport when the results are strictly
# name=value pairs.
sub run_omreport_info {
@@ -3197,17 +3769,13 @@
my @keys = ();
# Run omreport and fetch output
- open my $CMD, '-|', "$omreport $command -fmt ssv 2>&1"
- or ( push @report_other, [ "Couldn't run $omreport: $!",
- $UNKNOWN, q{} ] and return [] );
- my $rawtext = do { local $/ = undef; <$CMD> }; # slurping
- close $CMD;
+ my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
# Parse output, store in array
for ((split /\n/xms, $rawtext)) {
if (m/\A Error/xms) {
- push @report_other, [ "Problem running 'omreport $command': $_",
- $UNKNOWN, q{} ];
+ my $msg = "Problem running 'omreport $command': $_";
+ report('other', $msg, $E_UNKNOWN);
}
next if !m/;/xms; # ignore lines with less than two fields
my @vals = split m/;/xms;
@@ -3223,7 +3791,7 @@
my @snmp_output = ();
my %nrpe_output = ();
- if ($opt{'snmp'}) {
+ if ($snmp) {
my %fw_oid
= (
'1.3.6.1.4.1.674.10892.1.300.60.1.7.1' => 'firmwareType',
@@ -3267,7 +3835,7 @@
);
- if ($opt{'snmp'}) {
+ if ($snmp) {
foreach my $out (@snmp_output) {
if ($fw_type{$out->{firmwareType}} eq 'baseboardManagementController') {
$sysinfo{'bmc'} = 1;
@@ -3301,68 +3869,31 @@
}
-# Handy function for checking all storage components
-sub check_storage {
- check_controllers();
- check_physical_disks();
- check_virtual_disks();
- check_cache_battery();
- check_connectors();
- check_enclosures();
- check_enclosure_fans();
- check_enclosure_pwr();
- check_enclosure_temp();
- check_enclosure_emms();
- return;
-}
-
#=====================================================================
# Main program
#=====================================================================
-# Counters
-my $i_count = 0;
-my %h_count = ('Ok' => 0, 'Non-Critical' => 0, 'Critical' => 0);
-
# Here we do the actual checking of components
-if (defined $component) {
- # Do single selected check
- if ($component eq 'storage') { check_storage(); }
- elsif ($component eq 'fans') { $i_count = check_fans(); }
- elsif ($component eq 'temperature') { $i_count = check_temperatures(); }
- elsif ($component eq 'memory') { $i_count = check_memory(); }
- elsif ($component eq 'power') { $i_count = check_powersupplies(); }
- elsif ($component eq 'cpu') { $i_count = check_processors(); }
- elsif ($component eq 'voltage') { $i_count = check_volts(); }
- elsif ($component eq 'batteries') { $i_count = check_batteries(); }
- elsif ($component eq 'pwrmonitor') { $i_count = check_pwrmonitoring(); }
- elsif ($component eq 'intrusion') { $i_count = check_intrusion(); }
- elsif ($component eq 'alertlog') { %h_count = %{ check_alertlog() }; }
- elsif ($component eq 'esmlog') { %h_count = %{ check_esmlog() }; }
- elsif ($component eq 'esmhealth') { check_esmlog_health(); }
-}
-else {
- # Check global status if applicable
- if ($opt{global}) {
- $globalstatus = check_global();
- }
-
- # Do multiple selected checks
- if ($check{storage}) { check_storage(); }
- if ($check{memory}) { check_memory(); }
- if ($check{fans}) { check_fans(); }
- if ($check{power}) { check_powersupplies(); }
- if ($check{temperature}) { check_temperatures(); }
- if ($check{cpu}) { check_processors(); }
- if ($check{voltage}) { check_volts(); }
- if ($check{batteries}) { check_batteries(); }
- if ($check{pwrmonitor}) { check_pwrmonitoring(); }
- if ($check{intrusion}) { check_intrusion(); }
- if ($check{alertlog}) { check_alertlog(); }
- if ($check{esmlog}) { check_esmlog(); }
- if ($check{esmhealth}) { check_esmlog_health(); }
-}
+# Check global status if applicable
+if ($global) {
+ $globalstatus = check_global();
+}
+
+# Do multiple selected checks
+if ($check{storage}) { check_storage(); }
+if ($check{memory}) { check_memory(); }
+if ($check{fans}) { check_fans(); }
+if ($check{power}) { check_powersupplies(); }
+if ($check{temp}) { check_temperatures(); }
+if ($check{cpu}) { check_processors(); }
+if ($check{voltage}) { check_volts(); }
+if ($check{batteries}) { check_batteries(); }
+if ($check{amperage}) { check_pwrmonitoring(); }
+if ($check{intrusion}) { check_intrusion(); }
+if ($check{alertlog}) { check_alertlog(); }
+if ($check{esmlog}) { check_esmlog(); }
+if ($check{esmhealth}) { check_esmlog_health(); }
#---------------------------------------------------------------------
@@ -3370,29 +3901,34 @@
#---------------------------------------------------------------------
# Counter variable
-my %nagios_level_count = (
- 'OK' => 0,
- 'WARNING' => 0,
- 'CRITICAL' => 0,
- 'UNKNOWN' => 0,
- );
+%nagios_alert_count
+ = (
+ 'OK' => 0,
+ 'WARNING' => 0,
+ 'CRITICAL' => 0,
+ 'UNKNOWN' => 0,
+ );
# Get system information
-$opt{'snmp'} ? get_snmp_sysinfo() : get_omreport_sysinfo();
+get_sysinfo();
+
+# Get firmware info if requested via option
if ($opt{okinfo} >= 1) {
get_firmware_info();
}
# Close SNMP session
-if ($opt{snmp}) {
+if ($snmp) {
$snmp_session->close;
}
# Print messages
-if ($opt{verbose}) {
+if ($opt{debug}) {
print " System: $sysinfo{model}\n";
- print " ServiceTag: $sysinfo{serial}\n";
- print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}\n";
+ print " ServiceTag: $sysinfo{serial}";
+ print q{ } x (25 - length $sysinfo{serial}), "OMSA version: $sysinfo{om}\n";
+ print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}";
+ print q{ } x (25 - length "$sysinfo{bios} $sysinfo{biosdate}"), "Plugin version: $VERSION\n";
if ($#report_storage >= 0) {
print "-----------------------------------------------------------------------------\n";
print " Storage Components \n";
@@ -3401,22 +3937,22 @@
print "---------+----------+--------------------------------------------------------\n";
foreach (@report_storage) {
my ($msg, $level, $nexus) = @{$_};
- print q{ } x (8 - length $ERRORCODE{$level}) . "$ERRORCODE{$level} | "
+ print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
. q{ } x (8 - length $nexus) . "$nexus | $msg\n";
- $nagios_level_count{$ERRORCODE{$level}}++;
+ $nagios_alert_count{$reverse_exitcode{$level}}++;
}
}
if ($#report_chassis >= 0) {
print "-----------------------------------------------------------------------------\n";
print " Chassis Components \n";
print "=============================================================================\n";
- print " STATE | ID | MESSAGE TEXT \n";
+ print " STATE | ID | MESSAGE TEXT \n";
print "---------+------+------------------------------------------------------------\n";
foreach (@report_chassis) {
my ($msg, $level, $nexus) = @{$_};
- print q{ } x (8 - length $ERRORCODE{$level}) . "$ERRORCODE{$level} | "
+ print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
. q{ } x (4 - length $nexus) . "$nexus | $msg\n";
- $nagios_level_count{$ERRORCODE{$level}}++;
+ $nagios_alert_count{$reverse_exitcode{$level}}++;
}
}
if ($#report_other >= 0) {
@@ -3427,8 +3963,8 @@
print "---------+-------------------------------------------------------------------\n";
foreach (@report_other) {
my ($msg, $level, $nexus) = @{$_};
- print q{ } x (8 - length $ERRORCODE{$level}) . "$ERRORCODE{$level} | $msg\n";
- $nagios_level_count{$ERRORCODE{$level}}++;
+ print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | $msg\n";
+ $nagios_alert_count{$reverse_exitcode{$level}}++;
}
}
}
@@ -3436,66 +3972,91 @@
my $c = 0; # counter to determine linebreaks
# Run through each message, sorted by severity level
+ ALERT:
foreach (sort {$a->[1] < $b->[1]} (@report_storage, @report_chassis, @report_other)) {
my ($msg, $level, $nexus) = @{ $_ };
- next if $level == $OK;
+ next ALERT if $level == $E_OK;
+
+ if (defined $opt{only}) {
+ # If user wants only critical alerts
+ next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);
+
+ # If user wants only warning alerts
+ next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
+ }
# Prefix with service tag if specified with option '-i|--info'
- $msg = "[$sysinfo{serial}] " . $msg if $opt{info};
+ if ($opt{info}) {
+ if (defined $opt{htmlinfo}) {
+ $msg = '[<a href="' . warranty_url($sysinfo{serial})
+ . "\">$sysinfo{serial}</a>] " . $msg;
+ }
+ else {
+ $msg = "[$sysinfo{serial}] " . $msg;
+ }
+ }
# Prefix with nagios level if specified with option '--state'
- $msg = $ERRORCODE{$level} . ": $msg" if $opt{state};
+ $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};
# Prefix with one-letter nagios level if specified with option '--short-state'
- $msg = (substr $ERRORCODE{$level}, 0, 1) . ": $msg" if $opt{shortstate};
+ $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};
($c++ == 0) ? print $msg : print $linebreak, $msg;
- $nagios_level_count{$ERRORCODE{$level}}++;
+ $nagios_alert_count{$reverse_exitcode{$level}}++;
}
}
# Determine our exit code
-my $exit_code = $OK;
-$exit_code = $UNKNOWN if $nagios_level_count{'UNKNOWN'} > 0;
-$exit_code = $WARNING if $nagios_level_count{'WARNING'} > 0;
-$exit_code = $CRITICAL if $nagios_level_count{'CRITICAL'} > 0;
+$exit_code = $E_OK;
+$exit_code = $E_UNKNOWN if $nagios_alert_count{'UNKNOWN'} > 0;
+$exit_code = $E_WARNING if $nagios_alert_count{'WARNING'} > 0;
+$exit_code = $E_CRITICAL if $nagios_alert_count{'CRITICAL'} > 0;
# Global status via SNMP.. extra safety check
-if ($globalstatus != $OK and $exit_code == $OK) {
+if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
print "OOPS! Something is wrong with this server, but I don't know what. ";
- print "The global system health status is $ERRORCODE{$globalstatus}, ";
+ print "The global system health status is $reverse_exitcode{$globalstatus}, ";
print "but every component check is OK. This may be a bug in the Nagios plugin, ";
print "please file a bug report.\n";
- exit $UNKNOWN;
+ exit $E_UNKNOWN;
}
# Print OK message
-if ($exit_code == $OK && defined $component && !$opt{verbose}) {
+if ($exit_code == $E_OK && defined $opt{only} && $opt{only} !~ m{\A (?:critical|warning|chassis) \z}xms && !$opt{debug}) {
my %okmsg
- = ( 'storage' => "all storage components ok, $no_of_pdisks physical drives, $no_of_vdisks logical drives",
- 'fans' => $i_count == 0 && $blade ? 'blade system with no fan probes' : "all $i_count fans ok",
- 'temperature' => "all $i_count temperatures ok",
- 'memory' => "all $i_count memory modules ok",
- 'power' => $i_count == 0 ? 'no instrumented power supplies found' : "all $i_count power supplies ok",
- 'cpu' => "all $i_count processors ok",
- 'voltage' => "all $i_count voltage probes ok",
- 'batteries' => $i_count == 0 ? 'no batteries found' : "all $i_count batteries ok",
- 'pwrmonitor' => $i_count == 0 ? 'no power monitoring probes found' : "all $i_count power monitoring probes ok",
- 'intrusion' => $i_count == 0 ? 'no intrusion detection probes found' : "all $i_count intrusion detection probes ok",
- 'alertlog' => $opt{snmp} ? 'not supported via snmp' : "all alerts: $h_count{Ok} ok, $h_count{'Non-Critical'} warning and $h_count{Critical} critical",
- 'esmlog' => "all esm log entries: $h_count{Ok} ok, $h_count{'Non-Critical'} warning and $h_count{Critical} critical",
- 'esmhealth' => "ESM log health ok",
+ = ( 'storage' => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
+ 'fans' => $count{fan} == 0 && $blade ? 'OK - blade system with no fan probes' : "FANS OK - $count{fan} fan probes checked",
+ 'temp' => "TEMPERATURES OK - $count{temp} temperature probes checked",
+ 'memory' => "MEMORY OK - $count{dimm} memory modules checked",
+ 'power' => $count{power} == 0 ? 'OK - no instrumented power supplies found' : "POWER OK - $count{power} power supplies checked",
+ 'cpu' => "PROCESSORS OK - $count{cpu} processors checked",
+ 'voltage' => "VOLTAGE OK - $count{volt} voltage probes checked",
+ 'batteries' => $count{bat} == 0 ? 'OK - no batteries found' : "BATTERIES OK - $count{bat} batteries checked",
+ 'amperage' => $count{amp} == 0 ? 'OK - no power monitoring probes found' : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
+ 'intrusion' => $count{intr} == 0 ? 'OK - no intrusion detection probes found' : "INTRUSION OK - $count{intr} intrusion detection probes checked",
+ 'alertlog' => $snmp ? 'OK - not supported via snmp' : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
+ 'esmlog' => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
+ 'esmhealth' => "ESM LOG OK - less than 80% used",
);
- print 'OK - ' . $okmsg{$component};
+ print $okmsg{$opt{only}};
}
-elsif ($exit_code == $OK && !$opt{verbose}) {
- printf q{OK - System: '%s', SN: '%s', hardware working fine}, $sysinfo{model}, $sysinfo{serial};
+elsif ($exit_code == $E_OK && !$opt{debug}) {
+ if (defined $opt{htmlinfo}) {
+ printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine},
+ documentation_url($sysinfo{model}), $sysinfo{model},
+ warranty_url($sysinfo{serial}), $sysinfo{serial};
+ }
+ else {
+ printf q{OK - System: '%s', SN: '%s', hardware working fine},
+ $sysinfo{model}, $sysinfo{serial};
+ }
if ($check{storage}) {
printf ', %d logical drives, %d physical drives',
- $no_of_vdisks, $no_of_pdisks;
+ $count{vdisk}, $count{pdisk};
}
else {
print ', not checking storage';
@@ -3503,13 +4064,13 @@
if ($opt{okinfo} >= 1) {
print $linebreak;
- printf q{----- BIOS: '%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};
+ printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};
if ($sysinfo{rac}) {
- printf q{, %s: '%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
+ printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
}
if ($sysinfo{bmc}) {
- printf q{, BMC: '%s'}, $sysinfo{bmc_fw};
+ printf q{, BMC='%s'}, $sysinfo{bmc_fw};
}
}
@@ -3518,12 +4079,16 @@
my @storageprint = ();
foreach my $id (sort keys %{ $sysinfo{controller} }) {
chomp $sysinfo{controller}{$id}{driver};
- push @storageprint, sprintf q{----- Storage ctrl %s name: '%s', firmware: '%s', driver: '%s'},
+ my $msg = sprintf q{----- Ctrl %s [%s]: Fw='%s', Dr='%s'},
$sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name},
$sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver};
+ if (defined $sysinfo{controller}{$id}{storport}) {
+ $msg .= sprintf q{, Storport: '%s'}, $sysinfo{controller}{$id}{storport};
+ }
+ push @storageprint, $msg;
}
foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
- push @storageprint, sprintf q{----- Storage encl %s name: '%s', firmware: '%s'},
+ push @storageprint, sprintf q{----- Encl %s [%s]: Fw='%s'},
$sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name},
$sysinfo{enclosure}{$id}->{firmware};
}
@@ -3543,8 +4108,15 @@
else {
if ($opt{extinfo}) {
print $linebreak;
- printf '------ SYSTEM: %s, SN: %s',
- $sysinfo{model}, $sysinfo{serial};
+ if (defined $opt{htmlinfo}) {
+ printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>',
+ documentation_url($sysinfo{model}), $sysinfo{model},
+ warranty_url($sysinfo{serial}), $sysinfo{serial};
+ }
+ else {
+ printf '------ SYSTEM: %s, SN: %s',
+ $sysinfo{model}, $sysinfo{serial};
+ }
}
if (defined $opt{postmsg}) {
my $post = undef;
@@ -3552,7 +4124,7 @@
open my $POST, '<', $opt{postmsg}
or ( print $linebreak
and print "ERROR: Couldn't open post message file $opt{postmsg}: $!\n"
- and exit $UNKNOWN );
+ and exit $E_UNKNOWN );
$post = <$POST>;
close $POST;
chomp $post;
@@ -3568,8 +4140,8 @@
$post =~ s{[%]d}{$sysinfo{biosdate}}gxms;
$post =~ s{[%]o}{$sysinfo{osname}}gxms;
$post =~ s{[%]r}{$sysinfo{osver}}gxms;
- $post =~ s{[%]p}{$no_of_pdisks}gxms;
- $post =~ s{[%]l}{$no_of_vdisks}gxms;
+ $post =~ s{[%]p}{$count{pdisk}}gxms;
+ $post =~ s{[%]l}{$count{vdisk}}gxms;
$post =~ s{[%]n}{$linebreak}gxms;
$post =~ s{[%]{2}}{%}gxms;
print $post;
@@ -3577,683 +4149,39 @@
}
}
-# Print performance data
-if (defined $opt{perfdata} && !$opt{verbose} && %perfdata) {
- my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ }; # line break for perfdata
- print q{| };
- print join $lb, map { "'$_'=$perfdata{$_}" } sort keys %perfdata;
+# Print any perl warnings that have occurred
+if (@perl_warnings) {
+ foreach (@perl_warnings) {
+ chop @$_;
+ print "${linebreak}INTERNAL ERROR: @$_";
+ }
+ $exit_code = $E_UNKNOWN;
}
-print "\n" if !$opt{verbose};
-
-# Exit with proper exit code
-exit $exit_code;
-
-
-# Man page created with:
-#
-# pod2man -s 3pm -r "`./check_openmanage -V | head -n 1`" -c 'Nagios plugin' check_openmanage check_openmanage.3pm
-#
-
-__END__
-
-=head1 NAME
-
-check_openmanage - Nagios plugin for checking the hardware status on
- Dell servers running OpenManage
-
-=head1 SYNOPSIS
-
-check_openmanage [I<OPTION>]...
-
-=head1 DESCRIPTION
-
-check_openmanage is a plugin for Nagios which checks the hardware
-health of Dell PowerEdge and PowerVault servers. It uses the Dell
-OpenManage Server Administrator (OMSA) software to accomplish this
-task. check_openmanage can be used with SNMP or NRPE, whichever suits
-your needs and particular taste. The plugin checks the health of the
-storage subsystem, power supplies, memory modules, temperature probes
-etc., and gives an alert if any of the components are faulty or
-operate outside normal parameters.
-
-check_openmanage is designed to be used by either locally (using NRPE)
-or remotely (using SNMP). In either mode, the output is (nearly) the
-same. Note that checking the alert log is not supported in SNMP mode.
-
-=head2 Alternate Basename
-
-=over 4
-
-The normal basename is C<check_openmanage>. With this every component
-in the server is checked (modifiable via the B<--check> option). You
-can create symbolic links C<check_openmanage_COMPONENT> which changes
-the behaviour of the plugin. The C<COMPONENT> part may be one of
-
-=over 4
-
-=item B<storage>
-
-Only check storage
-
-=item B<memory>
-
-Only check memory modules
-
-=item B<fans>
-
-Only check fans
-
-=item B<power>
-
-Only check power supplies
-
-=item B<temperature>
-
-Only check temperatures
-
-=item B<cpu>
-
-Only check processors
-
-=item B<voltage>
-
-Only check voltage probes
-
-=item B<batteries>
-
-Only check batteries
-
-=item B<pwrmonitor>
-
-Only check power usage
-
-=item B<intrusion>
-
-Only check chassis intrusion
-
-=item B<esmhealth>
-
-Only check ESM log overall health, i.e. fill grade
-
-=item B<esmlog>
-
-Only check the event log (ESM) content
-
-=item B<alertlog>
-
-Only check the alert log content
-
-=back
-
-=back
-
-=head1 OPTIONS
-
-=head2 General Options
-
-=over 4
-
-=over 4
-=item -t, --timeout I<SECONDS>
+# Restore the original __WARN__ handler
+$SIG{__WARN__} = $original_sigwarn;
-The number of seconds after which the plugin will abort. Default
-timeout is 30 seconds if the option is not present.
-
-=item -g, --global
-
-Check everything except logs. By default log content and chassis
-intrusion sensor are skipped. With this option everything will be
-checked except for log contents.
-
-If used with SNMP, the global system health OID is also probed. This
-gives an added security against bugs in the plugin. The plugin will
-produce an special error message in cases where 1) the global status
-is not OK, and 2) a hardware error has not been detected by the rest
-of the plugin.
-
-If used with omreport, i.e. via NRPE or similar, the output from
-C<omreport system> is used to find the global chassis health. Note
-that storage health is excluded. Not as good as with SNMP, but it
-still means added security against plugin bugs.
-
-This option negates the C<--check> option described below, for all
-checks but the esmlog and alertlog. If used with alternate basenames,
-the option has no effect.
-
-=item -p, --perfdata [I<multline>]
-
-Collect performance data. Performance data collected include
-temperatures (in Celcius) and fan speeds (in rpm). On systems that
-support it, power consumption is also collected (in Watts).
-
-If given the argument C<multiline>, the plugin will output the
-performance data on multiple lines, for Nagios 3.x and above.
-
-=item -w, --warning I<STRING> or I<FILE>
-
-Override the machine-default temperature warning thresholds. Syntax is
-C<id1=max[/min],id2=max[/min],...>. The following example sets warning
-limits to max 50C for probe 0, and max 45C and min 10C for probe 1:
-
-check_openmanage -w 0=50,1=45/10
-
-The minimum limit can be omitted, if desired. Most often, you are only
-interested in setting the maximum thresholds.
-
-This parameter can be either a string with the limits, or a file
-containing the limits string. The option can be specified multiple
-times.
-
-=item -c, --critical I<STRING> or I<FILE>
-
-Override the machine-default temperature critical thresholds. Syntax
-and behaviour is the same as for warning thresholds described above.
-
-=item -o, --ok-info I<NUMBER>
-
-This option lets you define how much output you want the plugin to
-give when everything is OK, i.e. the verbosity level. The default
-value is 0 (one line of output). The output levels are cumulative.
-
-=over 4
-
-=item B<0>
-
-- Only one line (default)
-
-=item B<1>
-
-- BIOS and firmware info on a separate line
-
-=item B<2>
-
-- Storage controller and enclosure info on separate lines
-
-=item B<3>
-
-- OMSA version on separate line
-
-=back
-
-The reason that OMSA version is separated from the rest is that
-finding it requires running a really slow omreport command, when the
-plugin is run locally via NRPE.
-
-=item -i, --info
-
-Prefix any alerts with the service tag.
-
-=item -e, --extinfo
-
-Display a short summary of system information (model and service tag)
-in case of an alert.
-
-=item --postmsg I<STRING> or I<FILE>
-
-User specified post message. Useful for displaying arbitrary or
-various system information at the end of alerts. The argument is
-either a string with the message, or a file containing that
-string. You can control the format with the following interpreted
-sequences:
-
-=over 4
-
-=item B<%m>
-
-System model
-
-=item B<%s>
-
-Service tag
-
-=item B<%b>
-
-BIOS version
-
-=item B<%d>
-
-BIOS release date
-
-=item B<%o>
-
-Operating system name
-
-=item B<%r>
-
-Operating system release
-
-=item B<%p>
-
-Number of physical drives
-
-=item B<%l>
-
-Number of logical drives
-
-=item B<%n>
-
-Line break. Will be a regular line break if run from a TTY, else an
-HTML line break.
-
-=item B<%%>
-
-A literal C<%>
-
-=back
-
-=item --state
-
-Prefix each alert with its corresponding service state (i.e. warning,
-critical etc.). This is useful in case of several alerts from the same
-monitored system.
-
-=item --short-state
-
-Same as the B<--state> option above, except that the state is
-abbreviated to a single letter (W=warning, C=critical etc.).
-
-=item --linebreak=I<STRING>
-
-check_openmanage will sometimes report more than one line, e.g. if
-there are several alerts. If the script has a TTY, it will use regular
-linebreaks. If not (which is the case with NRPE) it will use HTML
-linebreaks. Sometimes it can be useful to control what the plugin uses
-as a line separator, and this option provides that control.
-
-The argument is the exact string to be used as the line
-separator. There are two exceptions, i.e. two keywords that translates
-to the following:
-
-=over 4
-
-=item B<REG>
-
-Regular linebreaks, i.e. "\n".
-
-=item B<HTML>
-
-HTML linebreaks, i.e. "<br/>".
-
-=back
-
-This is a rather special option that is normally not needed. The
-default behaviour should be sufficient for most users.
-
-=item -v, --verbose
-
-Verbose output. Will report status on everything, even if status is
-ok. Blacklisted or unchecked components are ignored (i.e. no output).
-
-=item -h, --help
-
-Display help text.
-
-=item -m, --man
-
-Display man page.
-
-=item -V, --version
-
-Display version info.
-
-=back
-
-=back
-
-=head2 SNMP Options
-
-=over 4
-
-=over 4
-
-=item -s, --snmp
-
-Trigger SNMP mode.
-
-=item -H, --hostname I<HOSTNAME>
-
-The transport address of the destination SNMP device. This argument
-is optional and defaults to C<localhost>.
-
-=item -P, --protocol I<PROTOCOL>
-
-SNMP protocol version. This option is optional and expects a digit
-(i.e. C<1>, C<2> or C<3>) to define the SNMP version. The default is
-C<2>, i.e. SNMP version 2c.
-
-=item -C, --community I<COMMUNITY>
-
-This option expects a string that is to be used as the SNMP community
-name when using SNMP version 1 or 2c. By default the community name
-is set to C<public> if the option is not present.
-
-=item --port I<PORT>
-
-SNMP port of the remote (monitored) system. Defaults to the well-known
-SNMP port 161.
-
-=item -U, --username I<SECURITYNAME>
-
-[SNMPv3] The User-based Security Model (USM) used by SNMPv3 requires
-that a securityName be specified. This option is required when using
-SNMP version 3, and expects a string 1 to 32 octets in lenght.
-
-=item --authpassword I<PASSWORD>, --authkey I<KEY>
-
-[SNMPv3] By default a securityLevel of C<noAuthNoPriv> is assumed. If
-the --authpassword option is specified, the securityLevel becomes
-C<authNoPriv>. The --authpassword option expects a string which is at
-least 1 octet in length as argument.
-
-Optionally, instead of the --authpassword option, the --authkey option
-can be used so that a plain text password does not have to be
-specified in a script. The --authkey option expects a hexadecimal
-string produced by localizing the password with the
-authoritativeEngineID for the specific destination device. The
-C<snmpkey> utility included with the Net::SNMP distribution can be
-used to create the hexadecimal string (see L<snmpkey>).
-
-=item --authprotocol I<ALGORITHM>
-
-[SNMPv3] Two different hash algorithms are defined by SNMPv3 which can
-be used by the Security Model for authentication. These algorithms are
-HMAC-MD5-96 C<MD5> (RFC 1321) and HMAC-SHA-96 C<SHA-1> (NIST FIPS PUB
-180-1). The default algorithm used by the plugin is HMAC-MD5-96. This
-behavior can be changed by using this option. The option expects
-either the string C<md5> or C<sha> to be passed as argument to modify
-the hash algorithm.
-
-=item --privpassword I<PASSWORD>, --privkey I<KEY>
-
-[SNMPv3] By specifying the options --privkey or --privpassword, the
-securityLevel associated with the object becomes
-C<authPriv>. According to SNMPv3, privacy requires the use of
-authentication. Therefore, if either of these two options are present
-and the --authkey or --authpassword arguments are missing, the
-creation of the object fails. The --privkey and --privpassword
-options expect the same input as the --authkey and --authpassword
-options respectively.
-
-=item --privprotocol I<ALGORITHM>
-
-[SNMPv3] The User-based Security Model described in RFC 3414 defines a
-single encryption protocol to be used for privacy. This protocol,
-CBC-DES C<DES> (NIST FIPS PUB 46-1), is used by default or if the
-string C<des> is passed to the --privprotocol option. The Net::SNMP
-module also supports RFC 3826 which describes the use of
-CFB128-AES-128 C<AES> (NIST FIPS PUB 197) in the USM. The AES
-encryption protocol can be selected by passing C<aes> or C<aes128> to
-the --privprotocol option.
-
-One of the following arguments are required: des, aes, aes128, 3des,
-3desde
-
-=back
-
-=back
-
-=head2 Blacklisting
-
-=over 4
-
-=over 4
-
-=item -b, --blacklist I<STRING> or I<FILE>
-
-Blacklist missing and/or failed components, if you do not plan to fix
-them. The parameter is either the blacklist string, or a file (that
-may or may not exist) containing the string. The blacklist string
-contains component names with component IDs separated by slash
-(/). Blacklisted components are left unchecked.
-
-TIP: Use the option C<-v> (or C<--verbose>) to get the blacklist ID for
-devices. The ID is listed in a separate column in the verbose output.
-
-=over 9
-
-=item B<Syntax:>
-
-component1=id1[,id2,...]/component2=id1[,id2,...]/...
-
-=item B<Example:>
-
-check_openmanage -b ps=0/fan=3,5/pdisk=1:0:0:1
-
-=back
-
-In the example we blacklist powersupply 0, fans 3 and 5, and
-physical disk 1:0:0:1. Legal component names include:
-
-=over 8
-
-=item B<ctrl>
-
-Controller
-
-=item B<ctrl_fw>
-
-Suppress the special warning message about old controller
-firmware. Use this if you can not or will not upgrade the firmware.
-
-=item B<ctrl_driver>
-
-Suppress the special warning message about old controller driver.
-Particularly useful on systems where you can not upgrade the driver.
-
-=item B<pdisk>
-
-Physical disk.
-
-=item B<vdisk>
-
-Logical drive (virtual disk)
-
-=item B<bat>
-
-Controller cache battery
-
-=item B<conn>
-
-Connector (channel)
-
-=item B<encl>
-
-Enclosure
-
-=item B<encl_fan>
-
-Enclosure fan
-
-=item B<encl_ps>
-
-Enclosure power supply
-
-=item B<encl_temp>
-
-Enclosure temperature probe
-
-=item B<encl_emm>
-
-Enclosure management module (EMM)
-
-=item B<dimm>
-
-Memory module
-
-=item B<fan>
-
-Fan
-
-=item B<ps>
-
-Powersupply
-
-=item B<temp>
-
-Temperature sensor
-
-=item B<cpu>
-
-Processor (CPU)
-
-=item B<volt>
-
-Voltage probe
-
-=item B<bp>
-
-System battery
-
-=item B<pm>
-
-Amperage probe (power consumption monitoring)
-
-=item B<intr>
-
-Intrusion sensor
-
-=back
-
-=back
-
-=back
-
-=head2 Check Control
-
-=over 4
-
-=over 4
-
-=item --check I<STRING> or I<FILE>
-
-This parameter allows you to adjust which components that should be
-checked at all. This is a rougher approach than blacklisting, which
-require that you specify component id or index. The parameter should
-be either a string containing the adjustments, or a file containing
-the string. No errors are raised if the file does not exist.
-
-Note: This option is ignored with alternate basenames.
-
-=over 9
-
-=item B<Example:>
-
-check_openmanage --check storage=0,intrusion=1
-
-=back
-
-Legal values are described below, along with the default value.
-
-=over 4
-
-=item B<storage>
-
-Check storage subsystem (controllers, disks etc.). Default: ON
-
-=item B<memory>
-
-Check memory (dimms). Default: ON
-
-=item B<fans>
-
-Check chassis fans. Default: ON
-
-=item B<power>
-
-Check power supplies. Default: ON
-
-=item B<temperature>
-
-Check temperature sensors. Default: ON
-
-=item B<cpu>
-
-Check CPUs. Default: ON
-
-=item B<voltage>
-
-Check voltage sensors. Default: ON
-
-=item B<batteries>
-
-Check system batteries. Default: ON
-
-=item B<pwrmonitor>
-
-Check power consumption monitoring. Default: ON
-
-=item B<intrusion>
-
-Check chassis intrusion. Default: OFF
-
-=item B<esmhealth>
-
-Check the ESM log health, i.e. fill grade. Default: ON
-
-=item B<esmlog>
-
-Check the ESM log content. Default: OFF
-
-=item B<alertlog>
-
-Check the alert log content. Default: OFF
-
-=back
-
-=back
-
-=back
-
-=head1 DIAGNOSTICS
-
-The option C<--verbose> (or C<-v>) can be specified to display all
-monitored components.
-
-=head1 DEPENDENCIES
-
-If SNMP is requested, the perl module Net::SNMP is
-required. Otherwise, only a regular perl distribution is required to
-run the script. On the target (monitored) system, Dell Openmanage
-Server Administrator (OMSA) must be installed and running.
-
-=head1 EXIT STATUS
-
-If no errors are discovered, a value of 0 (OK) is returned. An exit
-value of 1 (WARNING) signifies one or more non-critical errors, while
-2 (CRITICAL) signifies one or more critical errors.
-
-The exit value 3 (UNKNOWN) is reserved for errors within the script,
-or errors getting values from Dell OMSA.
-
-=head1 AUTHOR
-
-Written by Trond H. Amundsen <t.h.amundsen@usit.uio.no>
-
-=head1 BUGS AND LIMITATIONS
-
-Storage info is not collected or checked on very old PowerEdge models
-and/or old OMSA versions, due to limitations in OMSA. The overall
-support on those models/versions by this plugin is not well tested.
-
-=head1 INCOMPATIBILITIES
-
-The plugin does not work with the Nagios embedded perl interpreter
-(ePN). You should specify C<perl /path/to/check_openmanage> in your
-Nagios config if you have ePN enabled.
-
-=head1 REPORTING BUGS
-
-Report bugs to <t.h.amundsen@usit.uio.no>
-
-=head1 LICENSE AND COPYRIGHT
+# Print performance data
+if (defined $opt{perfdata} && !$opt{debug} && %perfdata) {
+ my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ }; # line break for perfdata
+ print q{|};
-This nagios plugin comes with ABSOLUTELY NO WARRANTY.
-You may redistribute copies of this plugin under the terms of
-the GNU General Public License L<http://www.gnu.org/licenses/gpl.html>.
+ sub perfdata {
+ my %order
+ = (
+ fan => 0,
+ pwr => 1,
+ temp => 2,
+ enclosure => 3,
+ );
+ return ($order{(split /_/, $a, 2)[0]} cmp $order{(split /_/, $b, 2)[0]}) || $a cmp $b;
+ }
-=head1 SEE ALSO
+ print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata;
+}
-L<http://folk.uio.no/trondham/software/check_openmanage.html>
+# Print a linebreak at the end
+print "\n" if !$opt{debug};
-=cut
+# Exit with proper exit code
+exit $exit_code;
|