Changes of Revision 12
[-] | Changed | check_openmanage.spec |
x 1
2 Summary: A Nagios plugin to check hardware health on Dell servers 3 Name: check_openmanage 4 -Version: 3.5.4 5 +Version: 3.5.5 6 Release: 1%{?dist} 7 License: GPL 8 Packager: Trond Hasle Amundsen <t.h.amundsen@usit.uio.no> 9
10 %attr(0755, root, root) %{_mandir}/man8/%{name}.8* 11 12 %changelog 13 +* Sat Jan 23 2010 Carsten Schoene <cs@linux-administrator.com> - 3.5.5-1 14 +- update to version 3.5.5 15 + - Fixed an SNMP bug where the plugin didn't handle OID indexes that were not sequential. Thanks to Gianluca Varenni for reporting. 16 + - Fixed an SNMP bug when checking old hardware such as the PE 2650 and PE 750. The controller id for physical drives were 17 + collected and displayed incorrectly. This release uses an additional OID to fetch this info, which would otherwise be unavailable. 18 + Thanks to Gianluca Varenni for reporting this bug. 19 + - Should use %snmp_probestatus, not %snmp_status when checking the status of voltage probes. Thanks to Ken McKinlay for a patch. 20 + - Fix when identifying blades via SNMP with very old OMSA versions. Patch from Ken McKinlay. 21 + - Better way of finding the ID of physical drives via SNMP 22 + 23 * Wed Jan 13 2010 Carsten Schoene <cs@linux-administrator.com> - 3.5.4-1 24 - update to version 3.5.4 25 - Added support for storport driver version for controllers, only applicable on servers running Windows. A new blacklisting keyword for suppressing storport driver messages was added. 26 |
||
[+] | Deleted | check_openmanage-3.3.1.tar.gz/check_openmanage.3pm ^ |
@@ -1,660 +0,0 @@ -.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 -.\" -.\" Standard preamble: -.\" ======================================================================== -.de Sh \" Subsection heading -.br -.if t .Sp -.ne 5 -.PP -\fB\\$1\fR -.PP -.. -.de Sp \" Vertical space (when we can't use .PP) -.if t .sp .5v -.if n .sp -.. -.de Vb \" Begin verbatim text -.ft CW -.nf -.ne \\$1 -.. -.de Ve \" End verbatim text -.ft R -.fi -.. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. | will give a -.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to -.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' -.\" expand to `' in nroff, nothing in troff, for use with C<>. -.tr \(*W-|\(bv\*(Tr -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" -. ds C` "" -. ds C' "" -'br\} -.el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. ds R" '' -'br\} -.\" -.\" If the F register is turned on, we'll generate index entries on stderr for -.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index -.\" entries marked with X<> in POD. Of course, you'll have to process the -.\" output yourself in some meaningful fashion. -.if \nF \{\ -. de IX -. tm Index:\\$1\t\\n%\t"\\$2" -.. -. nr % 0 -. rr F -.\} -.\" -.\" For nroff, turn off justification. Always turn off hyphenation; it makes -.\" way too many mistakes in technical documents. -.hy 0 -.if n .na -.\" -.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). -.\" Fear. Run. Save yourself. No user-serviceable parts. -. \" fudge factors for nroff and troff -.if n \{\ -. 
ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds / -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -. \" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. ds Ae AE -.\} -.rm #[ #] #H #V #F C -.\" ======================================================================== -.\" -.IX Title "check_openmanage 3pm" -.TH check_openmanage 3pm "2009-04-28" "check_openmanage 3.3.1" "Nagios plugin" -.SH "NAME" -check_openmanage \- Nagios plugin for checking the hardware status on - Dell servers running OpenManage -.SH "SYNOPSIS" -.IX Header "SYNOPSIS" -check_openmanage [\fI\s-1OPTION\s0\fR]... 
-.SH "DESCRIPTION" -.IX Header "DESCRIPTION" -check_openmanage is a plugin for Nagios which checks the hardware -health of Dell PowerEdge and PowerVault servers. It uses the Dell -OpenManage Server Administrator (\s-1OMSA\s0) software to accomplish this -task. check_openmanage can be used with \s-1SNMP\s0 or \s-1NRPE\s0, whichever suits -your needs and particular taste. The plugin checks the health of the -storage subsystem, power supplies, memory modules, temperature probes -etc., and gives an alert if any of the components are faulty or -operate outside normal parameters. -.PP -check_openmanage is designed to be used either locally (using \s-1NRPE\s0) -or remotely (using \s-1SNMP\s0). In either mode, the output is (nearly) the -same. Note that checking the alert log is not supported in \s-1SNMP\s0 mode. -.Sh "Alternate Basename" -.IX Subsection "Alternate Basename" -.RS 4 -The normal basename is \f(CW\*(C`check_openmanage\*(C'\fR. With this every component -in the server is checked (modifiable via the \fB\-\-check\fR option). You -can create symbolic links \f(CW\*(C`check_openmanage_COMPONENT\*(C'\fR which change -the behaviour of the plugin. The \f(CW\*(C`COMPONENT\*(C'\fR part may be one of -.IP "\fBstorage\fR" 4 -.IX Item "storage" -Only check storage -.IP "\fBmemory\fR" 4 -.IX Item "memory" -Only check memory modules -.IP "\fBfans\fR" 4 -.IX Item "fans" -Only check fans -.IP "\fBpower\fR" 4 -.IX Item "power" -Only check power supplies -.IP "\fBtemperature\fR" 4 -.IX Item "temperature" -Only check temperatures -.IP "\fBcpu\fR" 4 -.IX Item "cpu" -Only check processors -.IP "\fBvoltage\fR" 4 -.IX Item "voltage" -Only check voltage probes -.IP "\fBbatteries\fR" 4 -.IX Item "batteries" -Only check batteries -.IP "\fBpwrmonitor\fR" 4 -.IX Item "pwrmonitor" -Only check power usage -.IP "\fBintrusion\fR" 4 -.IX Item "intrusion" -Only check chassis intrusion -.IP "\fBesmhealth\fR" 4 -.IX Item "esmhealth" -Only check \s-1ESM\s0 log overall health, i.e. 
fill grade -.IP "\fBesmlog\fR" 4 -.IX Item "esmlog" -Only check the event log (\s-1ESM\s0) content -.IP "\fBalertlog\fR" 4 -.IX Item "alertlog" -Only check the alert log content -.RE -.RS 4 -.RE -.SH "OPTIONS" -.IX Header "OPTIONS" -.Sh "General Options" -.IX Subsection "General Options" -.RS 4 -.IP "\-t, \-\-timeout \fI\s-1SECONDS\s0\fR" 4 -.IX Item "-t, --timeout SECONDS" -The number of seconds after which the plugin will abort. Default -timeout is 30 seconds if the option is not present. -.IP "\-g, \-\-global" 4 -.IX Item "-g, --global" -Check everything except logs. By default log content and chassis -intrusion sensor are skipped. With this option everything will be -checked except for log contents. -.Sp -If used with \s-1SNMP\s0, the global system health \s-1OID\s0 is also probed. This -gives an added security against bugs in the plugin. The plugin will -produce a special error message in cases where 1) the global status -is not \s-1OK\s0, and 2) a hardware error has not been detected by the rest -of the plugin. -.Sp -If used with omreport, i.e. via \s-1NRPE\s0 or similar, the output from -\&\f(CW\*(C`omreport system\*(C'\fR is used to find the global chassis health. Note -that storage health is excluded. Not as good as with \s-1SNMP\s0, but it -still means added security against plugin bugs. -.Sp -This option negates the \f(CW\*(C`\-\-check\*(C'\fR option described below, for all -checks but the esmlog and alertlog. If used with alternate basenames, -the option has no effect. -.IP "\-p, \-\-perfdata [\fImultiline\fR]" 4 -.IX Item "-p, --perfdata [multiline]" -Collect performance data. Performance data collected include -temperatures (in Celsius) and fan speeds (in rpm). On systems that -support it, power consumption is also collected (in Watts). -.Sp -If given the argument \f(CW\*(C`multiline\*(C'\fR, the plugin will output the -performance data on multiple lines, for Nagios 3.x and above. 
-.IP "\-w, \-\-warning \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 -.IX Item "-w, --warning STRING or FILE" -Override the machine-default temperature warning thresholds. Syntax is -\&\f(CW\*(C`id1=max[/min],id2=max[/min],...\*(C'\fR. The following example sets warning -limits to max 50C for probe 0, and max 45C and min 10C for probe 1: -.Sp -check_openmanage \-w 0=50,1=45/10 -.Sp -The minimum limit can be omitted, if desired. Most often, you are only -interested in setting the maximum thresholds. -.Sp -This parameter can be either a string with the limits, or a file -containing the limits string. The option can be specified multiple -times. -.IP "\-c, \-\-critical \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 -.IX Item "-c, --critical STRING or FILE" -Override the machine-default temperature critical thresholds. Syntax -and behaviour is the same as for warning thresholds described above. -.IP "\-o, \-\-ok\-info \fI\s-1NUMBER\s0\fR" 4 -.IX Item "-o, --ok-info NUMBER" -This option lets you define how much output you want the plugin to -give when everything is \s-1OK\s0, i.e. the verbosity level. The default -value is 0 (one line of output). The output levels are cumulative. -.RS 4 -.IP "\fB0\fR" 4 -.IX Item "0" -\&\- Only one line (default) -.IP "\fB1\fR" 4 -.IX Item "1" -\&\- \s-1BIOS\s0 and firmware info on a separate line -.IP "\fB2\fR" 4 -.IX Item "2" -\&\- Storage controller and enclosure info on separate lines -.IP "\fB3\fR" 4 -.IX Item "3" -\&\- \s-1OMSA\s0 version on separate line -.RE -.RS 4 -.Sp -The reason that \s-1OMSA\s0 version is separated from the rest is that -finding it requires running a really slow omreport command, when the -plugin is run locally via \s-1NRPE\s0. -.RE -.IP "\-i, \-\-info" 4 -.IX Item "-i, --info" -Prefix any alerts with the service tag. -.IP "\-e, \-\-extinfo" 4 -.IX Item "-e, --extinfo" -Display a short summary of system information (model and service tag) -in case of an alert. 
-.IP "\-\-postmsg \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 -.IX Item "--postmsg STRING or FILE" -User specified post message. Useful for displaying arbitrary or -various system information at the end of alerts. The argument is -either a string with the message, or a file containing that -string. You can control the format with the following interpreted -sequences: -.RS 4 -.IP "\fB%m\fR" 4 -.IX Item "%m" -System model -.IP "\fB%s\fR" 4 -.IX Item "%s" -Service tag -.IP "\fB%b\fR" 4 -.IX Item "%b" -\&\s-1BIOS\s0 version -.IP "\fB%d\fR" 4 -.IX Item "%d" -\&\s-1BIOS\s0 release date -.IP "\fB%o\fR" 4 -.IX Item "%o" -Operating system name -.IP "\fB%r\fR" 4 -.IX Item "%r" -Operating system release -.IP "\fB%p\fR" 4 -.IX Item "%p" -Number of physical drives -.IP "\fB%l\fR" 4 -.IX Item "%l" -Number of logical drives -.IP "\fB%n\fR" 4 -.IX Item "%n" -Line break. Will be a regular line break if run from a \s-1TTY\s0, else an -\&\s-1HTML\s0 line break. -.IP "\fB%%\fR" 4 -.IX Item "%%" -A literal \f(CW\*(C`%\*(C'\fR -.RE -.RS 4 -.RE -.IP "\-\-state" 4 -.IX Item "--state" -Prefix each alert with its corresponding service state (i.e. warning, -critical etc.). This is useful in case of several alerts from the same -monitored system. -.IP "\-\-short\-state" 4 -.IX Item "--short-state" -Same as the \fB\-\-state\fR option above, except that the state is -abbreviated to a single letter (W=warning, C=critical etc.). -.IP "\-\-linebreak=\fI\s-1STRING\s0\fR" 4 -.IX Item "--linebreak=STRING" -check_openmanage will sometimes report more than one line, e.g. if -there are several alerts. If the script has a \s-1TTY\s0, it will use regular -linebreaks. If not (which is the case with \s-1NRPE\s0) it will use \s-1HTML\s0 -linebreaks. Sometimes it can be useful to control what the plugin uses -as a line separator, and this option provides that control. -.Sp -The argument is the exact string to be used as the line -separator. There are two exceptions, i.e. 
two keywords that translate -to the following: -.RS 4 -.IP "\fB\s-1REG\s0\fR" 4 -.IX Item "REG" -Regular linebreaks, i.e. \*(L"\en\*(R". -.IP "\fB\s-1HTML\s0\fR" 4 -.IX Item "HTML" -\&\s-1HTML\s0 linebreaks, i.e. \*(L"<br/>\*(R". -.RE -.RS 4 -.Sp -This is a rather special option that is normally not needed. The -default behaviour should be sufficient for most users. -.RE -.IP "\-v, \-\-verbose" 4 -.IX Item "-v, --verbose" -Verbose output. Will report status on everything, even if status is -ok. Blacklisted or unchecked components are ignored (i.e. no output). -.IP "\-h, \-\-help" 4 -.IX Item "-h, --help" -Display help text. -.IP "\-m, \-\-man" 4 -.IX Item "-m, --man" -Display man page. -.IP "\-V, \-\-version" 4 -.IX Item "-V, --version" -Display version info. -.RE -.RS 4 -.RE -.Sh "\s-1SNMP\s0 Options" -.IX Subsection "SNMP Options" -.RS 4 -.IP "\-s, \-\-snmp" 4 -.IX Item "-s, --snmp" -Trigger \s-1SNMP\s0 mode. -.IP "\-H, \-\-hostname \fI\s-1HOSTNAME\s0\fR" 4 -.IX Item "-H, --hostname HOSTNAME" -The transport address of the destination \s-1SNMP\s0 device. This argument -is optional and defaults to \f(CW\*(C`localhost\*(C'\fR. -.IP "\-P, \-\-protocol \fI\s-1PROTOCOL\s0\fR" 4 -.IX Item "-P, --protocol PROTOCOL" -\&\s-1SNMP\s0 protocol version. This option is optional and expects a digit -(i.e. \f(CW1\fR, \f(CW2\fR or \f(CW3\fR) to define the \s-1SNMP\s0 version. The default is -\&\f(CW2\fR, i.e. \s-1SNMP\s0 version 2c. -.IP "\-C, \-\-community \fI\s-1COMMUNITY\s0\fR" 4 -.IX Item "-C, --community COMMUNITY" -This option expects a string that is to be used as the \s-1SNMP\s0 community -name when using \s-1SNMP\s0 version 1 or 2c. By default the community name -is set to \f(CW\*(C`public\*(C'\fR if the option is not present. -.IP "\-\-port \fI\s-1PORT\s0\fR" 4 -.IX Item "--port PORT" -\&\s-1SNMP\s0 port of the remote (monitored) system. Defaults to the well-known -\&\s-1SNMP\s0 port 161. 
-.IP "\-U, \-\-username \fI\s-1SECURITYNAME\s0\fR" 4 -.IX Item "-U, --username SECURITYNAME" -[SNMPv3] The User-based Security Model (\s-1USM\s0) used by SNMPv3 requires -that a securityName be specified. This option is required when using -\&\s-1SNMP\s0 version 3, and expects a string 1 to 32 octets in length. -.IP "\-\-authpassword \fI\s-1PASSWORD\s0\fR, \-\-authkey \fI\s-1KEY\s0\fR" 4 -.IX Item "--authpassword PASSWORD, --authkey KEY" -[SNMPv3] By default a securityLevel of \f(CW\*(C`noAuthNoPriv\*(C'\fR is assumed. If -the \-\-authpassword option is specified, the securityLevel becomes -\&\f(CW\*(C`authNoPriv\*(C'\fR. The \-\-authpassword option expects a string which is at -least 1 octet in length as argument. -.Sp -Optionally, instead of the \-\-authpassword option, the \-\-authkey option -can be used so that a plain text password does not have to be -specified in a script. The \-\-authkey option expects a hexadecimal -string produced by localizing the password with the -authoritativeEngineID for the specific destination device. The -\&\f(CW\*(C`snmpkey\*(C'\fR utility included with the Net::SNMP distribution can be -used to create the hexadecimal string (see snmpkey). -.IP "\-\-authprotocol \fI\s-1ALGORITHM\s0\fR" 4 -.IX Item "--authprotocol ALGORITHM" -[SNMPv3] Two different hash algorithms are defined by SNMPv3 which can -be used by the Security Model for authentication. These algorithms are -\&\s-1HMAC\-MD5\-96\s0 \f(CW\*(C`MD5\*(C'\fR (\s-1RFC\s0 1321) and \s-1HMAC\-SHA\-96\s0 \f(CW\*(C`SHA\-1\*(C'\fR (\s-1NIST\s0 \s-1FIPS\s0 \s-1PUB\s0 -180\-1). The default algorithm used by the plugin is \s-1HMAC\-MD5\-96\s0. This -behavior can be changed by using this option. The option expects -either the string \f(CW\*(C`md5\*(C'\fR or \f(CW\*(C`sha\*(C'\fR to be passed as argument to modify -the hash algorithm. 
-.IP "\-\-privpassword \fI\s-1PASSWORD\s0\fR, \-\-privkey \fI\s-1KEY\s0\fR" 4 -.IX Item "--privpassword PASSWORD, --privkey KEY" -[SNMPv3] By specifying the options \-\-privkey or \-\-privpassword, the -securityLevel associated with the object becomes -\&\f(CW\*(C`authPriv\*(C'\fR. According to SNMPv3, privacy requires the use of -authentication. Therefore, if either of these two options is present -and the \-\-authkey or \-\-authpassword arguments are missing, the -creation of the object fails. The \-\-privkey and \-\-privpassword -options expect the same input as the \-\-authkey and \-\-authpassword -options respectively. -.IP "\-\-privprotocol \fI\s-1ALGORITHM\s0\fR" 4 -.IX Item "--privprotocol ALGORITHM" -[SNMPv3] The User-based Security Model described in \s-1RFC\s0 3414 defines a -single encryption protocol to be used for privacy. This protocol, -CBC-DES \f(CW\*(C`DES\*(C'\fR (\s-1NIST\s0 \s-1FIPS\s0 \s-1PUB\s0 46\-1), is used by default or if the -string \f(CW\*(C`des\*(C'\fR is passed to the \-\-privprotocol option. The Net::SNMP -module also supports \s-1RFC\s0 3826 which describes the use of -\&\s-1CFB128\-AES\-128\s0 \f(CW\*(C`AES\*(C'\fR (\s-1NIST\s0 \s-1FIPS\s0 \s-1PUB\s0 197) in the \s-1USM\s0. The \s-1AES\s0 -encryption protocol can be selected by passing \f(CW\*(C`aes\*(C'\fR or \f(CW\*(C`aes128\*(C'\fR to -the \-\-privprotocol option. -.Sp -One of the following arguments is required: des, aes, aes128, 3des, -3desde -.RE -.RS 4 -.RE -.Sh "Blacklisting" -.IX Subsection "Blacklisting" -.RS 4 -.IP "\-b, \-\-blacklist \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 -.IX Item "-b, --blacklist STRING or FILE" -Blacklist missing and/or failed components, if you do not plan to fix -them. The parameter is either the blacklist string, or a file (that -may or may not exist) containing the string. The blacklist string -contains component names with component IDs separated by slash -(/). Blacklisted components are left unchecked. 
-.Sp -\&\s-1TIP:\s0 Use the option \f(CW\*(C`\-v\*(C'\fR (or \f(CW\*(C`\-\-verbose\*(C'\fR) to get the blacklist \s-1ID\s0 for -devices. The \s-1ID\s0 is listed in a separate column in the verbose output. -.RS 4 -.IP "\fBSyntax:\fR" 9 -.IX Item "Syntax:" -component1=id1[,id2,...]/component2=id1[,id2,...]/... -.IP "\fBExample:\fR" 9 -.IX Item "Example:" -check_openmanage \-b ps=0/fan=3,5/pdisk=1:0:0:1 -.RE -.RS 4 -.Sp -In the example we blacklist powersupply 0, fans 3 and 5, and -physical disk 1:0:0:1. Legal component names include: -.IP "\fBctrl\fR" 8 -.IX Item "ctrl" -Controller -.IP "\fBctrl_fw\fR" 8 -.IX Item "ctrl_fw" -Suppress the special warning message about old controller -firmware. Use this if you can not or will not upgrade the firmware. -.IP "\fBctrl_driver\fR" 8 -.IX Item "ctrl_driver" -Suppress the special warning message about old controller driver. -Particularly useful on systems where you can not upgrade the driver. -.IP "\fBpdisk\fR" 8 -.IX Item "pdisk" -Physical disk. -.IP "\fBvdisk\fR" 8 -.IX Item "vdisk" -Logical drive (virtual disk) -.IP "\fBbat\fR" 8 -.IX Item "bat" -Controller cache battery -.IP "\fBconn\fR" 8 -.IX Item "conn" -Connector (channel) -.IP "\fBencl\fR" 8 -.IX Item "encl" -Enclosure -.IP "\fBencl_fan\fR" 8 -.IX Item "encl_fan" -Enclosure fan -.IP "\fBencl_ps\fR" 8 -.IX Item "encl_ps" -Enclosure power supply -.IP "\fBencl_temp\fR" 8 -.IX Item "encl_temp" -Enclosure temperature probe -.IP "\fBencl_emm\fR" 8 -.IX Item "encl_emm" -Enclosure management module (\s-1EMM\s0) -.IP "\fBdimm\fR" 8 -.IX Item "dimm" -Memory module -.IP "\fBfan\fR" 8 -.IX Item "fan" -Fan -.IP "\fBps\fR" 8 -.IX Item "ps" -Powersupply -.IP "\fBtemp\fR" 8 -.IX Item "temp" -Temperature sensor -.IP "\fBcpu\fR" 8 -.IX Item "cpu" -Processor (\s-1CPU\s0) -.IP "\fBvolt\fR" 8 -.IX Item "volt" -Voltage probe -.IP "\fBbp\fR" 8 -.IX Item "bp" -System battery -.IP "\fBpm\fR" 8 -.IX Item "pm" -Amperage probe (power consumption monitoring) -.IP "\fBintr\fR" 8 -.IX Item "intr" 
-Intrusion sensor -.RE -.RS 4 -.RE -.RE -.RS 4 -.RE -.Sh "Check Control" -.IX Subsection "Check Control" -.RS 4 -.IP "\-\-check \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 -.IX Item "--check STRING or FILE" -This parameter allows you to adjust which components should be -checked at all. This is a rougher approach than blacklisting, which -requires that you specify component id or index. The parameter should -be either a string containing the adjustments, or a file containing -the string. No errors are raised if the file does not exist. -.Sp -Note: This option is ignored with alternate basenames. -.RS 4 -.IP "\fBExample:\fR" 9 -.IX Item "Example:" -check_openmanage \-\-check storage=0,intrusion=1 -.RE -.RS 4 -.Sp -Legal values are described below, along with the default value. -.IP "\fBstorage\fR" 4 -.IX Item "storage" -Check storage subsystem (controllers, disks etc.). Default: \s-1ON\s0 -.IP "\fBmemory\fR" 4 -.IX Item "memory" -Check memory (dimms). Default: \s-1ON\s0 -.IP "\fBfans\fR" 4 -.IX Item "fans" -Check chassis fans. Default: \s-1ON\s0 -.IP "\fBpower\fR" 4 -.IX Item "power" -Check power supplies. Default: \s-1ON\s0 -.IP "\fBtemperature\fR" 4 -.IX Item "temperature" -Check temperature sensors. Default: \s-1ON\s0 -.IP "\fBcpu\fR" 4 -.IX Item "cpu" -Check CPUs. Default: \s-1ON\s0 -.IP "\fBvoltage\fR" 4 -.IX Item "voltage" -Check voltage sensors. Default: \s-1ON\s0 -.IP "\fBbatteries\fR" 4 -.IX Item "batteries" -Check system batteries. Default: \s-1ON\s0 -.IP "\fBpwrmonitor\fR" 4 -.IX Item "pwrmonitor" -Check power consumption monitoring. Default: \s-1ON\s0 -.IP "\fBintrusion\fR" 4 -.IX Item "intrusion" -Check chassis intrusion. Default: \s-1OFF\s0 -.IP "\fBesmhealth\fR" 4 -.IX Item "esmhealth" -Check the \s-1ESM\s0 log health, i.e. fill grade. Default: \s-1ON\s0 -.IP "\fBesmlog\fR" 4 -.IX Item "esmlog" -Check the \s-1ESM\s0 log content. Default: \s-1OFF\s0 -.IP "\fBalertlog\fR" 4 -.IX Item "alertlog" -Check the alert log content. 
Default: \s-1OFF\s0 -.RE -.RS 4 -.RE -.RE -.RS 4 -.RE -.SH "DIAGNOSTICS" -.IX Header "DIAGNOSTICS" -The option \f(CW\*(C`\-\-verbose\*(C'\fR (or \f(CW\*(C`\-v\*(C'\fR) can be specified to display all -monitored components. -.SH "DEPENDENCIES" -.IX Header "DEPENDENCIES" -If \s-1SNMP\s0 is requested, the perl module Net::SNMP is -required. Otherwise, only a regular perl distribution is required to -run the script. On the target (monitored) system, Dell Openmanage -Server Administrator (\s-1OMSA\s0) must be installed and running. -.SH "EXIT STATUS" -.IX Header "EXIT STATUS" -If no errors are discovered, a value of 0 (\s-1OK\s0) is returned. An exit -value of 1 (\s-1WARNING\s0) signifies one or more non-critical errors, while -2 (\s-1CRITICAL\s0) signifies one or more critical errors. -.PP -The exit value 3 (\s-1UNKNOWN\s0) is reserved for errors within the script, -or errors getting values from Dell \s-1OMSA\s0. -.SH "AUTHOR" -.IX Header "AUTHOR" -Written by Trond H. Amundsen <t.h.amundsen@usit.uio.no> -.SH "BUGS AND LIMITATIONS" -.IX Header "BUGS AND LIMITATIONS" -Storage info is not collected or checked on very old PowerEdge models -and/or old \s-1OMSA\s0 versions, due to limitations in \s-1OMSA\s0. The overall -support on those models/versions by this plugin is not well tested. -.SH "INCOMPATIBILITIES" -.IX Header "INCOMPATIBILITIES" -The plugin does not work with the Nagios embedded perl interpreter -(ePN). You should specify \f(CW\*(C`perl /path/to/check_openmanage\*(C'\fR in your -Nagios config if you have ePN enabled. -.SH "REPORTING BUGS" -.IX Header "REPORTING BUGS" -Report bugs to <t.h.amundsen@usit.uio.no> -.SH "LICENSE AND COPYRIGHT" -.IX Header "LICENSE AND COPYRIGHT" -This nagios plugin comes with \s-1ABSOLUTELY\s0 \s-1NO\s0 \s-1WARRANTY\s0. -You may redistribute copies of this plugin under the terms of -the \s-1GNU\s0 General Public License <http://www.gnu.org/licenses/gpl.html>. 
-.SH "SEE ALSO" -.IX Header "SEE ALSO" -<http://folk.uio.no/trondham/software/check_openmanage.html> | ||
Deleted | check_openmanage-3.4.7.tar.gz ^ | |
Deleted | check_openmanage-3.4.9.tar.gz ^ | |
Deleted | check_openmanage-3.5.0.tar.gz ^ | |
Deleted | check_openmanage-3.5.1.tar.gz ^ | |
Deleted | check_openmanage-3.5.2.tar.gz ^ | |
Deleted | check_openmanage-3.5.3.tar.gz ^ | |
[+] | Added | check_openmanage-3.5.5.tar.gz/CHANGES ^ |
@@ -0,0 +1,497 @@ +3.5.5 2010-01-22 +------------------ + +* Fixed an SNMP bug where the plugin didn't handle OID indexes that + were not sequential. Thanks to Gianluca Varenni for reporting. +* Fixed an SNMP bug when checking old hardware such as the PE 2650 and + PE 750. The controller id for physical drives were collected and + displayed incorrectly. This release uses an additional OID to fetch + this info, which would otherwise be unavailable. Thanks to Gianluca + Varenni for reporting this bug. +* Should use %snmp_probestatus, not %snmp_status when checking the + status of voltage probes. Thanks to Ken McKinlay for a patch. +* Fix when identifying blades via SNMP with very old OMSA + versions. Patch from Ken McKinlay. +* Better way of finding the ID of physical drives via SNMP + + +3.5.4 2010-01-13 +------------------ + +* Added support for storport driver version for controllers, only + applicable on servers running Windows. A new blacklisting keyword + for suppressing storport driver messages was added. +* The "all" keyword in blacklisting is now case insensitive. +* More fine-grained reporting in the rare case where a controller + battery fails during learning and charging states. +* New improved way of reporting perl warnings during execution of the + plugin. + + +3.5.3 2009-12-17 +------------------ + +* Fix for path to omreport on Linux with OMSA 6.2.0 +* A couple of other small fixes + + +3.5.2 2009-11-17 +------------------ + +* Fix for undefined device name for logical drives (thanks to Pontus + Fuchs for a patch) +* Fixed a bug in the PNP4Nagios template, that prevented the template + from working with PNP4Nagios 0.6. Thanks to the PNP4Nagios team for + the fix. +* Other small fixes + +3.5.1 2009-10-22 +------------------ + +* CPU type, family etc. 
are now reported in case of a CPU failure (and + in the debug output) +* The debug output now reports Openmanage version and plugin version + + +3.5.0 2009-10-13 +------------------ + +* New option '-a' or '--all' turns on checking of everything +* The manual page (POD info) is removed from the script and is now in + a separate file, to make check_openmanage fully ePN compatible +* ePN is no longer disabled by default, check_openmanage no longer has + an opinion on whether it should run under ePN or not +* The '-m' or '--man' option is no longer available +* The option '-v' or '--verbose' is renamed to '-d' or '--debug', + which makes more sense wrt. its usage +* The '-g' or '--global' option is removed. Checking the global health + status is now default if applicable +* Checking intrusion detection is now turned on by default +* The obsolete option '--snmp' is removed +* The option '--state' now has a shorter equivalent '-s' +* The basename stuff and options '--only-critical' and + '--only-warning' are now replaced by an option '--only' +* If plugin is run by Nagios, redirect stderr to stdout +* Added option '--omreport', that lets the user specify the full path + to the omreport binary +* Added non-8bit-legacy default search paths for omreport.exe for + Windows boxen +* Minor changes to the plugin output, for consistency +* New blacklisting keyword 'bat_charge' disables warning messages + related to controller cache battery charging. Thanks to Robert + Heinzmann for a patch. +* For blacklisting, the component ID can now be "ALL", in which case all + components of that type are blacklisted. +* Man page is moved to manual section 8 + + +3.4.9 2009-08-07 +------------------ + +* Fixed a bug that could cause errors and weird results when checking + cooling devices (fans) via SNMP. Thanks to Ken McKinlay for spotting + this bug and reporting it. 
+ +3.4.8 2009-07-31 +------------------ + +* For failed physical drives, check_openmanage will now output the + drive's vendor, model and size in GB or TB. + +3.4.7 2009-07-24 +------------------ + +* The -s|--snmp option was redundant and no longer does anything. SNMP + is triggered automatically if the -H|--hostname option is + present. The -s|--snmp option is kept for compatibility, but has no + effect. + +3.4.6 2009-07-07 +------------------ + +* Added support for performance data (temperatures) from attached + storage enclosures such as the MD1000 + +3.4.5 2009-06-22 +------------------ + +* Fixed a regression in the --htmlinfo option when it is not supplied + with an argument + +3.4.4 2009-06-22 +------------------ + +* New option --htmlinfo adds clickable HTML links in the plugin's + output + +3.4.3 2009-06-11 +------------------ + +* Fixed a regression bug in CPU and power supply reporting that only + affects verbose output +* If blacklisting is used, the global health check (via the --global + option) is now negated. Checking the global health doesn't make + sense when one or more components is blacklisted. Thanks to Rene + Beaulieu for reporting this bug +* The PNP4Nagios template is now included in the tarball and zip + archive + +3.4.2 2009-06-03 +------------------ + +* Improved memory error reporting, when using omreport +* Collect performance data from pwrmonitoring (amperage probes) that + were previously ignored when using omreport + +3.4.1 2009-05-25 +------------------ + +* Improved memory error reporting, when using SNMP +* Other small enhancements + +3.4.0 2009-05-25 +------------------ + +* The plugin is now compatible with the Nagios embedded Perl + interpreter (ePN) in theory. However, the plugin will not use + ePN by default. We don't want any "accidents". +* License is now GPLv3, previously only specified as "GPL" +* New options --only-critical and --only-warning. 
With these options + the plugin will only print critical or warning alerts, respectively. +* Bugfixes and speed enhancements in the storage section, when + checking enclosure components via omreport +* The --okinfo option is now less verbose and more to the point +* Lots of code refactoring for readability, maintainability and + robustness + +3.3.2 2009-05-05 +------------------ + +* Fixed a bug in the storage section, when checking controllers. This + is an obscure bug that only manifests itself in the odd case where a + server has multiple controllers, and one of the controllers are + missing some of the OIDs, in which case these OIDs will be missing + for the other controllers as well. The change is minor and only + includes using get_table() instead of get_entries() to collect the + SNMP result. Thanks to Stephan Bovet for reporting this bug. + +3.3.1 2009-04-28 +------------------ + +* The --perfdata option can now optionally take an argument + "multiline", which makes the plugin produce multiline performance + data output in a Nagios 3.x way. Not really needed, but the plugin + output is prettier. +* Added comment within the 10 first lines to disable the nagios + embedded perl (ePN) interpreter by default for Nagios 3.x +* Improvements in the performance data output. Units are now included + +3.3.0 2009-04-07 +------------------ + +* Added --global option, which turns on checking of everything. If + used with SNMP, the global system health status is also probed, to + protect the user against bugs in the plugin. If used with omreport, + the overall chassis health is used. +* Support for SNMP version 3 +* New check added: esmhealth. This checks the overall health of the + ESM log, i.e. the fill grade. 
More than 80% means a warning message +* Fixed alert log reporting to use the same format as for the ESM log +* Output messages are now sorted by severity +* Minor changes in how out-of-date controller firmware/driver is + reported +* Code refactoring and cleanup + +3.2.7 2009-03-29 +------------------ + +* Use "omreport about" to collect OMSA version. Slightly faster than + "omreport system version". This should give a small speedup in + certain configurations +* Fixed typo in output when a logical drive is rebuilding. Thanks to + Andreas Olsson for reporting +* Improved reporting of ESM log content +* Added omreport.sh as alternate omreport path +* Lots of other small fixes and enhancements + +Plus: A few changes to make the plugin work with old PowerEdge models +(e.g. 2550, 2450) and/or old OMSA versions (e.g. version 4.5): + +* Use the chassisModelName OID to determine if SNMP works (instead of + BaseboardType) +* No longer require a response when checking baseboard type via + SNMP. If there is no response, we assume that we're not dealing with + a blade server + +Thanks to Christian McHugh for help with testing and debugging this +stuff + +3.2.6 2009-03-05 +------------------ + +* Use 'omreport system operatingsystem' to collect OS info, instead of + 'omreport system' version which is incredibly slow. This should speed + things up in certain configurations. +* A few speedups, don't collect information that isn't needed +* Man page fixes + +3.2.5 2009-02-24 +------------------ + +* New option --linebreak to specify the separator between line in case + of multiline output +* Added support for 64bit Windows. Thanks to Patrick Hemmen for a patch +* [Patrick Hemmen] Added install.bat for Windows installation +* [Patrick Hemmen] Improvements on install.sh. 
Will now install in + /usr/lib64 for x86_64 +* RPMs are now architecture dependent, because of different libdir + +3.2.4 2009-02-17 +------------------ + +* New option -o|--ok-info to display extra information when everything + is ok. The plugin can now display storage firmware and driver info, + DRAC and BMC firmware, and OMSA version +* Support for setting custom minimum temperature thresholds via the + -c|--critical and -w|--warning options +* Better and more detailed temperature error reporting +* Bugfix in the amperage report (including performance data). The + plugin now takes into account the correct unit and measurement for + amperage probes (other than watts) +* New option --port lets the user specify the remote SNMP port number + +3.2.3 2009-02-09 +------------------ + +* Regression fix: Use the older Processor Device SNMP OIDs for older + PowerEdge models, that don't have the new Processor Device Status + OIDs. Thanks to Nicole Hähnel for reporting this bug. +* Default output (when there are no alerts) now shows RAC firmware, + BMC firmware, info about controllers and enclosures (firmware, + driver). + +3.2.2 2009-02-03 +------------------ + +* Regression fix: Ignore unoccupied CPU slots with SNMP probing. This + fixes a bug introduced in version 3.2.1, which would output + something like this if one or more CPU slots were empty: CPU 1 needs + attention () + +3.2.1 2009-02-03 +------------------ + +* Use Processor Device Status Table OIDs instead of Processor Device + Table when checking CPUs via SNMP +* Bugfix: don't report throttled CPUs as warnings when checking via + SNMP (same as for checking locally) + +3.2.0 2009-01-27 +------------------ + +* New options --state and --short-state for displaying service state + along with the alert +* Lots of small fixes for code readability and maintainability + +3.1.1 2009-01-12 +------------------ + +* Support for running natively on Windows (using omreport.exe). Thanks + to Peter Jestico for a patch. 
+* Support for compiled Windows version, i.e. check_openmanage.exe is + now a legal script name. +* Exit with error if script basename is illegal/unknown +* Various small fixes + +3.1.0 2008-12-26 +------------------ + +* Use of alternate basenames for checking only one class of components +* Added support for checking the ESM log via SNMP +* Code refactoring for robustness and maintainability +* Numerous small fixes and enhancements +* Added install script in distribution tarball + +3.0.2 2008-12-20 +------------------ + +* The script no longer aborts if it can't get system information via + SNMP. Give a warning instead, as this is not a critical error +* Increased robustness when checking controllers + +3.0.1 2008-12-11 +------------------ + +* Man page fix in the 'check' section. Thanks to Ansgar Dahlen for + reporting this. +* Allow invalid command error from 'omreport chassis pwrmonitoring' +* Various small fixes + +3.0.0 2008-12-04 +------------------ + +* Use unique IDs for storage components with regard to blacklisting, + which means that the blacklisting API has changed +* Added checks for storage components: connectors (channels), + enclosures, enclosure fans, enclosure power supplies, enclosure + temperature probes and enclosure management modules (EMMs) +* Improved verbose output +* New option -t|--timeout for setting the plugin timeout +* New option -w|--warning for setting custom temperature warning + thresholds +* New option -c|--critical for setting custom temperature critical + thresholds +* Option --check can no longer be specified in its short form (-c) +* Code cleanup and improvements + +2.1.1 2008-11-24 +------------------ + +* The workaround for the OMSA bug introduced in OMSA 5.5.0 didn't take + multiple controllers into account. This has been fixed. 
+ +2.1.0 2008-11-19 +------------------ + +* New option -i|--info prefixes all alerts with the service tag +* New option -e|--extinfo gives an extra line of output in case of an + alert (model and service tag) +* New option --postmsg lets the user specify a post message string, + with info such as model, service tag etc. +* Options -b|--blacklist and -c|--check can now be specified multiple + times (actually quite useful) + +2.0.9 2008-11-17 +------------------ + +* Slightly improved output for alerts on logical drives (vdisks) +* Now shows a rebuilding physical disk as a warning, as this is + usually accompanied by a degraded vdisk. Previous versions didn't + show this at all (omreport classifies it as "OK"). + +2.0.8 2008-11-14 +------------------ + +* Slightly improved output for charging controller batteries + +2.0.7 2008-11-12 +------------------ + +* Bugfix for reporting physical drives with predictive failure (both + via NRPE and SNMP) + +2.0.6 2008-10-30 +------------------ + +* Fix bug in option handling (ambiguous options) +* Slightly improved output if checking the storage subsystem is turned + off +* Don't complain if there are no logical drives. This is OK. Thanks to + Jamie Henderson for reporting this + +2.0.5 2008-10-29 +------------------ + +* Fix bug in SNMP status level table + +2.0.4 2008-10-29 +------------------ + +* Added workaround for a BUG introduced in OpenManage 5.5.0. OM + sometimes adds a newline in the controller driver version name, + which leads to problems parsing the output. Thanks to Hiren Patel + for bringing this to my attention. + +2.0.3 2008-10-28 +------------------ + +* (snmp) Improved handling of cases where OM is not working properly + +2.0.2 2008-10-27 +------------------ + +* Fixed issue where controller number for physical disks can't be + established via SNMP (now identifies as controller no. 
-1) + +2.0.1 2008-10-23 +------------------ + +* Correctly identifies and reports error condition in which OpenManage + has stopped working (it happens) + +2.0.0 2008-10-23 +------------------ + +* SNMP support +* Same options for checking, blacklisting etc. supported with SNMP +* Same output with SNMP as with NRPE + +1.2.1 2008-09-25 +------------------ + +* Collects performance data if the option '-p' or '--perfdata' is + supplied. +* New blacklisting directives ctrl_fw and ctrl_driver + added. Suppresses the "special" warning messages concerning outdated + controller firmware and driver. Useful if you can't or won't + upgrade. + +1.1.2 2008-08-06 +------------------ + +* Fix getting system model and serial number for newer blades + +1.1.1 2008-08-06 +------------------ + +* Three new checks added: + - System battery probes (typical CMOS battery). Newer poweredge + models have these + - Power consumption monitoring (if the server supports it) + - ESM log, with same functionality as the alert log check. Disabled + by default. + +1.1.0 2008-08-04 +------------------ + +* Internal refactoring: use ssv-formatted output from openmanage, + resulting in slightly faster execution and increased robustness. +* If /usr/bin/omreport doesn't exist, try + /opt/dell/srvadmin/oma/bin/omreport. +* Allow for no instrumented/redundant power supplies. Needed on + low-end poweredge models and blades. + +1.0.3 2008-07-25 +------------------ + +* Openmanage reports non-critical warning about throttled CPUs on new + hardware models. Most of us use ondemand CPU frequency scaling (with + throttled CPUs as a result). This specific non-critical warning (CPU + Throttled) is ignored from now on. +* Remove superfluous Celsius sign when reporting temperatures. + +1.0.2 2008-07-25 +------------------ + +* Accommodate blade systems with no fans or powersupplies, i.e. accept + errors from omreport when trying to check fans or powersupplies on + blade servers. 
+* Accommodate newer hardware with slightly different omreport + options. Use the newer options if they exist. Not really necessary + yet, but deprecated options may be removed in future versions of + Dell OpenManage. + +1.0.1 2008-07-18 +------------------ + +* When everything is OK, check_openmanage now outputs the same info as + Gerhard Lausser's excellent check_hpasm plugin does for HP servers: + + OK - System: 'poweredge 2850', S/N: 'XXXXXXX', ROM: 'A06 + 10/03/2006', hardware working fine, 2 logical drives, 4 physical + drives + +1.0.0 2008-07-15 +------------------ + +* Initial release | ||
[+] | Added | check_openmanage-3.5.5.tar.gz/COPYING ^ |
@@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. 
+ + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. 
+ + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. | ||
[+] | Changed | check_openmanage-3.5.5.tar.gz/INSTALL ^ |
@@ -10,30 +10,11 @@ ------------------- To install check_openmanage, copy the file check_openmanage to the -Nagios plugin directory (usually /usr/lib/nagios/plugins/contrib): +Nagios plugin directory (usually /usr/lib/nagios/plugins): - cp check_openmanage /usr/lib/nagios/plugins/contrib - -To create the symlinks for single checks (alternate basenames): - - cd /usr/lib/nagios/plugins/contrib - ln -s check_openmanage check_openmanage_alertlog - ln -s check_openmanage check_openmanage_batteries - ln -s check_openmanage check_openmanage_cpu - ln -s check_openmanage check_openmanage_esmlog - ln -s check_openmanage check_openmanage_esmhealth - ln -s check_openmanage check_openmanage_fans - ln -s check_openmanage check_openmanage_intrusion - ln -s check_openmanage check_openmanage_memory - ln -s check_openmanage check_openmanage_power - ln -s check_openmanage check_openmanage_pwrmonitor - ln -s check_openmanage check_openmanage_storage - ln -s check_openmanage check_openmanage_temperature + cp check_openmanage /usr/lib/nagios/plugins To install the manual page: - cp check_openmanage.3pm.gz /usr/share/man/man3 - -This is all. + cp check_openmanage.8 /usr/share/man/man8 --trond | ||
[+] | Changed | check_openmanage-3.5.5.tar.gz/README ^ |
@@ -1,4 +1,83 @@ -Information avilable online: +README file for check_openmanage +================================ + +Files included in the tarball and zip archive: + + check_openmanage - The plugin + check_openmanage.exe - Win32 standalone executable + check_openmanage.8 - Man page + check_openmanage.pod - Man page source + check_openmanage.php - PNP4Nagios template + check_openmanage.spec - RPM spec file + COPYING - License information + INSTALL - Install HOWTO + install.bat - Windows install script + install.sh - Linux/UNIX install script + README - This file + + +About this program +------------------ + +check_openmanage is a plugin for Nagios which checks the hardware +health of Dell PowerEdge and some PowerVault servers. It uses the Dell +OpenManage Server Administrator (OMSA) software, which must be running +on the monitored system. check_openmanage can be used remotely with +SNMP or locally with NRPE, check_by_ssh or similar, whichever suits +your needs and particular taste. The plugin checks the health of the +storage subsystem, power supplies, memory modules, temperature probes +etc., and gives an alert if any of the components are faulty or +operate outside normal parameters. + +The plugin can give performance data with the '-p' or '--perfdata' +switch. Performance data collected include temperatures, fan speeds +and power usage (on servers that support it). 
+ +Storage components checked: + +* Controllers +* Physical drives +* Logical drives +* Cache batteries +* Connectors (channels) +* Enclosures +* Enclosure fans +* Enclosure power supplies +* Enclosure temperature probes +* Enclosure management modules (EMMs) + +Chassis components checked: + +* Processors +* Memory modules +* Cooling fans +* Temperature probes +* Power supplies +* Batteries +* Voltage probes +* Power usage +* Chassis intrusion + +Other: + +* ESM Log health +* ESM Log content (default disabled) +* Alert Log (default disabled, not SNMP) + +check_openmanage will identify blades and will not report "missing" +power supplies, cooling fans etc. on blades. It will also accept that +other components are "missing", except for components that should be +present in all servers. For example, all servers should have at least +one temperature probe, but not all servers have logical drives +(depends on the type and configuration of the controller). + +This nagios plugin is designed to be used by either NRPE or with +SNMP. It is written in perl. In NRPE mode, it uses omreport to display +status on various hardware components. In SNMP mode, it checks the +same components as with omreport. Output is parsed and reported in a +Nagios friendly way. + +More information available online: http://folk.uio.no/trondham/software/check_openmanage.html | ||
[+] | Changed | check_openmanage-3.5.5.tar.gz/check_openmanage ^ |
@@ -1,111 +1,164 @@ #!/usr/bin/perl # -# The following line disables the Nagios embedded Perl interpreter: -# nagios: -epn -# # Nagios plugin # # Monitor Dell server hardware status using Dell OpenManage Server # Administrator, either locally via NRPE, or remotely via SNMP. # -# $Id: check_openmanage 13896 2009-04-28 09:43:53Z trondham $ +# $Id: check_openmanage 16302 2010-01-22 10:09:58Z trondham $ +# +# Copyright (C) 2010 Trond H. Amundsen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
# +require 5.006; # Perl v5.6.0 or newer is required use strict; use warnings; -use POSIX qw(isatty); +use POSIX qw(isatty ceil); use Getopt::Long qw(:config no_ignore_case); -use Pod::Usage; -use File::Basename qw(basename); + +# Global (package) variables used throughout the code +use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL + $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE + $snmp_session $snmp_error $omreport $globalstatus $global + $linebreak $omopt_chassis $omopt_system $blade + $exit_code $snmp $original_sigwarn + %check %opt %perfdata %reverse_exitcode %status2nagios + %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo + %blacklist %nagios_alert_count %count + @perl_warnings @controllers @enclosures + @report_storage @report_chassis @report_other + ); #--------------------------------------------------------------------- # Initialization and global variables #--------------------------------------------------------------------- -# Version and similar info -my $NAME = 'check_openmanage'; -my $VERSION = '3.3.1'; -my $AUTHOR = 'Trond H. Amundsen'; -my $CONTACT = 't.h.amundsen@usit.uio.no'; - -# Exit codes -my $OK = 0; -my $WARNING = 1; -my $CRITICAL = 2; -my $UNKNOWN = 3; +# Small subroutine to collect any perl warnings during execution +sub collect_perl_warning { + push @perl_warnings, [@_]; +} -# SNMP session variables -my $snmp_session = undef; -my $snmp_error = undef; +# Set the WARN signal to use our collect subroutine above +$original_sigwarn = $SIG{__WARN__}; +$SIG{__WARN__} = \&collect_perl_warning; -# Firmware update lock file -my $firmware_lock = '/var/lock/.spsetup'; # FIXME: location on Windows? +# Version and similar info +$NAME = 'check_openmanage'; +$VERSION = '3.5.5'; +$AUTHOR = 'Trond H. 
Amundsen'; +$CONTACT = 't.h.amundsen@usit.uio.no'; -# The omreport command -my $omreport = undef; +# Exit codes +$E_OK = 0; +$E_WARNING = 1; +$E_CRITICAL = 2; +$E_UNKNOWN = 3; + +# Firmware update lock file [FIXME: location on Windows?] +$FW_LOCK = '/var/lock/.spsetup'; # default on Linux + +# Usage text +$USAGE = <<"END_USAGE"; +Usage: $NAME [OPTION]... +END_USAGE + +# Help text +$HELP = <<'END_HELP'; + +GENERAL OPTIONS: + + -p, --perfdata Output performance data + -t, --timeout Plugin timeout in seconds + -c, --critical Customise temperature critical limits + -w, --warning Customise temperature warning limits + -d, --debug Debug output, reports everything + -h, --help Display this help text + -V, --version Display version info + +SNMP OPTIONS: + + -H, --hostname Hostname or IP of the server (needed for SNMP) + -C, --community SNMP community string + -P, --protocol SNMP protocol version + --port SNMP port number + +OUTPUT OPTIONS: + + -i, --info Prefix any alerts with the service tag + -e, --extinfo Append system info to alerts + -s, --state Prefix alerts with alert state + --short-state Prefix alerts with alert state (abbreviated) + -o, --okinfo Verbosity when check result is OK + --htmlinfo HTML output with clickable links + +CHECK CONTROL AND BLACKLISTING: + + -a, --all Check everything, even log content + -b, --blacklist Blacklist missing and/or failed components + --only Only check a certain component or alert type + --check Fine-tune which components are checked + +For more information and advanced options, see the manual page or URL: + http://folk.uio.no/trondham/software/check_openmanage.html +END_HELP -# Check flags, override available with the --check option -my %check - = ( - 'storage' => 1, # check storage subsystem - 'memory' => 1, # check memory (dimms) - 'fans' => 1, # check fan status - 'power' => 1, # check power supplies - 'temperature' => 1, # check temperature - 'cpu' => 1, # check processors - 'voltage' => 1, # check voltage - 'batteries' => 
1, # check battery probes - 'pwrmonitor' => 1, # check power consumption - 'intrusion' => 0, # check intrusion detection - 'alertlog' => 0, # check the alert log - 'esmlog' => 0, # check the ESM log (hardware log) - 'esmhealth' => 1, # check the ESM log overall health - ); +# Version and license text +$LICENSE = <<"END_LICENSE"; +$NAME $VERSION +Copyright (C) 2010 $AUTHOR +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. -# If we were called with alternate basename, check to see which -# component should be checked -my $self = basename($0); -my $component = undef; -if ($self =~ m/\A ${NAME}_(.+?)(\.exe)? \z/xms) { # matches "$NAME_foo" and "$NAME_foo.exe" - $component = $1; - if (!exists $check{$component}) { - print "CONFIGURATION ERROR: Unknown component '$component'. Check plugin filename\n"; - exit $UNKNOWN; - } -} +Written by $AUTHOR <$CONTACT> +END_LICENSE # Options with default values -my %opt = ( 'blacklist' => [], - 'check' => [], - 'critical' => [], - 'warning' => [], - 'timeout' => 30, # default timeout is 30 seconds - 'verbose' => 0, - 'help' => 0, - 'man' => 0, - 'perfdata' => undef, - 'info' => 0, - 'extinfo' => 0, - 'postmsg' => undef, - 'state' => 0, - 'short-state' => 0, - 'okinfo' => 0, # default "ok" output level - 'linebreak' => undef, - 'version' => 0, - 'global' => 0, - 'snmp' => 0, - 'port' => 161, # default port is the well-known SNMP port 161 - 'hostname' => 'localhost', - 'community' => 'public', # SMNP v1 or v2c - 'protocol' => 2, - 'username' => undef, # SMNP v3 - 'authpassword' => undef, # SMNP v3 - 'authkey' => undef, # SMNP v3 - 'authprotocol' => undef, # SMNP v3 - 'privpassword' => undef, # SMNP v3 - 'privkey' => undef, # SMNP v3 - 'privprotocol' => undef, # SMNP v3 - ); +%opt = ( 'blacklist' => [], + 'check' => [], + 'critical' => [], + 'warning' => [], + 'timeout' => 30, # default 
timeout is 30 seconds + 'debug' => 0, + 'help' => 0, + 'perfdata' => undef, + 'info' => 0, + 'extinfo' => 0, + 'htmlinfo' => undef, + 'postmsg' => undef, + 'state' => 0, + 'short-state' => 0, + 'okinfo' => 0, # default "ok" output level + 'linebreak' => undef, + 'version' => 0, + 'all' => 0, + 'only' => undef, + 'omreport' => undef, + 'port' => 161, # default SNMP port + 'hostname' => undef, + 'community' => 'public', # SMNP v1 or v2c + 'protocol' => 2, + 'username' => undef, # SMNP v3 + 'authpassword' => undef, # SMNP v3 + 'authkey' => undef, # SMNP v3 + 'authprotocol' => undef, # SMNP v3 + 'privpassword' => undef, # SMNP v3 + 'privkey' => undef, # SMNP v3 + 'privprotocol' => undef, # SMNP v3 + ); # Get options GetOptions('b|blacklist=s' => \@{ $opt{blacklist} }, @@ -113,20 +166,21 @@ 'c|critical=s' => \@{ $opt{critical} }, 'w|warning=s' => \@{ $opt{warning} }, 't|timeout=i' => \$opt{timeout}, - 'v|verbose' => \$opt{verbose}, + 'd|debug' => \$opt{debug}, 'h|help' => \$opt{help}, - 'm|man' => \$opt{man}, 'V|version' => \$opt{version}, 'p|perfdata:s' => \$opt{perfdata}, 'i|info' => \$opt{info}, 'e|extinfo' => \$opt{extinfo}, + 'htmlinfo:s' => \$opt{htmlinfo}, 'postmsg=s' => \$opt{postmsg}, - 'state' => \$opt{state}, + 's|state' => \$opt{state}, 'short-state' => \$opt{shortstate}, 'o|ok-info=i' => \$opt{okinfo}, 'l|linebreak=s' => \$opt{linebreak}, - 'g|global' => \$opt{global}, - 's|snmp' => \$opt{snmp}, + 'a|all' => \$opt{all}, + 'only=s' => \$opt{only}, + 'omreport=s' => \$opt{omreport}, 'port=i' => \$opt{port}, 'H|hostname=s' => \$opt{hostname}, 'C|community=s' => \$opt{community}, @@ -138,46 +192,55 @@ 'privpassword=s' => \$opt{privpassword}, 'privkey=s' => \$opt{privkey}, 'privprotocol=s' => \$opt{privprotocol}, - ) or pod2usage(-exitstatus => $UNKNOWN, -verbose => 0); + ) or do { print $USAGE; exit $E_UNKNOWN }; # If user requested help -if ($opt{'help'}) { - pod2usage(-exitstatus => $OK, -verbose => 1); -} - -# If user requested man page -if ($opt{'man'}) { - 
pod2usage(-exitstatus => $OK, -verbose => 2); +if ($opt{help}) { + print $USAGE, $HELP; + exit $E_OK; } # If user requested version info -if ($opt{'version'}) { - print <<"END_VERSION"; -$NAME $VERSION -This Nagios plugin comes with ABSOLUTELY NO WARRANTY. -You may redistribute copies of this plugin under the terms of -the GNU General Public License <http://www.gnu.org/licenses/gpl.html>. - -Written by $AUTHOR <$CONTACT> -END_VERSION - exit $OK; -} - -# If user has specified the '--global' option, which implies that -# everything should be checked. -if ($opt{global}) { - $check{intrusion} = 1; # turn on chassis intrusion check +if ($opt{version}) { + print $LICENSE; + exit $E_OK; } # Setting timeout $SIG{ALRM} = sub { - print "$NAME timed out after $opt{timeout} seconds\n"; - exit $UNKNOWN; + print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n"; + exit $E_UNKNOWN; }; alarm $opt{timeout}; +# If we're using SNMP +$snmp = defined $opt{hostname} ? 1 : 0; + +# SNMP session variables +$snmp_session = undef; +$snmp_error = undef; + +# The omreport command +$omreport = undef; + +# Check flags, override available with the --check option +%check = ( 'storage' => 1, # check storage subsystem + 'memory' => 1, # check memory (dimms) + 'fans' => 1, # check fan status + 'power' => 1, # check power supplies + 'temp' => 1, # check temperature + 'cpu' => 1, # check processors + 'voltage' => 1, # check voltage + 'batteries' => 1, # check battery probes + 'amperage' => 1, # check power consumption + 'intrusion' => 1, # check intrusion detection + 'alertlog' => 0, # check the alert log + 'esmlog' => 0, # check the ESM log (hardware log) + 'esmhealth' => 1, # check the ESM log overall health + ); + # Default line break -my $linebreak = isatty(*STDOUT) ? "\n" : '<br/>'; +$linebreak = isatty(*STDOUT) ? 
"\n" : '<br/>'; # Line break from option if (defined $opt{linebreak}) { @@ -193,44 +256,75 @@ } # Exit with status=UNKNOWN if there is firmware upgrade in progress -if (!$opt{'snmp'} && -f $firmware_lock) { - print "MONITORING DISABLED - Firmware update in progress ($firmware_lock exists)\n"; - exit $UNKNOWN; -} +if (!$snmp && -f $FW_LOCK) { + print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n"; + exit $E_UNKNOWN; +} + +# List of controllers and enclosures +@controllers = (); # controllers +@enclosures = (); # enclosures + +# Messages +@report_storage = (); # messages with associated nagios level (storage) +@report_chassis = (); # messages with associated nagios level (chassis) +@report_other = (); # messages with associated nagios level (other) + +# Counters for everything +%count + = ( + 'pdisk' => 0, # number of physical disks + 'vdisk' => 0, # number of logical drives (virtual disks) + 'temp' => 0, # number of temperature probes + 'volt' => 0, # number of voltage probes + 'amp' => 0, # number of amperage probes + 'intr' => 0, # number of intrusion probes + 'dimm' => 0, # number of memory modules + 'fan' => 0, # number of fan probes + 'cpu' => 0, # number of CPUs + 'bat' => 0, # number of batteries + 'power' => 0, # number of power supplies + 'esm' => { + 'Critical' => 0, # critical entries in ESM log + 'Non-Critical' => 0, # warning entries in ESM log + 'Ok' => 0, # ok entries in ESM log + }, + 'alert' => { + 'Critical' => 0, # critical entries in alert log + 'Non-Critical' => 0, # warning entries in alert log + 'Ok' => 0, # ok entries in alert log + }, + ); + +# Performance data +%perfdata = (); -# Global variables used throughout the code -my @controllers = (); # list of controllers -my %enclosure = (); # list of enclosure (id,name) pairs -my @report_storage = (); # messages with associated nagios level (storage) -my @report_chassis = (); # messages with associated nagios level (chassis) -my @report_other = (); # messages with 
associated nagios level (other) -my $no_of_pdisks = 0; # counts number of physical disks -my $no_of_vdisks = 0; # counts number of logical drives (virtual disks) -my %perfdata = (); # performance data -my $globalstatus = $OK; # global health status +# Global health status +$global = 1; # default is to check global status +$globalstatus = $E_OK; # default global health status is "OK" # Nagios error levels reversed -my %ERRORCODE +%reverse_exitcode = ( - 0 => 'OK', - 1 => 'WARNING', - 2 => 'CRITICAL', - 3 => 'UNKNOWN', + $E_OK => 'OK', + $E_WARNING => 'WARNING', + $E_CRITICAL => 'CRITICAL', + $E_UNKNOWN => 'UNKNOWN', ); # OpenManage (omreport) and SNMP error levels -my %status2nagios +%status2nagios = ( - 'Unknown' => $CRITICAL, - 'Critical' => $CRITICAL, - 'Non-Critical' => $WARNING, - 'Ok' => $OK, - 'Non-Recoverable' => $CRITICAL, - 'Other' => $CRITICAL, + 'Unknown' => $E_CRITICAL, + 'Critical' => $E_CRITICAL, + 'Non-Critical' => $E_WARNING, + 'Ok' => $E_OK, + 'Non-Recoverable' => $E_CRITICAL, + 'Other' => $E_CRITICAL, ); # Status via SNMP -my %snmp_status +%snmp_status = ( 1 => 'Other', 2 => 'Unknown', @@ -241,37 +335,37 @@ ); # Probe Status via SNMP -my %snmp_probestatus +%snmp_probestatus = ( - 1 => 'Other', # -- probe status is not one of the following: - 2 => 'Unknown', # -- probe status is unknown (not known or monitored) - 3 => 'Ok', # -- probe is reporting a value within the thresholds - 4 => 'nonCriticalUpper', # -- probe has crossed upper noncritical threshold - 5 => 'criticalUpper', # -- probe has crossed upper critical threshold - 6 => 'nonRecoverableUpper', # -- probe has crossed upper non-recoverable threshold - 7 => 'nonCriticalLower', # -- probe has crossed lower noncritical threshold - 8 => 'criticalLower', # -- probe has crossed lower critical threshold - 9 => 'nonRecoverableLower', # -- probe has crossed lower non-recoverable threshold - 10 => 'failed', # -- probe is not functional + 1 => 'Other', # probe status is not one of the following: + 2 
=> 'Unknown', # probe status is unknown (not known or monitored) + 3 => 'Ok', # probe is reporting a value within the thresholds + 4 => 'nonCriticalUpper', # probe has crossed upper noncritical threshold + 5 => 'criticalUpper', # probe has crossed upper critical threshold + 6 => 'nonRecoverableUpper', # probe has crossed upper non-recoverable threshold + 7 => 'nonCriticalLower', # probe has crossed lower noncritical threshold + 8 => 'criticalLower', # probe has crossed lower critical threshold + 9 => 'nonRecoverableLower', # probe has crossed lower non-recoverable threshold + 10 => 'failed', # probe is not functional ); # Probe status translated to Nagios alarm levels -my %probestatus2nagios +%probestatus2nagios = ( - 'Other' => $CRITICAL, - 'Unknown' => $CRITICAL, - 'Ok' => $OK, - 'nonCriticalUpper' => $WARNING, - 'criticalUpper' => $CRITICAL, - 'nonRecoverableUpper' => $CRITICAL, - 'nonCriticalLower' => $WARNING, - 'criticalLower' => $CRITICAL, - 'nonRecoverableLower' => $CRITICAL, - 'failed' => $CRITICAL, + 'Other' => $E_CRITICAL, + 'Unknown' => $E_CRITICAL, + 'Ok' => $E_OK, + 'nonCriticalUpper' => $E_WARNING, + 'criticalUpper' => $E_CRITICAL, + 'nonRecoverableUpper' => $E_CRITICAL, + 'nonCriticalLower' => $E_WARNING, + 'criticalLower' => $E_CRITICAL, + 'nonRecoverableLower' => $E_CRITICAL, + 'failed' => $E_CRITICAL, ); # System information gathered -my %sysinfo +%sysinfo = ( 'bios' => 'N/A', # BIOS version 'biosdate' => 'N/A', # BIOS release date @@ -291,15 +385,20 @@ adjust_checks() if defined $opt{check}; # Blacklisted components -my %blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : (); +%blacklist = defined $opt{blacklist} ? 
%{ get_blacklist() } : (); + +# If blacklisting is in effect, don't check global health status +if (scalar keys %blacklist > 0) { + $global = 0; +} # Take into account new hardware and blades -my $omopt_chassis = 'chassis'; # default "chassis" option to omreport -my $omopt_system = 'system'; # default "system" option to omreport -my $blade = 0; # if this is a blade system +$omopt_chassis = 'chassis'; # default "chassis" option to omreport +$omopt_system = 'system'; # default "system" option to omreport +$blade = 0; # if this is a blade system # Some initializations and checking before we begin -if ($opt{snmp}) { +if ($snmp) { snmp_initialize(); # initialize SNMP snmp_check(); # check that SNMP works snmp_detect_blade(); # detect blade via SNMP @@ -314,10 +413,62 @@ #--------------------------------------------------------------------- -# Functions +# Helper functions #--------------------------------------------------------------------- # +# Store a message in one of the message arrays +# +sub report { + my ($type, $msg, $exval, $id) = @_; + defined $id or $id = q{}; + + my %type2array + = ( + 'storage' => \@report_storage, + 'chassis' => \@report_chassis, + 'other' => \@report_other, + ); + + return push @{ $type2array{$type} }, [ $msg, $exval, $id ]; +} + + +# +# Run command, put resulting output lines in an array and return a +# pointer to that array +# +sub run_command { + my $command = shift; + + open my $CMD, '-|', $command + or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN) + and return [] }; + my @lines = <$CMD>; + close $CMD + or do { report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN) + and return \@lines }; + return \@lines; +} + +# +# Run command, put resulting output in a string variable and return it +# +sub slurp_command { + my $command = shift; + + open my $CMD, '-|', $command + or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN) and return }; + my $rawtext = do { local 
$/ = undef; <$CMD> }; # slurping + close $CMD; + + # NOTE: We don't check the return value of close() since omreport + # does something weird sometimes. + + return $rawtext; +} + +# # Initialize SNMP # sub snmp_initialize { @@ -333,8 +484,8 @@ '-version' => $opt{protocol}, ); + # Parameters for SNMP v3 if ($opt{protocol} == 3) { - # Parameters for SNMP v3 # Username is mandatory if (defined $opt{username}) { @@ -342,7 +493,7 @@ } else { print "SNMP ERROR: With SNMPv3 the username must be specified\n"; - exit $UNKNOWN; + exit $E_UNKNOWN; } # Authpassword is optional @@ -371,8 +522,9 @@ $param{'-privprotocol'} = $opt{privprotocol}; } else { - print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', must be one of [des|aes|aes128|3des|3desde]\n"; - exit $UNKNOWN; + print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', " + . "must be one of [des|aes|aes128|3des|3desde]\n"; + exit $E_UNKNOWN; } } @@ -382,18 +534,19 @@ $param{'-authprotocol'} = $opt{authprotocol}; } else { - print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', must be one of [md5|sha]\n"; - exit $UNKNOWN; + print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', " + . 
"must be one of [md5|sha]\n"; + exit $E_UNKNOWN; } } } + # Parameters for SNMP v2c or v1 elsif ($opt{protocol} == 2 or $opt{protocol} == 1) { - # Parameters for SNMP v2c or v1 $param{'-community'} = $opt{community}; } else { print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n"; - exit $UNKNOWN; + exit $E_UNKNOWN; } # Try to initialize the SNMP session @@ -401,12 +554,12 @@ ($snmp_session, $snmp_error) = Net::SNMP->session( %param ); if (!defined $snmp_session) { printf "SNMP: %s\n", $snmp_error; - exit $UNKNOWN; + exit $E_UNKNOWN; } } else { print "You need perl module Net::SNMP to run $NAME in SNMP mode\n"; - exit $UNKNOWN; + exit $E_UNKNOWN; } return; } @@ -422,13 +575,13 @@ # Typically if remote host isn't responding if (!defined $result) { printf "SNMP CRITICAL: %s\n", $snmp_session->error; - exit $CRITICAL; + exit $E_CRITICAL; } # If OpenManage isn't installed or is not working if ($result->{$chassisModelName} =~ m{\A noSuch (Instance|Object) \z}xms) { - print "(SNMP) OpenManage is not installed or is not working correctly\n"; - exit $UNKNOWN; + print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n"; + exit $E_UNKNOWN; } return; } @@ -443,7 +596,7 @@ # Identify blade. Older models (4th and 5th gen models) and/or old # OMSA (4.x) don't have this OID. 
If we get "noSuchInstance" or # similar, we assume that this isn't a blade - if ($result->{$DellBaseBoardType} eq '3') { + if (exists $result->{$DellBaseBoardType} && $result->{$DellBaseBoardType} eq '3') { $blade = 1; } return; @@ -453,21 +606,30 @@ # Locate the omreport binary # sub find_omreport { + # If user has specified path to omreport + if (defined $opt{omreport} and -x $opt{omreport}) { + $omreport = qq{"$opt{omreport}"}; + return; + } + # Possible full paths for omreport my @omreport_paths = ( '/usr/bin/omreport', # default on Linux + '/opt/dell/srvadmin/bin/omreport', # default on Linux with OMSA 6.2.0 '/opt/dell/srvadmin/oma/bin/omreport.sh', # alternate on Linux '/opt/dell/srvadmin/oma/bin/omreport', # alternate on Linux - 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # default on Windows - 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # default on Windows x64 + 'C:\Program Files (x86)\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x64 + 'C:\Program Files\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x32 + 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x32 + 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x64 ); # Find the one to use OMREPORT_PATH: foreach my $bin (@omreport_paths) { if (-x $bin) { - $omreport = $bin; + $omreport = qq{"$bin"}; last OMREPORT_PATH; } } @@ -476,7 +638,7 @@ # have permission to execute the binary if (!defined $omreport) { print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n"; - exit $UNKNOWN; + exit $E_UNKNOWN; } return; } @@ -487,10 +649,7 @@ # (on newer hardware), as well as blade servers. # sub check_omreport_options { - open my $OMCHECK, '-|', "$omreport -? 2>&1" - or ( push @report_other, [ "Couldn't run '$omreport -?': $!", - $UNKNOWN, q{} ] and return ); - while (<$OMCHECK>) { + foreach (@{ run_command("$omreport -? 
2>&1") }) { if (m/\A servermodule /xms) { # If "servermodule" argument to omreport exists, use it # instead of argument "system" @@ -507,7 +666,6 @@ $blade = 1; } } - close $OMCHECK; return; } @@ -524,8 +682,8 @@ my $tmp = q{}; if (-f $black) { open my $BL, '<', $black - or ( push @report_other, [ "Couldn't open blacklist file $black: $!", - $UNKNOWN, q{} ] and return {} ); + or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN) + and return {} }; $tmp = <$BL>; close $BL; chomp $tmp; @@ -560,13 +718,58 @@ sub adjust_checks { my @cl = (); + # Adjust checking based on the '--all' option + if ($opt{all}) { + # Check option usage + if (defined $opt{only} and $opt{only} !~ m{\A critical|warning \z}xms) { + print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n}; + exit $E_UNKNOWN; + } + if (scalar @{ $opt{check} } > 0) { + print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n}; + exit $E_UNKNOWN; + } + + # set the check hash to check everything + map { $_ = 1 } values %check; + + return; + } + + # Adjust checking based on the '--only' option + if (defined $opt{only} and $opt{only} !~ m{\A critical|warning \z}xms) { + # Check option usage + if (scalar @{ $opt{check} } > 0) { + print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n}; + exit $E_UNKNOWN; + } + if (! 
exists $check{$opt{only}} && $opt{only} ne 'chassis') { + print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n}; + exit $E_UNKNOWN; + } + + # reset the check hash + map { $_ = 0 } values %check; + + # adjust the check hash + if ($opt{only} eq 'chassis') { + map { $check{$_} = 1 } qw(memory fans power temp cpu voltage + batteries amperage intrusion esmhealth); + } + else { + $check{$opt{only}} = 1; + } + + return; + } + + # Adjust checking based on the '--check' option if (scalar @{ $opt{check} } >= 0) { foreach my $check (@{ $opt{check} }) { my $tmp = q{}; if (-f $check) { open my $CL, '<', $check - or ( push @report_other, [ "Couldn't open check file $check: $!", - $UNKNOWN, q{} ] and return ); + or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN) and return }; $tmp = <$CL>; close $CL; } @@ -585,14 +788,22 @@ foreach my $c (@checks) { next if $c !~ m/=/xms; my ($key, $val) = split /=/xms, $c; - if ($opt{global} and $key !~ m/ esmlog | alertlog /xms) { - # If the '--global' switch is specified, you're only - # allowed to mess with the log stuff - next; - } $check{$key} = $val; } } + + # Check if we should check global health status + CHECK_KEY: + foreach (keys %check) { + next CHECK_KEY if $_ eq 'esmlog'; # not part of global status + next CHECK_KEY if $_ eq 'alertlog'; # not part of global status + + if ($check{$_} == 0) { # found something with checking turned off + $global = 0; + last CHECK_KEY; + } + } + return; } @@ -616,6 +827,7 @@ | No\scontrollers\sfound # No RAID controller | No\sbattery\sprobes\sfound\son\sthis\ssystem # No battery probes | Invalid\scommand:\spwrmonitoring # Older OMSAs lack this command(?) 
+# | Current\sprobes\snot\sfound # No power monitoring capability }xms; # Errors that are OK on blade servers @@ -625,27 +837,27 @@ }xms; # Run omreport and fetch output - open my $CMD, '-|', "$omreport $command -fmt ssv 2>&1" - or ( push @report_other, [ "Couldn't run $omreport: $!", - $UNKNOWN, q{} ] and return @output ); - my $rawtext = do { local $/ = undef; <$CMD> }; # slurping - close $CMD; + my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1"); + return [] if !defined $rawtext; # Workaround for Openmanage BUG introduced in OMSA 5.5.0 - $rawtext =~ s/\n;/;/gxms if $command eq 'storage controller'; + $rawtext =~ s{\n;}{;}gxms if $command eq 'storage controller'; + + # Openmanage sometimes puts a linebreak between "Error" and the + # actual error text + $rawtext =~ s{^Error\s*\n}{Error: }xms; # Parse output, store in array - for ((split /\n/xms, $rawtext)) { - if (m/\A Error/xms) { + for ((split m{\n}xms, $rawtext)) { + if (m{\AError}xms) { next if m{$ok_errors}xms; next if ($blade and m{$ok_blade_errors}xms); - push @report_other, [ "Problem running 'omreport $command': $_", - $UNKNOWN, q{} ]; + report('other', "Problem running 'omreport $command': $_", $E_UNKNOWN); } next if !m/(.*?;){2}/xms; # ignore lines with less than 3 fields my @vals = split /;/xms; - if ($vals[0] =~ m/\A (Index|ID|Severity) \z/xms) { + if ($vals[0] =~ m/\A (Index|ID|Severity|Processor|Current\sSpeed) \z/xms) { @keys = @vals; } else { @@ -673,7 +885,7 @@ if (defined $blacklist{$name}) { foreach my $comp (@{ $blacklist{$name} }) { - if (defined $id and $comp eq $id) { + if (defined $id and ($comp eq $id or uc($comp) eq 'ALL')) { $ret = 1; } } @@ -703,8 +915,8 @@ my $tmp = q{}; if (-f $t) { open my $F, '<', $t - or ( push @report_other, [ "Couldn't open temperature threshold file $t: $!", - $UNKNOWN, q{} ] and return {} ); + or do { report('other', "Couldn't open temperature threshold file $t: $!", + $E_UNKNOWN) and return {} }; $tmp = <$F>; close $F; } @@ -739,17 +951,26 @@ # 
Gets the output from SNMP result according to the OIDs checked sub get_snmp_output { my ($result,$oidref) = @_; + my @temp = (); my @output = (); foreach my $oid (keys %{ $result }) { - my @dummy = split /\./xms, $oid; - my $id = pop @dummy; - --$id; - my $foo = join q{.}, @dummy; - if (exists $oidref->{$foo}) { - $output[$id]{$oidref->{$foo}} = $result->{$oid}; + my $short = $oid; + $short =~ s{\s}{}gxms; # remove whitespace + $short =~ s{\A (.+) \. (\d+) \z}{$1}xms; # remove last number + my $id = $2; + if (exists $oidref->{$short}) { + $temp[$id]{$oidref->{$short}} = $result->{$oid}; + } + } + + # Remove any empty indexes + foreach my $out (@temp) { + if (defined $out) { + push @output, $out; } } + return \@output; } @@ -760,28 +981,106 @@ foreach my $lst (@{ $list }) { if (!exists $lst->{$key}) { - $lst->{$key} = $val + $lst->{$key} = $val; } } return; } +# Return the URL for official Dell documentation for a specific +# PowerEdge server +sub documentation_url { + my $model = shift; + + # create model short form, e.g. "r710" + $model =~ s{\A PowerEdge \s (.+?) \z}{lc($1)}exms; + + # special case for blades (e.g. M600, M710), they have common + # documentation + $model =~ s{\A m\d+ \z}{m}xms; + + return 'http://support.dell.com/support/edocs/systems/pe' . $model . '/'; +} + +# Return the URL for warranty information for a server with a given +# serial number (servicetag) +sub warranty_url { + my $tag = shift; + + # Dell support sites for different parts of the world + my %supportsite + = ( + 'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/', + 'ap' => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?', + 'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?', + ); + + # warranty URLs for different country codes + my %url + = ( + # EMEA + 'at' => $supportsite{emea} . 
'de/details?c=at&l=de&ServiceTag=', # Austria + 'be' => $supportsite{emea} . 'nl/details?c=be&l=nl&ServiceTag=', # Belgium + 'cz' => $supportsite{emea} . 'cs/details?c=cz&l=cs&ServiceTag=', # Czech Republic + 'de' => $supportsite{emea} . 'de/details?c=de&l=de&ServiceTag=', # Germany + 'dk' => $supportsite{emea} . 'da/details?c=dk&l=da&ServiceTag=', # Denmark + 'es' => $supportsite{emea} . 'es/details?c=es&l=es&ServiceTag=', # Spain + 'fi' => $supportsite{emea} . 'fi/details?c=fi&l=fi&ServiceTag=', # Finland + 'fr' => $supportsite{emea} . 'fr/details?c=fr&l=fr&ServiceTag=', # France + 'gr' => $supportsite{emea} . 'en/details?c=gr&l=el&ServiceTag=', # Greece + 'it' => $supportsite{emea} . 'it/details?c=it&l=it&ServiceTag=', # Italy + 'il' => $supportsite{emea} . 'en/details?c=il&l=en&ServiceTag=', # Israel + 'me' => $supportsite{emea} . 'en/details?c=me&l=en&ServiceTag=', # Middle East + 'no' => $supportsite{emea} . 'no/details?c=no&l=no&ServiceTag=', # Norway + 'nl' => $supportsite{emea} . 'nl/details?c=nl&l=nl&ServiceTag=', # The Netherlands + 'pl' => $supportsite{emea} . 'pl/details?c=pl&l=pl&ServiceTag=', # Poland + 'pt' => $supportsite{emea} . 'en/details?c=pt&l=pt&ServiceTag=', # Portugal + 'ru' => $supportsite{emea} . 'ru/details?c=ru&l=ru&ServiceTag=', # Russia + 'se' => $supportsite{emea} . 'sv/details?c=se&l=sv&ServiceTag=', # Sweden + 'uk' => $supportsite{emea} . 'en/details?c=uk&l=en&ServiceTag=', # United Kingdom + 'za' => $supportsite{emea} . 'en/details?c=za&l=en&ServiceTag=', # South Africa + # America + 'br' => $supportsite{glob} . 'c=br&l=pt&ServiceTag=', # Brazil + 'ca' => $supportsite{glob} . 'c=ca&l=en&ServiceTag=', # Canada + 'mx' => $supportsite{glob} . 'c=mx&l=es&ServiceTag=', # Mexico + 'us' => $supportsite{glob} . 'c=us&l=en&ServiceTag=', # USA + # Asia/Pacific + 'au' => $supportsite{ap} . 'c=au&l=en&ServiceTag=', # Australia + 'cn' => $supportsite{ap} . 'c=cn&l=zh&ServiceTag=', # China + 'in' => $supportsite{ap} . 
'c=in&l=en&ServiceTag=', # India + # default fallback + 'XX' => $supportsite{glob} . 'ServiceTag=', # default + ); + + if (exists $url{$opt{htmlinfo}}) { + return $url{$opt{htmlinfo}} . $tag; + } + else { + return $url{XX} . $tag; + } +} + + + +#--------------------------------------------------------------------- +# Check functions +#--------------------------------------------------------------------- #----------------------------------------- # Check global health status #----------------------------------------- sub check_global { - my $health = $OK; + my $health = $E_OK; - if ($opt{snmp}) { + if ($snmp) { # # Checks global status, i.e. both storage and chassis # my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1'; my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]); if (!defined $result) { - printf "SNMP [systemStateGlobalSystemStatus]: %s\n", $snmp_error; - exit $UNKNOWN; + printf "SNMP ERROR [global]: %s\n", $snmp_error; + exit $E_UNKNOWN; } $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}}; } @@ -789,10 +1088,7 @@ # # NB! This does not check storage, only chassis... # - open my $CMD, '-|', "$omreport $omopt_system -fmt ssv" - or ( push @report_other, [ sprintf("Couldn't run $omreport $omopt_system: $!"), - $UNKNOWN, q{} ] and return $OK ); - while (<$CMD>) { + foreach (@{ run_command("$omreport $omopt_system -fmt ssv") }) { next if !m/;/xms; next if m/\A SEVERITY;COMPONENT/xms; if (m/\A (.+?);Main\sSystem(\sChassis)? /xms) { @@ -800,7 +1096,6 @@ last; } } - close $CMD; } # Return the status @@ -821,9 +1116,11 @@ my $mindr = undef; my $firmware = undef; my $driver = undef; + my $minstdr = undef; # Minimum required Storport driver version (whats this?) + my $stdr = undef; # Storport driver version (whats this?) 
my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %ctrl_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.1.1.1' => 'controllerNumber', @@ -835,8 +1132,15 @@ '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion', '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion', '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion', + '1.3.6.1.4.1.674.10893.1.20.130.1.1.55' => 'FIXME_StorportDriverVersion', + '1.3.6.1.4.1.674.10893.1.20.130.1.1.56' => 'FIXME_StorportMinDriverVersion', ); - my $result = $snmp_session->get_entries(-columns => [keys %ctrl_oid]); + + # We use get_table() here for the odd case where a server has + # two or more controllers, and where some OIDs are missing on + # one of the controllers. + my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1'; + my $result = $snmp_session->get_table(-baseoid => $controllerTable); # No controllers is OK return if !defined $result; @@ -859,19 +1163,23 @@ CTRL: foreach my $out (@output) { - if ($opt{'snmp'}) { - $id = $out->{'controllerNumber'} - 1; - $name = $out->{'controllerName'}; - $state = $ctrl_state{$out->{'controllerState'}}; - $status = $snmp_status{$out->{'controllerComponentStatus'}}; - $minfw = exists $out->{'controllerMinFWVersion'} - ? $out->{'controllerMinFWVersion'} : undef; - $mindr = exists $out->{'controllerMinDriverVersion'} - ? $out->{'controllerMinDriverVersion'} : undef; + if ($snmp) { + $id = $out->{controllerNumber} - 1; + $name = $out->{controllerName}; + $state = $ctrl_state{$out->{controllerState}}; + $status = $snmp_status{$out->{controllerComponentStatus}}; + $minfw = exists $out->{controllerMinFWVersion} + ? $out->{controllerMinFWVersion} : undef; + $mindr = exists $out->{controllerMinDriverVersion} + ? $out->{controllerMinDriverVersion} : undef; $firmware = exists $out->{controllerFWVersion} ? $out->{controllerFWVersion} : 'N/A'; $driver = exists $out->{controllerDriverVersion} ? 
$out->{controllerDriverVersion} : 'N/A'; + $minstdr = exists $out->{'FIXME_StorportMinDriverVersion'} + ? $out->{FIXME_StorportMinDriverVersion} : undef; + $stdr = exists $out->{FIXME_StorportDriverVersion} + ? $out->{FIXME_StorportDriverVersion} : undef; $nexus = convert_nexus($out->{controllerNexusID}); } else { @@ -887,6 +1195,12 @@ ? $out->{'Firmware Version'} : 'N/A'; $driver = $out->{'Driver Version'} ne 'Not Applicable' ? $out->{'Driver Version'} : 'N/A'; + $minstdr = (exists $out->{'Minimum Required Storport Driver Version'} + and $out->{'Minimum Required Storport Driver Version'} ne 'Not Applicable') + ? $out->{'Minimum Required Storport Driver Version'} : undef; + $stdr = (exists $out->{'Storport Driver Version'} + and $out->{'Storport Driver Version'} ne 'Not Applicable') + ? $out->{'Storport Driver Version'} : undef; $nexus = $id; } @@ -898,35 +1212,43 @@ $sysinfo{'controller'}{$id}{'name'} = $name; $sysinfo{'controller'}{$id}{'driver'} = $driver; $sysinfo{'controller'}{$id}{'firmware'} = $firmware; + $sysinfo{'controller'}{$id}{'storport'} = $stdr; next CTRL if blacklisted('ctrl', $nexus); # Special case: old firmware if (!blacklisted('ctrl_fw', $id) && defined $minfw) { chomp $firmware; - push @report_storage, [ sprintf('Controller %d (%s): Firmware is out of date (%s)', - $id, $name, $firmware), - $WARNING, $nexus ]; + my $msg = sprintf q{Controller %d [%s]: Firmware '%s' is out of date}, + $id, $name, $firmware; + report('storage', $msg, $E_WARNING, $nexus); } # Special case: old driver if (!blacklisted('ctrl_driver', $id) && defined $mindr) { chomp $driver; - push @report_storage, [ sprintf('Controller %d (%s): Driver is out of date (%s)', - $id, $name, $driver), - $WARNING, $nexus ]; + my $msg = sprintf q{Controller %d [%s]: Driver '%s' is out of date}, + $id, $name, $driver; + report('storage', $msg, $E_WARNING, $nexus); + } + # Special case: old storport driver + if (!blacklisted('ctrl_stdr', $id) && defined $minstdr) { + chomp $stdr; + my $msg 
= sprintf q{Controller %d [%s]: Storport driver '%s' is out of date}, + $id, $name, $stdr; + report('storage', $msg, $E_WARNING, $nexus); } # Ok if ($status eq 'Ok' or ($status eq 'Non-Critical' - and (defined $minfw or defined $mindr))) { - push @report_storage, [ sprintf('Controller %d (%s) is %s', - $id, $name, $state), - $OK, $nexus ]; + and (defined $minfw or defined $mindr or defined $minstdr))) { + my $msg = sprintf 'Controller %d [%s] is %s', + $id, $name, $state; + report('storage', $msg, $E_OK, $nexus); } # Default else { - push @report_storage, [ sprintf('Controller %d (%s) needs attention (%s)', - $id, $name, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf 'Controller %d [%s] needs attention: %s', + $id, $name, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } } return; @@ -939,38 +1261,44 @@ sub check_physical_disks { return if $#controllers == -1; - my $id = undef; - my $nexus = undef; - my $name = undef; - my $state = undef; - my $status = undef; - my $fpred = undef; - my $progr = undef; - my $ctrl = undef; - my @output = (); + my $id = undef; + my $nexus = undef; + my $name = undef; + my $state = undef; + my $status = undef; + my $fpred = undef; + my $progr = undef; + my $ctrl = undef; + my $vendor = undef; # disk vendor + my $product = undef; # product ID + my $capacity = undef; # disk length (size) in bytes + my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %pdisk_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.4.1.1' => 'arrayDiskNumber', '1.3.6.1.4.1.674.10893.1.20.130.4.1.2' => 'arrayDiskName', + '1.3.6.1.4.1.674.10893.1.20.130.4.1.3' => 'arrayDiskVendor', '1.3.6.1.4.1.674.10893.1.20.130.4.1.4' => 'arrayDiskState', + '1.3.6.1.4.1.674.10893.1.20.130.4.1.6' => 'arrayDiskProductID', '1.3.6.1.4.1.674.10893.1.20.130.4.1.9' => 'arrayDiskEnclosureID', '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel', + '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB', 
'1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID', '1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID', '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus', '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID', '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication', - '1.3.6.1.4.1.674.10893.1.20.130.5.1.5' => 'arrayDiskEnclosureConnectionEnclosureNumber', '1.3.6.1.4.1.674.10893.1.20.130.5.1.7' => 'arrayDiskEnclosureConnectionControllerNumber', + '1.3.6.1.4.1.674.10893.1.20.130.6.1.7' => 'arrayDiskChannelConnectionControllerNumber', ); my $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]); if (!defined $result) { - printf "SNMP [storage / pdisk]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } @output = @{ get_snmp_output($result, \%pdisk_oid) }; @@ -1008,61 +1336,91 @@ # Check physical disks on each of the controllers PDISK: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $name = $out->{arrayDiskName}; - if ($name =~ m{.*\d+:\d+:\d+\z}xms) { + if (exists $out->{arrayDiskEnclosureID}) { $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskEnclosureID}, - $out->{arrayDiskTargetID}); + $out->{arrayDiskTargetID}); } else { $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskTargetID}); } - $state = $pdisk_state{$out->{arrayDiskState}}; - $status = $snmp_status{$out->{arrayDiskComponentStatus}}; - $fpred = $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0; - $progr = q{}; - $ctrl = exists $out->{arrayDiskEnclosureConnectionControllerNumber} - ? $out->{arrayDiskEnclosureConnectionControllerNumber} - 1 - : -1; - $nexus = convert_nexus($out->{arrayDiskNexusID}); + $state = $pdisk_state{$out->{arrayDiskState}}; + $status = $snmp_status{$out->{arrayDiskComponentStatus}}; + $fpred = $out->{arrayDiskSmartAlertIndication} == 2 ? 
1 : 0; + $progr = q{}; + $nexus = convert_nexus($out->{arrayDiskNexusID}); + $vendor = $out->{arrayDiskVendor}; + $product = $out->{arrayDiskProductID}; + $capacity = $out->{arrayDiskLengthInMB} * 1024**2; + if (exists $out->{arrayDiskEnclosureConnectionControllerNumber}) { + $ctrl = $out->{arrayDiskEnclosureConnectionControllerNumber} - 1; + } + elsif (exists $out->{arrayDiskChannelConnectionControllerNumber}) { + $ctrl = $out->{arrayDiskChannelConnectionControllerNumber} - 1; + } + else { + $ctrl = -1; + } } else { - $id = $out->{'ID'}; - $name = $out->{'Name'}; - $state = $out->{'State'}; - $status = $out->{'Status'}; - $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0; - $progr = ' [' . $out->{'Progress'} . ']'; - $ctrl = $out->{'ctrl'}; - $nexus = join q{:}, $out->{ctrl}, $id; + $id = $out->{'ID'}; + $name = $out->{'Name'}; + $state = $out->{'State'}; + $status = $out->{'Status'}; + $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0; + $progr = ' [' . $out->{'Progress'} . ']'; + $ctrl = $out->{'ctrl'}; + $nexus = join q{:}, $out->{ctrl}, $id; + $vendor = $out->{'Vendor ID'}; + $product = $out->{'Product ID'}; + $capacity = $out->{'Capacity'}; + $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms; } next PDISK if blacklisted('pdisk', $nexus); - $no_of_pdisks++; + $count{pdisk}++; + + $vendor =~ s{\s+\z}{}xms; # remove trailing whitespace + $product =~ s{\s+\z}{}xms; # remove trailing whitespace + + # Calculate human readable capacity + $capacity = ceil($capacity / 1000**3) >= 1000 + ? sprintf '%.1fTB', ($capacity / 1000**4) + : sprintf '%.0fGB', ($capacity / 1000**3); + $capacity = '450GB' if $capacity eq '449GB'; # quick fix for 450GB disks + $capacity = '300GB' if $capacity eq '299GB'; # quick fix for 300GB disks + $capacity = '146GB' if $capacity eq '147GB'; # quick fix for 146GB disks + + # Capitalize only the first letter of the vendor name + $vendor = (substr $vendor, 0, 1) . 
lc (substr $vendor, 1, length $vendor); + + # Remove unnecessary trademark rubbish from vendor name + $vendor =~ s{\(tm\)\z}{}xms; # Special case: Failure predicted if ($status eq 'Non-Critical' and $fpred) { - push @report_storage, [ sprintf('%s on controller %d needs attention (Failure Predicted)', - $name, $ctrl), - $WARNING, $nexus ]; + my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted', + $name, $vendor, $product, $capacity, $ctrl; + report('storage', $msg, $E_WARNING, $nexus); } # Special case: Rebuilding elsif ($state eq 'Rebuilding') { - push @report_storage, [ sprintf('%s on controller %d is %s%s', - $name, $ctrl, $state, $progr), - $WARNING, $nexus ]; + my $msg = sprintf '%s [%s] on ctrl %d is %s%s', + $name, $capacity, $ctrl, $state, $progr; + report('storage', $msg, $E_WARNING, $nexus); } # Default elsif ($status ne 'Ok') { - push @report_storage, [ sprintf('%s on controller %d needs attention (%s)', - $name, $ctrl, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s', + $name, $vendor, $product, $capacity, $ctrl, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('%s on controller %d is %s', - $name, $ctrl, $state), - $OK, $nexus ]; + my $msg = sprintf '%s [%s] on ctrl %d is %s', + $name, $capacity, $ctrl, $state; + report('storage', $msg, $E_OK, $nexus); } } return; @@ -1076,6 +1434,7 @@ return if $#controllers == -1; my $id = undef; + my $name = undef; my $nexus = undef; my $dev = undef; my $state = undef; @@ -1083,17 +1442,17 @@ my $layout = undef; my $size = undef; my $progr = undef; + my $ctrl = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %vdisk_oid = ( - '1.3.6.1.4.1.674.10893.1.20.140.1.1.1' => 'virtualDiskNumber', - '1.3.6.1.4.1.674.10893.1.20.140.1.1.2' => 'virtualDiskName', '1.3.6.1.4.1.674.10893.1.20.140.1.1.3' => 'virtualDiskDeviceName', 
'1.3.6.1.4.1.674.10893.1.20.140.1.1.4' => 'virtualDiskState', '1.3.6.1.4.1.674.10893.1.20.140.1.1.6' => 'virtualDiskLengthInMB', '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout', + '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID', '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus', '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID', ); @@ -1147,15 +1506,17 @@ # Check virtual disks on each of the controllers VDISK: foreach my $out (@output) { - if ($opt{'snmp'}) { - $id = $out->{virtualDiskNumber} - 1; + if ($snmp) { + $id = $out->{virtualDiskTargetID}; $dev = $out->{virtualDiskDeviceName}; $state = $vdisk_state{$out->{virtualDiskState}}; $status = $snmp_status{$out->{virtualDiskComponentStatus}}; $layout = $vdisk_layout{$out->{virtualDiskLayout}}; $size = sprintf '%.2f GB', $out->{virtualDiskLengthInMB} / 1024; - $progr = q{}; + $progr = q{}; # can't get this from SNMP(?) $nexus = convert_nexus($out->{virtualDiskNexusID}); + $ctrl = $nexus; # We use the nexus id to get the controller id + $ctrl =~ s{\A (\d+):\d+ \z}{$1}xms; } else { $id = $out->{ID}; @@ -1167,28 +1528,32 @@ $progr = ' [' . $out->{Progress} . 
']'; $size =~ s{\A (.*GB).* \z}{$1}xms; $nexus = join q{:}, $out->{ctrl}, $id; + $ctrl = $out->{ctrl}; } next VDISK if blacklisted('vdisk', $nexus); - $no_of_vdisks++; + $count{vdisk}++; + + # The device name is undefined sometimes + $dev = q{} if !defined $dev; # Special case: Regenerating if ($state eq 'Regenerating') { - push @report_storage, [ sprintf('Logical drive %s (%s - %s) is %s%s', - $dev, $layout, $size, $state, $progr), - $WARNING, $nexus ]; + my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s%s}, + $id, $dev, $layout, $size, $ctrl, $state, $progr; + report('storage', $msg, $E_WARNING, $nexus); } # Default elsif ($status ne 'Ok') { - push @report_storage, [ sprintf('Logical drive %s (%s - %s) needs attention (%s)', - $dev, $layout, $size, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d needs attention: %s}, + $id, $dev, $layout, $size, $ctrl, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('Logical drive %d %s (%s - %s) is %s', - $id, $dev, $layout, $size, $state), - $OK, $nexus ]; + my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s}, + $id, $dev, $layout, $size, $ctrl, $state; + report('storage', $msg, $E_OK, $nexus); } } return; @@ -1210,11 +1575,9 @@ my $pred = undef; # battery's ability to be charged my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %bat_oid = ( - '1.3.6.1.4.1.674.10893.1.20.130.15.1.1' => 'batteryNumber', - '1.3.6.1.4.1.674.10893.1.20.130.15.1.2' => 'batteryName', '1.3.6.1.4.1.674.10893.1.20.130.15.1.4' => 'batteryState', '1.3.6.1.4.1.674.10893.1.20.130.15.1.6' => 'batteryComponentStatus', '1.3.6.1.4.1.674.10893.1.20.130.15.1.9' => 'batteryNexusID', @@ -1250,6 +1613,7 @@ 36 => 'Learning', ); + # Specifies the learn state activity of the battery my %bat_learn_state = ( 1 => 'Failed', @@ -1259,18 +1623,19 @@ 16 => 'Idle', ); + # This property displays the 
battery's ability to be charged my %bat_pred_cap = ( - 1 => 'Failed', - 2 => 'Ready', - 4 => 'Unknown', + 1 => 'Failed', # The battery cannot be charged and needs to be replaced + 2 => 'Ready', # The battery can be charged to full capacity + 4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the + # battery cannot be determined until the Learn cycle is complete ); # Check battery on each of the controllers BATTERY: foreach my $out (@output) { - if ($opt{'snmp'}) { - $id = $out->{batteryNumber} - 1; + if ($snmp) { $state = $bat_state{$out->{batteryState}}; $status = $snmp_status{$out->{batteryComponentStatus}}; $learn = exists $out->{batteryLearnState} @@ -1279,6 +1644,8 @@ ? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef; $ctrl = $out->{batteryConnectionControllerNumber} - 1; $nexus = convert_nexus($out->{batteryNexusID}); + $id = $nexus; + $id =~ s{\A \d+:(\d+) \z}{$1}xms; } else { $id = $out->{'ID'}; @@ -1294,33 +1661,57 @@ # Special case: Charging if ($state eq 'Charging') { - push @report_storage, [ sprintf('Cache battery %d in controller %d is %s (%s) [probably harmless]', - $id, $ctrl, $state, $pred), - $WARNING, $nexus ]; + if ($pred eq 'Failed') { + my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [replace battery]', + $id, $ctrl, $state, $pred; + report('storage', $msg, $E_CRITICAL, $nexus); + } + else { + next BATTERY if blacklisted('bat_charge', $nexus); + my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]', + $id, $ctrl, $state, $pred; + report('storage', $msg, $E_WARNING, $nexus); + } } - # Special case: Learning (whats this?) 
+ # Special case: Learning (battery learns its capacity) elsif ($state eq 'Learning') { - push @report_storage, [ sprintf('Cache battery %d in controller %d is %s (%s) [probably harmless]', - $id, $ctrl, $state, $learn), - $WARNING, $nexus ]; + if ($learn eq 'Failed') { + my $msg = sprintf 'Cache battery %d in controller %d is %s (%s)', + $id, $ctrl, $state, $learn; + report('storage', $msg, $E_CRITICAL, $nexus); + } + else { + next BATTERY if blacklisted('bat_charge', $nexus); + my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]', + $id, $ctrl, $state, $learn; + report('storage', $msg, $E_WARNING, $nexus); + } } - # Special case: Power Low (part of recharge cycle?) + # Special case: Power Low (first part of recharge cycle) elsif ($state eq 'Power Low') { - push @report_storage, [ sprintf('Cache battery %d in controller %d is %s [probably harmless]', - $id, $ctrl, $state), - $WARNING, $nexus ]; + next BATTERY if blacklisted('bat_charge', $nexus); + my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]', + $id, $ctrl, $state; + report('storage', $msg, $E_WARNING, $nexus); + } + # Special case: Degraded and Non-Critical (usually part of recharge cycle) + elsif ($state eq 'Degraded' && $status eq 'Non-Critical') { + next BATTERY if blacklisted('bat_charge', $nexus); + my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]', + $id, $ctrl, $state, $status; + report('storage', $msg, $E_WARNING, $nexus); } # Default elsif ($status ne 'Ok') { - push @report_storage, [ sprintf('Cache battery %d in controller %d needs attention (%s / %s)', - $id, $ctrl, $state, $status), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)', + $id, $ctrl, $state, $status; + report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('Cache battery %d in controller %d is %s', - $id, $ctrl, $state), 
- $OK, $nexus ]; + my $msg = sprintf 'Cache battery %d in controller %d is %s', + $id, $ctrl, $state; + report('storage', $msg, $E_OK, $nexus); } } return; @@ -1342,7 +1733,7 @@ my $ctrl = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %conn_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.2.1.1' => 'channelNumber', @@ -1355,9 +1746,9 @@ my $result = $snmp_session->get_entries(-columns => [keys %conn_oid]); if (!defined $result) { - printf "SNMP [storage / channel]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } @output = @{ get_snmp_output($result, \%conn_oid) }; @@ -1393,7 +1784,7 @@ # Check connectors on each of the controllers CHANNEL: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $id = $out->{channelNumber} - 1; $name = $out->{channelName}; $state = $conn_state{$out->{channelState}}; @@ -1415,9 +1806,9 @@ next CHANNEL if blacklisted('conn', $nexus); - push @report_storage, [ sprintf('%s (%s) on controller %d is %s', - $name, $type, $ctrl, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf '%s [%s] on controller %d is %s', + $name, $type, $ctrl, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } return; } @@ -1433,9 +1824,10 @@ my $state = undef; my $status = undef; my $firmware = undef; + my $ctrl = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %encl_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.3.1.1' => 'enclosureNumber', @@ -1472,7 +1864,7 @@ ENCLOSURE: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $id = $out->{'enclosureNumber'} - 1; $name = $out->{'enclosureName'}; $state = $encl_state{$out->{'enclosureState'}}; @@ -1480,6 +1872,8 @@ $firmware = exists $out->{enclosureFirmwareVersion} ? 
$out->{enclosureFirmwareVersion} : 'N/A'; $nexus = convert_nexus($out->{enclosureNexusID}); + $ctrl = $nexus; + $ctrl =~ s{\A (\d+):.* \z}{$1}xms; } else { $id = $out->{ID}; @@ -1489,23 +1883,27 @@ $firmware = $out->{'Firmware Version'} ne 'Not Applicable' ? $out->{'Firmware Version'} : 'N/A'; $nexus = join q{:}, $out->{ctrl}, $id; + $ctrl = $out->{ctrl}; } $name =~ s{\s+\z}{}xms; # remove trailing whitespace $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace - $enclosure{$id} = $name; + # store enclosure data for future use + push @enclosures, { 'id' => $id, + 'ctrl' => $out->{ctrl}, + 'name' => $name }; # Collecting some storage info - $sysinfo{'enclosure'}{$id}{'id'} = $nexus; - $sysinfo{'enclosure'}{$id}{'name'} = $name; - $sysinfo{'enclosure'}{$id}{'firmware'} = $firmware; + $sysinfo{'enclosure'}{$nexus}{'id'} = $nexus; + $sysinfo{'enclosure'}{$nexus}{'name'} = $name; + $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware; next ENCLOSURE if blacklisted('encl', $nexus); - push @report_storage, [ sprintf('Enclosure %s (%s) is %s', - $id, $name, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf 'Enclosure %s [%s] on controller %d is %s', + $nexus, $name, $ctrl, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } return; } @@ -1527,7 +1925,7 @@ my $encl_name = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %fan_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.7.1.1' => 'fanNumber', @@ -1548,12 +1946,11 @@ @output = @{ get_snmp_output($result, \%fan_oid) }; } else { - foreach my $c (@controllers) { - foreach my $e (keys %enclosure) { - push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=fans") }; - map_item('ctrl', $c, \@output); - map_item('encl_id', $e, \@output); - } + foreach my $enc (@enclosures) { + push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans") }; + map_item('ctrl', $enc->{ctrl}, \@output); + map_item('encl_id', $enc->{id}, 
\@output); + map_item('encl_name', $enc->{name}, \@output); } } @@ -1571,7 +1968,7 @@ # Check fans on each of the enclosures FAN: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $id = $out->{fanNumber} - 1; $name = $out->{fanName}; $state = $fan_state{$out->{fanState}}; @@ -1587,24 +1984,24 @@ $state = $out->{'State'}; $status = $out->{'Status'}; $speed = $out->{'Speed'}; - $encl_id = $out->{'encl_id'}; - $encl_name = $enclosure{$encl_id}; - $nexus = join q{:}, $out->{ctrl}, $encl_id, $id; + $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'}; + $encl_name = $out->{encl_name}; + $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id; } next FAN if blacklisted('encl_fan', $nexus); # Default if ($status ne 'Ok') { - push @report_storage, [ sprintf('%s in enclosure %s (%s) needs attention (%s)', - $name, $encl_id, $encl_name, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s', + $name, $encl_id, $encl_name, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s (speed=%s)', - $name, $encl_id, $encl_name, $state, $speed), - $OK, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] is %s (speed=%s)', + $name, $encl_id, $encl_name, $state, $speed; + report('storage', $msg, $E_OK, $nexus); } } return; @@ -1626,7 +2023,7 @@ my $encl_name = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %ps_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.9.1.1' => 'powerSupplyNumber', @@ -1645,12 +2042,11 @@ @output = @{ get_snmp_output($result, \%ps_oid) }; } else { - foreach my $c (@controllers) { - foreach my $e (keys %enclosure) { - push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=pwrsupplies") }; - map_item('ctrl', $c, \@output); - map_item('encl_id', $e, \@output); - } + foreach my $enc (@enclosures) { + push @output, @{ run_omreport("storage enclosure 
controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies") }; + map_item('ctrl', $enc->{ctrl}, \@output); + map_item('encl_id', $enc->{id}, \@output); + map_item('encl_name', $enc->{name}, \@output); } } @@ -1668,7 +2064,7 @@ # Check power supplies on each of the enclosures PS: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $id = $out->{powerSupplyNumber}; $name = $out->{powerSupplyName}; $state = $ps_state{$out->{powerSupplyState}}; @@ -1682,24 +2078,24 @@ $name = $out->{'Name'}; $state = $out->{'State'}; $status = $out->{'Status'}; - $encl_id = $out->{'encl_id'}; - $encl_name = $enclosure{$encl_id}; - $nexus = join q{:}, $out->{ctrl}, $encl_id, $id; + $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'}; + $encl_name = $out->{encl_name}; + $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id; } next PS if blacklisted('encl_ps', $nexus); # Default if ($status ne 'Ok') { - push @report_storage, [ sprintf('%s in enclosure %s (%s) needs attention (%s)', - $name, $encl_id, $encl_name, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s', + $name, $encl_id, $encl_name, $state; + report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s', - $name, $encl_id, $encl_name, $state), - $OK, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] is %s', + $name, $encl_id, $encl_name, $state; + report('storage', $msg, $E_OK, $nexus); } } return; @@ -1725,7 +2121,7 @@ my $encl_name = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %temp_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.11.1.1' => 'temperatureProbeNumber', @@ -1748,12 +2144,11 @@ @output = @{ get_snmp_output($result, \%temp_oid) }; } else { - foreach my $c (@controllers) { - foreach my $e (keys %enclosure) { - push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=temps") }; - map_item('ctrl', $c, \@output); - 
map_item('encl_id', $e, \@output); - } + foreach my $enc (@enclosures) { + push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") }; + map_item('ctrl', $enc->{ctrl}, \@output); + map_item('encl_id', $enc->{id}, \@output); + map_item('encl_name', $enc->{name}, \@output); } } @@ -1771,7 +2166,7 @@ # Check temperature probes on each of the enclosures TEMP: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $id = $out->{temperatureProbeNumber} - 1; $name = $out->{temperatureProbeName}; $state = $temp_state{$out->{temperatureProbeState}}; @@ -1793,26 +2188,32 @@ $reading = $out->{'Reading'}; $reading =~ s{\s*C}{}xms; $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\s*C}{}xms; $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\s*C}{}xms; - $encl_id = $out->{'encl_id'}; - $encl_name = $enclosure{$encl_id}; - $nexus = join q{:}, $out->{ctrl}, $encl_id, $id; + $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'}; + $encl_name = $out->{encl_name}; + $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id; } next TEMP if blacklisted('encl_temp', $nexus); # Default if ($status ne 'Ok') { - push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s at %s (%s max)', - $name, $encl_id, $encl_name, - $state, $reading, $max_crit), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] is %s C at %s (%s max)', + $name, $encl_id, $encl_name, $state, $reading, $max_crit; + report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('%s in enclosure %s (%s): %s (%s max)', - $name, $encl_id, $encl_name, - $reading, $max_crit), - $OK, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s]: %s C (%s max)', + $name, $encl_id, $encl_name, $reading, $max_crit; + report('storage', $msg, $E_OK, $nexus); + } + + # Collect performance data + if (defined $opt{perfdata}) { + $name =~ s{\A Temperature\sProbe\s(\d+) 
\z}{temp_$1}gxms; + my $pkey = "enclosure_${encl_id}_${name}"; + my $pval = join q{;}, "${reading}C", $max_warn, $max_crit; + $perfdata{$pkey} = $pval; } } return; @@ -1834,7 +2235,7 @@ my $encl_name = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %emms_oid = ( '1.3.6.1.4.1.674.10893.1.20.130.13.1.1' => 'enclosureManagementModuleNumber', @@ -1853,12 +2254,11 @@ @output = @{ get_snmp_output($result, \%emms_oid) }; } else { - foreach my $c (@controllers) { - foreach my $e (keys %enclosure) { - push @output, @{ run_omreport("storage enclosure controller=$c enclosure=$e info=emms") }; - map_item('ctrl', $c, \@output); - map_item('encl_id', $e, \@output); - } + foreach my $enc (@enclosures) { + push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms") }; + map_item('ctrl', $enc->{ctrl}, \@output); + map_item('encl_id', $enc->{id}, \@output); + map_item('encl_name', $enc->{name}, \@output); } } @@ -1877,7 +2277,7 @@ # Check temperature probes on each of the enclosures EMM: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $id = $out->{enclosureManagementModuleNumber} - 1; $name = $out->{enclosureManagementModuleName}; $state = $emms_state{$out->{enclosureManagementModuleState}}; @@ -1891,24 +2291,24 @@ $name = $out->{'Name'}; $state = $out->{'State'}; $status = $out->{'Status'}; - $encl_id = $out->{'encl_id'}; - $encl_name = $enclosure{$encl_id}; - $nexus = join q{:}, $out->{ctrl}, $encl_id, $id; + $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'}; + $encl_name = $out->{encl_name}; + $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id; } next EMM if blacklisted('encl_emm', $nexus); # Default if ($status ne 'Ok') { - push @report_storage, [ sprintf('%s in enclosure %s (%s) needs attention (%s)', - $name, $encl_id, $encl_name, $state), - $status2nagios{$status}, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s', + $name, $encl_id, $encl_name, $state; + 
report('storage', $msg, $status2nagios{$status}, $nexus); } # Ok else { - push @report_storage, [ sprintf('%s in enclosure %s (%s) is %s', - $name, $encl_id, $encl_name, $state), - $OK, $nexus ]; + my $msg = sprintf '%s in enclosure %s [%s] is %s', + $name, $encl_id, $encl_name, $state; + report('storage', $msg, $E_OK, $nexus); } } return; @@ -1923,22 +2323,25 @@ my $status = undef; my $location = undef; my $size = undef; - my @output = (); + my $modes = undef; + my @failures = (); + my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %dimm_oid = ( '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1' => 'memoryDeviceIndex', '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1' => 'memoryDeviceStatus', '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1' => 'memoryDeviceLocationName', '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize', + '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes', ); my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]); if (!defined $result) { - printf "SNMP [memory]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } @output = @{ get_snmp_output($result, \%dimm_oid) }; @@ -1947,15 +2350,31 @@ @output = @{ run_omreport("$omopt_chassis memory") }; } - my $count_dimms = 0; + # Note: These values are bit masks, so combination values are + # possible. If value is 0 (zero), memory device has no faults. 
+ my %failure_mode + = ( + 1 => 'ECC single bit correction warning rate exceeded', + 2 => 'ECC single bit correction failure rate exceeded', + 4 => 'ECC multibit fault encountered', + 8 => 'ECC single bit correction logging disabled', + 16 => 'device disabled because of spare activation', + ); DIMM: foreach my $out (@output) { - if ($opt{'snmp'}) { + @failures = (); # Initialize + if ($snmp) { $index = $out->{memoryDeviceIndex}; $status = $snmp_status{$out->{memoryDeviceStatus}}; $location = $out->{memoryDeviceLocationName}; $size = sprintf '%d MB', $out->{memoryDeviceSize}/1024; + $modes = $out->{memoryDeviceFailureModes}; + if ($modes > 0) { + foreach my $mask (sort keys %failure_mode) { + if (($modes & $mask) != 0) { push @failures, $failure_mode{$mask}; } + } + } } else { $index = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'}; @@ -1965,6 +2384,15 @@ if (defined $size) { $size =~ s{\s\s}{ }gxms; } + # Run 'omreport chassis memory index=X' to get the failures + if ($status ne 'Ok' && defined $index) { + foreach (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) { + if (m/\A Failures; (.+?) 
\z/xms) { + chop(my $fail = $1); + push @failures, split m{\.}xms, $fail; + } + } + } } $location =~ s{\A \s*(.*?)\s* \z}{$1}xms; @@ -1972,21 +2400,29 @@ # Ignore empty memory slots next DIMM if !defined $index; - $count_dimms++; + $count{dimm}++; if ($status ne 'Ok') { - push @report_chassis, [ sprintf('Memory module %d (%s - %s) needs attention (%s)', - $index, $location, $size, $status), - $status2nagios{$status}, $index ]; + my $msg = undef; + if (scalar @failures == 0) { + $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)', + $index, $location, $size, $status; + } + else { + $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s', + $index, $location, $size, (join q{, }, @failures); + } + + report('chassis', $msg, $status2nagios{$status}, $index); } # Ok else { - push @report_chassis, [ sprintf('Memory module %d (%s - %s) is %s', - $index, $location, $size, $status), - $OK, $index ]; + my $msg = sprintf 'Memory module %d [%s, %s] is %s', + $index, $location, $size, $status; + report('chassis', $msg, $E_OK, $index); } } - return $count_dimms; + return; } @@ -2002,7 +2438,7 @@ my $max_warn = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %cool_oid = ( '1.3.6.1.4.1.674.10892.1.700.12.1.2.1' => 'coolingDeviceIndex', @@ -2018,9 +2454,9 @@ return 0; } elsif (!$blade && !defined $result) { - printf "SNMP [cooling]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } @output = @{ get_snmp_output($result, \%cool_oid) }; @@ -2029,13 +2465,11 @@ @output = @{ run_omreport("$omopt_chassis fans") }; } - my $count_fans = 0; - FAN: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $index = $out->{coolingDeviceIndex}; - $status = $snmp_status{$out->{coolingDeviceStatus}}; + $status = $snmp_probestatus{$out->{coolingDeviceStatus}}; $reading = $out->{coolingDeviceReading}; $location = $out->{coolingDeviceLocationName}; $max_crit 
= exists $out->{coolingDeviceUpperCriticalThreshold} @@ -2058,21 +2492,22 @@ } next FAN if blacklisted('fan', $index); - $count_fans++; + $count{fan}++; if ($status ne 'Ok') { - push @report_chassis, [ sprintf('Chassis fan %d (%s) needs attention (%s)', - $index, $location, $status), - $status2nagios{$status}, $index ]; + my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s', + $index, $location, $status; + my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } else { - push @report_chassis, [ sprintf('Chassis fan %d (%s): %s', - $index, $location, $reading), - $OK, $index ]; + my $msg = sprintf 'Chassis fan %d [%s]: %s', + $index, $location, $reading; + report('chassis', $msg, $E_OK, $index); } # Collect performance data - if (defined $opt{'perfdata'}) { + if (defined $opt{perfdata}) { my $pname = lc $location; $pname =~ s{\s}{_}gxms; $pname =~ s{proc_}{cpu#}xms; @@ -2081,7 +2516,7 @@ $perfdata{$pkey} = $pval; } } - return $count_fans; + return; } @@ -2097,7 +2532,7 @@ my @states = (); my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %ps_oid = ( '1.3.6.1.4.1.674.10892.1.600.12.1.2.1' => 'powerSupplyIndex', @@ -2150,12 +2585,10 @@ 3 => 'Processor missing', ); - my $count_psus = 0; - PS: foreach my $out (@output) { - if ($opt{'snmp'}) { - my @states = (); # contains states for the PS + if ($snmp) { + @states = (); # contains states for the PS $index = $out->{powerSupplyIndex} - 1; $status = $snmp_status{$out->{powerSupplyStatus}}; @@ -2164,7 +2597,7 @@ ? 
$ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef; # get the combined state from the StatusReading OID - foreach my $mask (keys %ps_state) { + foreach my $mask (sort keys %ps_state) { if (($out->{powerSupplySensorState} & $mask) != 0) { push @states, $ps_state{$mask}; } @@ -2186,13 +2619,20 @@ } next PS if blacklisted('ps', $index); - $count_psus++; + $count{power}++; - push @report_chassis, [ sprintf('Power Supply %d (%s): %s', - $index, $type, $state), - $status2nagios{$status}, $index ]; + if ($status ne 'Ok') { + my $msg = sprintf 'Power Supply %d [%s] needs attention: %s', + $index, $type, $state; + report('chassis', $msg, $status2nagios{$status}, $index); + } + else { + my $msg = sprintf 'Power Supply %d [%s]: %s', + $index, $type, $state; + report('chassis', $msg, $E_OK, $index); + } } - return $count_psus; + return; } @@ -2216,7 +2656,7 @@ my %warn_threshold = %{ custom_temperature_thresholds('w') }; my %crit_threshold = %{ custom_temperature_thresholds('c') }; - if ($opt{'snmp'}) { + if ($snmp) { my %temp_oid = ( '1.3.6.1.4.1.674.10892.1.700.20.1.2.1' => 'temperatureProbeIndex', @@ -2238,9 +2678,9 @@ my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable); if (!defined $result) { - printf "SNMP [temperatures]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } @output = @{ get_snmp_output($result, \%temp_oid) }; @@ -2251,17 +2691,15 @@ my %probe_type = ( - 1 => 'Other', # -- type is other than following values - 2 => 'Unknown', # -- type is unknown - 3 => 'AmbientESM', # -- type is Ambient Embedded Systems Management temperature probe - 16 => 'Discrete', #- - type is temperature probe with discrete reading + 1 => 'Other', # type is other than following values + 2 => 'Unknown', # type is unknown + 3 => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe + 16 => 'Discrete', # type is 
temperature probe with discrete reading ); - my $count_temps = 0; - TEMP: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $index = $out->{temperatureProbeIndex} - 1; $status = $snmp_probestatus{$out->{temperatureProbeStatus}}; $reading = $out->{temperatureProbeReading} / 10; @@ -2290,69 +2728,81 @@ } next TEMP if blacklisted('temp', $index); - $count_temps++; + $count{temp}++; if ($type eq 'Discrete') { - push @report_chassis, [ sprintf('Temperature probe %d (%s): is %s', - $index, $location, $discrete), - $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Temperature probe %d (%s): is %s', + $index, $location, $discrete; + my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } else { # First check according to custom thresholds if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) { # Custom critical MAX - push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom max=%d)', - $index, $location, $reading, $crit_threshold{$index}{max}), - $CRITICAL, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)', + $index, $location, $reading, $crit_threshold{$index}{max}; + report('chassis', $msg, $E_CRITICAL, $index); } elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) { # Custom warning MAX - push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom max=%d)', - $index, $location, $reading, $warn_threshold{$index}{max}), - $WARNING, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)', + $index, $location, $reading, $warn_threshold{$index}{max}; + report('chassis', $msg, $E_WARNING, $index); } elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) { # Custom critical MIN - push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C 
(custom min=%d)', - $index, $location, $reading, $crit_threshold{$index}{min}), - $CRITICAL, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)', + $index, $location, $reading, $crit_threshold{$index}{min}; + report('chassis', $msg, $E_CRITICAL, $index); } elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) { # Custom warning MIN - push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (custom min=%d)', - $index, $location, $reading, $warn_threshold{$index}{min}), - $WARNING, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)', + $index, $location, $reading, $warn_threshold{$index}{min}; + report('chassis', $msg, $E_WARNING, $index); } elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) { - push @report_chassis, [ sprintf('Temperature Probe %d (%s) is critically high at %d C', - $index, $location, $reading), - $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C', + $index, $location, $reading; + my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) { - push @report_chassis, [ sprintf('Temperature Probe %d (%s) is too high at %d C', - $index, $location, $reading), - $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C', + $index, $location, $reading; + my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) { - push @report_chassis, [ sprintf('Temperature Probe %d (%s) is critically low at %d C', - $index, $location, $reading), - $opt{snmp} ? 
$probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C', + $index, $location, $reading; + my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) { - push @report_chassis, [ sprintf('Temperature Probe %d (%s) is too low at %d C', - $index, $location, $reading), - $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C', + $index, $location, $reading; + my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } # Ok else { - push @report_chassis, [ sprintf('Temperature Probe %d (%s) reads %d C (min=%s/%s, max=%s/%s)', - $index, $location, $reading, - $min_warn, $min_crit, $max_warn, $max_crit), - $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Temperature Probe %d [%s] reads %d C', + $index, $location, $reading; + if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') { + $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit; + } + else { + $msg .= sprintf ' (min=%s/%s, max=%s/%s)', + $min_warn, $min_crit, $max_warn, $max_crit; + } + my $err = $snmp ? 
$probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } # Collect performance data - if (defined $opt{'perfdata'}) { + if (defined $opt{perfdata}) { my $pname = lc $location; $pname =~ s{\s}{_}gxms; $pname =~ s{_temp\z}{}xms; @@ -2363,7 +2813,7 @@ } } } - return $count_temps; + return; } @@ -2374,49 +2824,41 @@ my $index = undef; my $status = undef; my $state = undef; - my $oid_ver = 'new'; - my @output = (); + my $brand = undef; + my $family = undef; + my $man = undef; + my $speed = undef; + my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { # NOTE: For some reason, older models don't have the - # "Processor Device Status" OIDs. We first check the newer - # (preferred) OIDs, and if that doesn't work, check the "old" - # OIDs. - - my %cpu_oid_new # for newer models - = ( - '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex', - '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus', - '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading', - ); + # "Processor Device Status" OIDs. We check both the newer + # (preferred) OIDs and the old ones. 
- my %cpu_oid_old # for older models + my %cpu_oid = ( - '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex', - '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus', - '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState', + '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex', + '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus', + '1.3.6.1.4.1.674.10892.1.1100.30.1.8.1' => 'processorDeviceManufacturerName', + '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState', + '1.3.6.1.4.1.674.10892.1.1100.30.1.10.1' => 'processorDeviceFamily', + '1.3.6.1.4.1.674.10892.1.1100.30.1.12.1' => 'processorDeviceCurrentSpeed', + '1.3.6.1.4.1.674.10892.1.1100.30.1.23.1' => 'processorDeviceBrandName', + '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex', + '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus', + '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading', ); - my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_new]); - - if (!defined $result) { - $oid_ver = 'old'; - $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_old]); - } + my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid]); if (!defined $result) { - printf "SNMP [processors]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } - if ($oid_ver eq 'new') { - @output = @{ get_snmp_output($result, \%cpu_oid_new) }; - } - else { - @output = @{ get_snmp_output($result, \%cpu_oid_old) }; - } + @output = @{ get_snmp_output($result, \%cpu_oid) }; } else { @output = @{ run_omreport("$omopt_chassis processors") }; @@ -2424,74 +2866,113 @@ my %cpu_state = ( - 1 => 'Other', # -- other than following values - 2 => 'Unknown', # -- unknown - 3 => 'Enabled', # -- enabled - 4 => 'User Disabled', # -- disabled by user via BIOS setup - 5 => 
'BIOS Disabled', # -- disabled by BIOS (POST error) - 6 => 'Idle', # -- idle + 1 => 'Other', # other than following values + 2 => 'Unknown', # unknown + 3 => 'Enabled', # enabled + 4 => 'User Disabled', # disabled by user via BIOS setup + 5 => 'BIOS Disabled', # disabled by BIOS (POST error) + 6 => 'Idle', # idle ); my %cpu_reading = ( - 1 => 'Internal Error', # -- Internal Error - 2 => 'Thermal Trip', # -- Thermal Trip - 32 => 'Configuration Error', # -- Configuration Error - 128 => 'Present', # -- Processor Present - 256 => 'Disabled', # -- Processor Disabled - 512 => 'Terminator Present', # -- Terminator Present - 1024 => 'Throttled', # -- Processor Throttled + 1 => 'Internal Error', # Internal Error + 2 => 'Thermal Trip', # Thermal Trip + 32 => 'Configuration Error', # Configuration Error + 128 => 'Present', # Processor Present + 256 => 'Disabled', # Processor Disabled + 512 => 'Terminator Present', # Terminator Present + 1024 => 'Throttled', # Processor Throttled ); -# my %cpu_reading_reverse -# = ( -# 'Present' => 128, -# 'Throttled' => 1024, -# ); - - my $count_cpus = 0; + # Mapping between family numbers from SNMP and actual CPU family + my %cpu_family + = ( + 1 => 'Other', 2 => 'Unknown', 3 => '8086', + 4 => '80286', 5 => '386', 6 => '486', + 7 => '8087', 8 => '80287', 9 => '80387', + 10 => '80487', 11 => 'Pentium', 12 => 'Pentium Pro', + 13 => 'Pentium II', 14 => 'Pentium with MMX', 15 => 'Celeron', + 16 => 'Pentium II Xeon', 17 => 'Pentium III', 18 => 'Pentium III Xeon', + 19 => 'Pentium III', 20 => 'Itanium', 21 => 'Xeon', + 22 => 'Pentium 4', 23 => 'Xeon MP', 24 => 'Itanium 2', + 25 => 'K5', 26 => 'K6', 27 => 'K6-2', + 28 => 'K6-3', 29 => 'Athlon', 30 => 'AMD2900', + 31 => 'K6-2+', 32 => 'Power PC', 33 => 'Power PC 601', + 34 => 'Power PC 603', 35 => 'Power PC 603+', 36 => 'Power PC 604', + 37 => 'Power PC 620', 38 => 'Power PC x704', 39 => 'Power PC 750', + 48 => 'Alpha', 49 => 'Alpha 21064', 50 => 'Alpha 21066', + 51 => 'Alpha 21164', 52 => 'Alpha 
21164PC', 53 => 'Alpha 21164a', + 54 => 'Alpha 21264', 55 => 'Alpha 21364', 64 => 'MIPS', + 65 => 'MIPS R4000', 66 => 'MIPS R4200', 67 => 'MIPS R4400', + 68 => 'MIPS R4600', 69 => 'MIPS R10000', 80 => 'SPARC', + 81 => 'SuperSPARC', 82 => 'microSPARC II', 83 => 'microSPARC IIep', + 84 => 'UltraSPARC', 85 => 'UltraSPARC II', 86 => 'UltraSPARC IIi', + 87 => 'UltraSPARC III', 88 => 'UltraSPARC IIIi', 96 => '68040', + 97 => '68xxx', 98 => '68000', 99 => '68010', + 100 => '68020', 101 => '68030', 112 => 'Hobbit', + 120 => 'Crusoe TM5000', 121 => 'Crusoe TM3000', 122 => 'Efficeon TM8000', + 128 => 'Weitek', 131 => 'Athlon 64', 132 => 'Opteron', + 133 => 'Sempron', 134 => 'Turion 64 Mobile', 135 => 'Dual-Core Opteron', + 136 => 'Athlon 64 X2 DC', 137 => 'Turion 64 X2 M', 138 => 'Quad-Core Opteron', + 139 => '3rd gen Opteron', 144 => 'PA-RISC', 145 => 'PA-RISC 8500', + 146 => 'PA-RISC 8000', 147 => 'PA-RISC 7300LC', 148 => 'PA-RISC 7200', + 149 => 'PA-RISC 7100LC', 150 => 'PA-RISC 7100', 160 => 'V30', + 171 => 'Dual-Core Xeon 5200', 172 => 'Dual-Core Xeon 7200', 173 => 'Quad-Core Xeon 7300', + 174 => 'Quad-Core Xeon 7400', 175 => 'Multi-Core Xeon 7400', 176 => 'M1', + 177 => 'M2', 180 => 'AS400', 182 => 'Athlon XP', + 183 => 'Athlon MP', 184 => 'Duron', 185 => 'Pentium M', + 186 => 'Celeron D', 187 => 'Pentium D', 188 => 'Pentium Extreme', + 189 => 'Core Solo', 190 => 'Core2', 191 => 'Core2 Duo', + 198 => 'Core i7', 199 => 'Dual-Core Celeron', 200 => 'IBM390', + 201 => 'G4', 202 => 'G5', 203 => 'ESA/390 G6', + 204 => 'z/Architectur', 210 => 'C7-M', 211 => 'C7-D', + 212 => 'C7', 213 => 'Eden', 214 => 'Multi-Core Xeon', + 215 => 'Dual-Core Xeon 3xxx', 216 => 'Quad-Core Xeon 3xxx', 218 => 'Dual-Core Xeon 5xxx', + 219 => 'Quad-Core Xeon 5xxx', 221 => 'Dual-Core Xeon 7xxx', 222 => 'Quad-Core Xeon 7xxx', + 223 => 'Multi-Core Xeon 7xxx', 250 => 'i860', 251 => 'i960', + ); CPU: foreach my $out (@output) { -# my $throttled = 0; # flags if the CPU is throttled - - if ($opt{'snmp'}) { 
- if ($oid_ver eq 'new') { + if ($snmp) { + $index = exists $out->{processorDeviceStatusIndex} + ? $out->{processorDeviceStatusIndex} - 1 + : $out->{processorDeviceIndex} - 1; + $status = exists $out->{processorDeviceStatusStatus} + ? $snmp_status{$out->{processorDeviceStatusStatus}} + : $snmp_status{$out->{processorDeviceStatus}}; + if (exists $out->{processorDeviceStatusReading}) { my @states = (); # contains states for the CPU - $index = $out->{processorDeviceStatusIndex} - 1; - $status = $snmp_status{$out->{processorDeviceStatusStatus}}; # get the combined state from the StatusReading OID - foreach my $mask (keys %cpu_reading) { + foreach my $mask (sort keys %cpu_reading) { if (($out->{processorDeviceStatusReading} & $mask) != 0) { push @states, $cpu_reading{$mask}; } } # Finally, create the state string - $state = join q{,}, @states; - - # flag the CPU as throttled -# if ($out->{processorDeviceStatusReading} == ($cpu_reading_reverse{Present} -# + $cpu_reading_reverse{Throttled})) { -# $throttled = 1; -# } + $state = join q{, }, @states; } else { - $index = $out->{processorDeviceIndex} - 1; - $status = $snmp_status{$out->{processorDeviceStatus}}; $state = $cpu_state{$out->{processorDeviceStatusState}}; } + $man = $out->{processorDeviceManufacturerName}; + $family = (exists $out->{processorDeviceFamily} + and exists $cpu_family{$out->{processorDeviceFamily}}) + ? $cpu_family{$out->{processorDeviceFamily}} : undef; + $speed = $out->{processorDeviceCurrentSpeed}; + $brand = $out->{processorDeviceBrandName}; } else { $index = $out->{'Index'}; $status = $out->{'Status'}; $state = $out->{'State'}; - - # flag the CPU as throttled -# if (defined $state and $state eq 'CPU Throttled') { -# $throttled = 1; -# } + $brand = exists $out->{'Processor Brand'} ? $out->{'Processor Brand'} : undef; + $family = exists $out->{'Processor Family'} ? $out->{'Processor Family'} : undef; + $man = exists $out->{'Processor Manufacturer'} ? 
$out->{'Processor Manufacturer'} : undef; + $speed = exists $out->{'Current Speed'} ? $out->{'Current Speed'} : undef; } next CPU if blacklisted('cpu', $index); @@ -2502,33 +2983,41 @@ or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]'); # Ignore unoccupied CPU slots (snmp) - if ($opt{snmp} and exists $out->{processorDeviceStatusReading} + if ($snmp and exists $out->{processorDeviceStatusReading} and $out->{processorDeviceStatusReading} == 0) { next CPU; } - $count_cpus++; + $count{cpu}++; + + if (defined $brand) { + $brand =~ s{\s\s+}{ }gxms; + $brand =~ s{\((R|tm)\)}{}gxms; + $brand =~ s{\s(CPU|Processor)}{}xms; + $brand =~ s{\s\@}{}xms; + } + elsif (defined $family and defined $man and defined $speed) { + $speed =~ s{\A (\d+) .*}{$1}xms; + $brand = sprintf '%s %s %.2fGHz', $man, $family, $speed / 1000; + } + else { + $brand = "unknown"; + } - # Special case: Ignore non-critical warning about throttled - # CPUs. Ondemand CPU throttling is normal behaviour in most - # Linux distros and makes perfect sense in a server -# if ($status eq 'Non-Critical' and $throttled) { -# push @report_chassis, [ sprintf('CPU %d is %s', $index, $state), -# $OK, $index ]; -# } # Default -# elsif ($status ne 'Ok') { if ($status ne 'Ok') { - push @report_chassis, [ sprintf('CPU %d needs attention (%s)', $index, $state), - $status2nagios{$status}, $index ]; + my $msg = sprintf 'Processor %d [%s] needs attention: %s', + $index, $brand, $state; + report('chassis', $msg, $status2nagios{$status}, $index); } # Ok else { - push @report_chassis, [ sprintf('CPU %d is %s', $index, $state), - $OK, $index ]; + my $msg = sprintf 'Processor %d [%s] is %s', + $index, $brand, $state; + report('chassis', $msg, $E_OK, $index); } } - return $count_cpus; + return; } @@ -2542,7 +3031,7 @@ my $location = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %volt_oid = ( '1.3.6.1.4.1.674.10892.1.600.20.1.2.1' => 'voltageProbeIndex', @@ -2554,12 +3043,11 @@ my 
$voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1'; my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable); - #my $result = $snmp_session->get_entries(-columns => [keys %volt_oid]); if (!defined $result) { - printf "SNMP [voltage probes]: %s.\n", $snmp_session->error; + printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error; $snmp_session->close; - exit $UNKNOWN; + exit $E_UNKNOWN; } @output = @{ get_snmp_output($result, \%volt_oid) }; @@ -2574,13 +3062,11 @@ 2 => 'Bad', ); - my $count_volts = 0; - VOLT: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $index = $out->{voltageProbeIndex} - 1; - $status = $snmp_status{$out->{voltageProbeStatus}}; + $status = $snmp_probestatus{$out->{voltageProbeStatus}}; $reading = exists $out->{voltageProbeReading} ? sprintf('%.3f V', $out->{voltageProbeReading}/1000) : $volt_discrete_reading{$out->{voltageProbeDiscreteReading}}; @@ -2594,13 +3080,14 @@ } next VOLT if blacklisted('volt', $index); - $count_volts++; + $count{volt}++; - push @report_chassis, [ sprintf('Voltage sensor %d (%s) is %s', - $index, $location, $reading), - $opt{snmp} ? $probestatus2nagios{$status} : $status2nagios{$status}, $index ]; + my $msg = sprintf 'Voltage sensor %d [%s] is %s', + $index, $location, $reading; + my $err = $snmp ? 
$probestatus2nagios{$status} : $status2nagios{$status}; + report('chassis', $msg, $err, $index); } - return $count_volts; + return; } @@ -2614,7 +3101,7 @@ my $location = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %bat_oid = ( '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex', @@ -2640,11 +3127,9 @@ 4 => 'Presence Detected', ); - my $count_bats = 0; - BATTERY: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $index = $out->{batteryIndex} - 1; $status = $snmp_status{$out->{batteryStatus}}; $reading = $bat_reading{$out->{batteryReading}}; @@ -2658,13 +3143,13 @@ } next BATTERY if blacklisted('bp', $index); - $count_bats++; + $count{bat}++; - push @report_chassis, [ sprintf('Battery probe %d (%s) is %s', - $index, $location, $reading), - $status2nagios{$status}, $index ]; + my $msg = sprintf 'Battery probe %d [%s] is %s', + $index, $location, $reading; + report('chassis', $msg, $status2nagios{$status}, $index); } - return $count_bats; + return; } @@ -2681,7 +3166,7 @@ my $unit = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %amp_oid = ( '1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex', @@ -2743,11 +3228,9 @@ 'amperageProbeTypeIsDiscrete' => q{}, # discrete reading, no unit ); - my $count_pwr = 0; - AMP: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $index = $out->{amperageProbeIndex} - 1; $status = $snmp_status{$out->{amperageProbeStatus}}; $reading = $amp_type{$out->{amperageProbeType}} eq 'amperageProbeTypeIsDiscrete' @@ -2769,7 +3252,7 @@ } else { $index = $out->{'Index'}; - next if $index !~ m/^\d+$/x; + next AMP if (!defined $index || $index !~ m/^\d+$/x); $status = $out->{'Status'}; $reading = $out->{'Reading'}; $location = $out->{'Probe Name'}; @@ -2783,16 +3266,16 @@ $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms; } - next AMP if blacklisted('pm', $index); + next AMP if blacklisted('amp', $index); next AMP if $index !~ m{\A \d+ \z}xms; - $count_pwr++; + 
$count{amp}++; - push @report_chassis, [ sprintf('Amperage probe %d (%s) reads %s %s', - $index, $location, $reading, $unit, $status), - $status2nagios{$status}, $index ]; + my $msg = sprintf 'Amperage probe %d [%s] reads %s %s', + $index, $location, $reading, $unit, $status; + report('chassis', $msg, $status2nagios{$status}, $index); # Collect performance data - if (defined $opt{'perfdata'}) { + if (defined $opt{perfdata}) { next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number) my $pname = lc $location; $pname =~ s{\s}{_}gxms; @@ -2801,7 +3284,48 @@ $perfdata{$pkey} = $pval; } } - return $count_pwr; + + # Collect EXTRA performance data not found at first run. This is a + # rather ugly hack + if (defined $opt{perfdata} && !$snmp) { + my $found = 0; + my $index = 0; + my %used = (); + + # find used indexes + foreach (keys %perfdata) { + if (m/\A pwr_mon_(\d+)/xms) { + $used{$1} = 1; + } + } + + AMP2: + foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) { + chop $line; + if ($line eq 'Location;Reading') { + $found = 1; + next AMP2; + } + if ($line eq q{}) { + $found = 0; + next AMP2; + } + if ($found and $line =~ m/\A ([^;]+?) 
; (\d*\.\d+) \s ([AW]) \z/xms) { + my $aname = lc $1; + my $aval = $2; + my $aunit = $3; + $aname =~ s{\s}{_}gxms; + + # don't use an existing index + while (exists $used{$index}) { ++$index; } + + $perfdata{"pwr_mon_${index}_${aname}"} = "$aval$aunit;0;0"; + ++$index; + } + } + } + + return; } @@ -2814,7 +3338,7 @@ my $reading = undef; my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %int_oid = ( '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex', @@ -2840,11 +3364,9 @@ 4 => 'Breach Sensor Failure', # intrusion sensor has failed ); - my $count_intr = 0; - INTRUSION: foreach my $out (@output) { - if ($opt{'snmp'}) { + if ($snmp) { $index = $out->{intrusionIndex} - 1; $status = $snmp_status{$out->{intrusionStatus}}; $reading = $int_reading{$out->{intrusionReading}}; @@ -2856,21 +3378,21 @@ } next INTRUSION if blacklisted('intr', $index); - $count_intr++; + $count{intr}++; if ($status ne 'Ok') { - push @report_chassis, [ sprintf('Chassis intrusion %d detected: %s', - $index, $reading), - $WARNING, $index ]; + my $msg = sprintf 'Chassis intrusion %d detected: %s', + $index, $reading; + report('chassis', $msg, $E_WARNING, $index); } # Ok else { - push @report_chassis, [ sprintf('Chassis intrusion %d detection: %s (%s)', - $index, $status, $reading), - $OK, $index ]; + my $msg = sprintf 'Chassis intrusion %d detection: %s (%s)', + $index, $status, $reading; + report('chassis', $msg, $E_OK, $index); } } - return $count_intr; + return; } @@ -2878,29 +3400,23 @@ # CHASSIS: Check alert log #----------------------------------------- sub check_alertlog { - my %count = ( - 'Ok' => 0, - 'Non-Critical' => 0, - 'Critical' => 0, - ); - - return \%count if $opt{'snmp'}; # Not supported with SNMP + return if $snmp; # Not supported with SNMP my @output = @{ run_omreport("$omopt_system alertlog") }; foreach my $out (@output) { - ++$count{$out->{Severity}}; + ++$count{alert}{$out->{Severity}}; } # Create error messages and set exit value if appropriate - my 
$nagios_level = 0; - if ($count{'Critical'} > 0) { $nagios_level = $CRITICAL; } - elsif ($count{'Non-Critical'} > 0) { $nagios_level = $WARNING; } - - push @report_other, [ sprintf('Alert log content: %d critical, %d non-critical, %d ok', - $count{'Critical'}, $count{'Non-Critical'}, $count{'Ok'}), - $nagios_level, q{} ]; + my $err = 0; + if ($count{alert}{'Critical'} > 0) { $err = $E_CRITICAL; } + elsif ($count{alert}{'Non-Critical'} > 0) { $err = $E_WARNING; } + + my $msg = sprintf 'Alert log content: %d critical, %d non-critical, %d ok', + $count{alert}{'Critical'}, $count{alert}{'Non-Critical'}, $count{alert}{'Ok'}; + report('other', $msg, $err); - return \%count; + return; } #----------------------------------------- @@ -2909,44 +3425,40 @@ sub check_esmlog_health { my $health = 'Ok'; - if ($opt{snmp}) { + if ($snmp) { my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1'; my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]); if (!defined $result) { - push @report_other, [ sprintf('SNMP ERROR getting systemStateEventLogStatus OID: %s', - $snmp_session->error), - $UNKNOWN, q{} ]; + my $msg = sprintf 'SNMP ERROR [esmhealth]: %s', + $snmp_session->error; + report('other', $msg, $E_UNKNOWN); } $health = $snmp_status{$result->{$systemStateEventLogStatus}}; } else { - open my $CMD, '-|', "$omreport $omopt_system esmlog -fmt ssv" - or ( push @report_other, [ sprintf("Couldn't run $omreport $omopt_system esmlog: $!"), - $UNKNOWN, q{} ] and return $OK ); - while (<$CMD>) { + foreach (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) { if (m/\A Health;(.+) \z/xms) { $health = $1; chop $health; last; } } - close $CMD; } # If the overall health of the ESM log is other than "Ok", the # fill grade of the log is more than 80% and the log should be # cleared if ($health eq 'Ok') { - push @report_other, [ sprintf('ESM log is health is OK (less than 80%% full)'), - $OK, q{} ]; + my $msg = sprintf 'ESM log health is 
Ok (less than 80%% full)'; + report('other', $msg, $E_OK); } elsif ($health eq 'Critical') { - push @report_other, [ sprintf('ESM log is 100%% full!'), - $status2nagios{$health}, q{} ]; + my $msg = sprintf 'ESM log is 100%% full'; + report('other', $msg, $status2nagios{$health}); } else { - push @report_other, [ sprintf('ESM log is more than 80%% full'), - $status2nagios{$health}, q{} ]; + my $msg = sprintf 'ESM log is more than 80%% full'; + report('other', $msg, $status2nagios{$health}); } return; @@ -2956,14 +3468,9 @@ # CHASSIS: Check ESM log #----------------------------------------- sub check_esmlog { - my %count = ( - 'Ok' => 0, - 'Non-Critical' => 0, - 'Critical' => 0, - ); my @output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %esm_oid = ( '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus', @@ -2971,113 +3478,141 @@ my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]); # No entries is OK - return 0 if !defined $result; + return if !defined $result; @output = @{ get_snmp_output($result, \%esm_oid) }; foreach my $out (@output) { - ++$count{$snmp_status{$out->{eventLogSeverityStatus}}}; + ++$count{esm}{$snmp_status{$out->{eventLogSeverityStatus}}}; } } else { @output = @{ run_omreport("$omopt_system esmlog") }; foreach my $out (@output) { - ++$count{$out->{Severity}}; + ++$count{esm}{$out->{Severity}}; } } # Create error messages and set exit value if appropriate - my $nagios_level = 0; - if ($count{'Critical'} > 0) { $nagios_level = $CRITICAL; } - elsif ($count{'Non-Critical'} > 0) { $nagios_level = $WARNING; } + my $err = 0; + if ($count{esm}{'Critical'} > 0) { $err = $E_CRITICAL; } + elsif ($count{esm}{'Non-Critical'} > 0) { $err = $E_WARNING; } + + my $msg = sprintf 'ESM log content: %d critical, %d non-critical, %d ok', + $count{esm}{'Critical'}, $count{esm}{'Non-Critical'}, $count{esm}{'Ok'}; + report('other', $msg, $err); - push @report_other, [ sprintf('ESM log content: %d critical, %d non-critical, %d ok', - 
$count{'Critical'}, $count{'Non-Critical'}, $count{'Ok'}), - $nagios_level, q{} ]; - return \%count; + return; } - # -# Collects some information about the system via omreport +# Handy function for checking all storage components # -sub get_omreport_sysinfo -{ - # Get system model and serial number - if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") { - while (<$INFO>) { - next if !m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms; - my ($key, $val) = split /;/xms; - $key =~ s{\s+\z}{}xms; # remove trailing whitespace - $val =~ s{\s+\z}{}xms; # remove trailing whitespace - if ($key eq 'Chassis Model' or $key eq 'Model') { - $sysinfo{model} = $val; +sub check_storage { + check_controllers(); + check_physical_disks(); + check_virtual_disks(); + check_cache_battery(); + check_connectors(); + check_enclosures(); + check_enclosure_fans(); + check_enclosure_pwr(); + check_enclosure_temp(); + check_enclosure_emms(); + return; +} + + + +#--------------------------------------------------------------------- +# Info functions +#--------------------------------------------------------------------- + +# +# Fetch output from 'omreport chassis info', put in sysinfo hash +# +sub get_omreport_chassis_info { + if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") { + my @lines = <$INFO>; + close $INFO; + foreach (@lines) { + next if !m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms; + my ($key, $val) = split /;/xms; + $key =~ s{\s+\z}{}xms; # remove trailing whitespace + $val =~ s{\s+\z}{}xms; # remove trailing whitespace + if ($key eq 'Chassis Model' or $key eq 'Model') { + $sysinfo{model} = $val; } if ($key eq 'Chassis Service Tag' or $key eq 'Service Tag') { $sysinfo{serial} = $val; } } - close $INFO; } + return; +} - # Get BIOS information. 
Only if needed - if (($opt{okinfo} >= 1) or ($opt{verbose}) or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms)) { - if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") { - while (<$BIOS>) { - next if !m/;/xms; - my ($key, $val) = split /;/xms; - $key =~ s{\s+\z}{}xms; # remove trailing whitespace - $val =~ s{\s+\z}{}xms; # remove trailing whitespace - $sysinfo{bios} = $val if $key eq 'Version'; - $sysinfo{biosdate} = $val if $key eq 'Release Date'; - } - close $BIOS; +# +# Fetch output from 'omreport chassis bios', put in sysinfo hash +# +sub get_omreport_chassis_bios { + if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") { + my @lines = <$BIOS>; + close $BIOS; + foreach (@lines) { + next if !m/;/xms; + my ($key, $val) = split /;/xms; + $key =~ s{\s+\z}{}xms; # remove trailing whitespace + $val =~ s{\s+\z}{}xms; # remove trailing whitespace + $sysinfo{bios} = $val if $key eq 'Version'; + $sysinfo{biosdate} = $val if $key eq 'Release Date'; } } + return; +} - # Return now if verbose - return if $opt{verbose}; - - # Get OS information. 
Only if needed - if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) { - if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") { - while (<$VER>) { - next if !m/;/xms; - my ($key, $val) = split /;/xms; - $key =~ s{\s+\z}{}xms; # remove trailing whitespace - $val =~ s{\s+\z}{}xms; # remove trailing whitespace - if ($key eq 'Operating System') { - $sysinfo{osname} = $val; - } - elsif ($key eq 'Operating System Version') { - $sysinfo{osver} = $val; - } +# +# Fetch output from 'omreport system operatingsystem', put in sysinfo hash +# +sub get_omreport_system_operatingsystem { + if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") { + my @lines = <$VER>; + close $VER; + foreach (@lines) { + next if !m/;/xms; + my ($key, $val) = split /;/xms; + $key =~ s{\s+\z}{}xms; # remove trailing whitespace + $val =~ s{\s+\z}{}xms; # remove trailing whitespace + if ($key eq 'Operating System') { + $sysinfo{osname} = $val; + } + elsif ($key eq 'Operating System Version') { + $sysinfo{osver} = $val; } - close $VER; } } + return; +} - # Get OMSA information. 
Only if needed - if ($opt{okinfo} >= 3) { - if (open my $OM, '-|', "$omreport about -fmt ssv") { - while (<$OM>) { - if (m/\A Version;(.+) \z/xms) { - $sysinfo{om} = $1; - chomp $sysinfo{om}; - } +# +# Fetch output from 'omreport about', put in sysinfo hash +# +sub get_omreport_about { + if (open my $OM, '-|', "$omreport about -fmt ssv") { + my @lines = <$OM>; + close $OM; + foreach (@lines) { + if (m/\A Version;(.+) \z/xms) { + $sysinfo{om} = $1; + chomp $sysinfo{om}; } } } - return; } - # -# Collects some information about the system via SNMP +# Fetch chassis info via SNMP, put in sysinfo hash # -sub get_snmp_sysinfo -{ - # Get system model and serial number +sub get_snmp_chassis_info { my %chassis_oid = ( '1.3.6.1.4.1.674.10892.1.300.10.1.9.1' => 'chassisModelName', @@ -3099,96 +3634,133 @@ } } else { - push @report_other, [ sprintf('SNMP ERROR getting chassis info: %s', - $snmp_session->error), - $UNKNOWN, q{} ]; + my $msg = sprintf 'SNMP ERROR getting chassis info: %s', + $snmp_session->error; + report('other', $msg, $E_UNKNOWN); } + return; +} - # Get BIOS information. 
Only if needed - if (($opt{okinfo} >= 1) or ($opt{verbose}) or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms)) { - my %bios_oid - = ( - '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName', - '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName', - ); +# +# Fetch BIOS info via SNMP, put in sysinfo hash +# +sub get_snmp_chassis_bios { + my %bios_oid + = ( + '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName', + '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName', + ); - my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1'; - $result = $snmp_session->get_table(-baseoid => $systemBIOSTable); + my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1'; + my $result = $snmp_session->get_table(-baseoid => $systemBIOSTable); - if (defined $result) { - foreach my $oid (keys %{ $result }) { - if (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSReleaseDateName') { - $sysinfo{biosdate} = $result->{$oid}; - $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms; - } - elsif (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSVersionName') { - $sysinfo{bios} = $result->{$oid}; - } + if (defined $result) { + foreach my $oid (keys %{ $result }) { + if (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSReleaseDateName') { + $sysinfo{biosdate} = $result->{$oid}; + $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms; + } + elsif (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSVersionName') { + $sysinfo{bios} = $result->{$oid}; } - } - else { - push @report_other, [ sprintf('SNMP ERROR getting BIOS info: %s', - $snmp_session->error), - $UNKNOWN, q{} ]; } } + else { + my $msg = sprintf 'SNMP ERROR getting BIOS info: %s', + $snmp_session->error; + report('other', $msg, $E_UNKNOWN); + } + return; +} - # Return now if verbose - return if $opt{verbose}; - - # Get OS information. 
Only if needed - if (($opt{okinfo} >= 3) or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms)) { - my %os_oid - = ( - '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName', - '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName', - ); +# +# Fetch OS info via SNMP, put in sysinfo hash +# +sub get_snmp_system_operatingsystem { + my %os_oid + = ( + '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName', + '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName', + ); - my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1'; - $result = $snmp_session->get_table(-baseoid => $operatingSystemTable); + my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1'; + my $result = $snmp_session->get_table(-baseoid => $operatingSystemTable); - if (defined $result) { - foreach my $oid (keys %{ $result }) { - if (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemName') { - $sysinfo{osname} = ($result->{$oid}); - } - elsif (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemVersionName') { - $sysinfo{osver} = $result->{$oid}; - } + if (defined $result) { + foreach my $oid (keys %{ $result }) { + if (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemName') { + $sysinfo{osname} = ($result->{$oid}); + } + elsif (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemVersionName') { + $sysinfo{osver} = $result->{$oid}; } } - else { - push @report_other, [ sprintf('SNMP ERROR getting OS info: %s', - $snmp_session->error), - $UNKNOWN, q{} ]; - } } + else { + my $msg = sprintf 'SNMP ERROR getting OS info: %s', + $snmp_session->error; + report('other', $msg, $E_UNKNOWN); + } + return; +} - # Get OMSA version. 
Only if needed - if ($opt{okinfo} >= 3) { - my %omsa_oid - = ( - '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName', - ); - my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100'; - $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup); - if (defined $result) { - foreach my $oid (keys %{ $result }) { - if (exists $omsa_oid{$oid} and $omsa_oid{$oid} eq 'systemManagementSoftwareGlobalVersionName') { - $sysinfo{om} = ($result->{$oid}); - } +# +# Fetch OMSA version via SNMP, put in sysinfo hash +# +sub get_snmp_about { + my %omsa_oid + = ( + '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName', + ); + my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100'; + my $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup); + if (defined $result) { + foreach my $oid (keys %{ $result }) { + if (exists $omsa_oid{$oid} and $omsa_oid{$oid} eq 'systemManagementSoftwareGlobalVersionName') { + $sysinfo{om} = ($result->{$oid}); } } - else { - push @report_other, [ sprintf('SNMP ERROR getting OMSA info: %s', - $snmp_session->error), - $UNKNOWN, q{} ]; - } + } + else { + my $msg = sprintf 'SNMP ERROR getting OMSA info: %s', + $snmp_session->error; + report('other', $msg, $E_UNKNOWN); + } + return; +} + +# +# Collects some information about the system +# +sub get_sysinfo +{ + # Get system model and serial number + $snmp ? get_snmp_chassis_info() : get_omreport_chassis_info(); + + # Get BIOS information. Only if needed + if ( $opt{okinfo} >= 1 + or $opt{debug} + or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms) ) { + $snmp ? get_snmp_chassis_bios() : get_omreport_chassis_bios(); + } + + # Get OMSA information. Only if needed + if ($opt{okinfo} >= 3 or $opt{debug}) { + $snmp ? get_snmp_about() : get_omreport_about(); + } + + # Return now if debug + return if $opt{debug}; + + # Get OS information. 
Only if needed + if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) { + $snmp ? get_snmp_system_operatingsystem() : get_omreport_system_operatingsystem(); } return; } + # Helper function for running omreport when the results are strictly # name=value pairs. sub run_omreport_info { @@ -3197,17 +3769,13 @@ my @keys = (); # Run omreport and fetch output - open my $CMD, '-|', "$omreport $command -fmt ssv 2>&1" - or ( push @report_other, [ "Couldn't run $omreport: $!", - $UNKNOWN, q{} ] and return [] ); - my $rawtext = do { local $/ = undef; <$CMD> }; # slurping - close $CMD; + my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1"); # Parse output, store in array for ((split /\n/xms, $rawtext)) { if (m/\A Error/xms) { - push @report_other, [ "Problem running 'omreport $command': $_", - $UNKNOWN, q{} ]; + my $msg = "Problem running 'omreport $command': $_"; + report('other', $msg, $E_UNKNOWN); } next if !m/;/xms; # ignore lines with less than two fields my @vals = split m/;/xms; @@ -3223,7 +3791,7 @@ my @snmp_output = (); my %nrpe_output = (); - if ($opt{'snmp'}) { + if ($snmp) { my %fw_oid = ( '1.3.6.1.4.1.674.10892.1.300.60.1.7.1' => 'firmwareType', @@ -3267,7 +3835,7 @@ ); - if ($opt{'snmp'}) { + if ($snmp) { foreach my $out (@snmp_output) { if ($fw_type{$out->{firmwareType}} eq 'baseboardManagementController') { $sysinfo{'bmc'} = 1; @@ -3301,68 +3869,31 @@ } -# Handy function for checking all storage components -sub check_storage { - check_controllers(); - check_physical_disks(); - check_virtual_disks(); - check_cache_battery(); - check_connectors(); - check_enclosures(); - check_enclosure_fans(); - check_enclosure_pwr(); - check_enclosure_temp(); - check_enclosure_emms(); - return; -} - #===================================================================== # Main program #===================================================================== -# Counters -my $i_count = 0; -my %h_count = ('Ok' => 0, 'Non-Critical' => 0, 'Critical' => 0); - # Here we 
do the actual checking of components -if (defined $component) { - # Do single selected check - if ($component eq 'storage') { check_storage(); } - elsif ($component eq 'fans') { $i_count = check_fans(); } - elsif ($component eq 'temperature') { $i_count = check_temperatures(); } - elsif ($component eq 'memory') { $i_count = check_memory(); } - elsif ($component eq 'power') { $i_count = check_powersupplies(); } - elsif ($component eq 'cpu') { $i_count = check_processors(); } - elsif ($component eq 'voltage') { $i_count = check_volts(); } - elsif ($component eq 'batteries') { $i_count = check_batteries(); } - elsif ($component eq 'pwrmonitor') { $i_count = check_pwrmonitoring(); } - elsif ($component eq 'intrusion') { $i_count = check_intrusion(); } - elsif ($component eq 'alertlog') { %h_count = %{ check_alertlog() }; } - elsif ($component eq 'esmlog') { %h_count = %{ check_esmlog() }; } - elsif ($component eq 'esmhealth') { check_esmlog_health(); } -} -else { - # Check global status if applicable - if ($opt{global}) { - $globalstatus = check_global(); - } - - # Do multiple selected checks - if ($check{storage}) { check_storage(); } - if ($check{memory}) { check_memory(); } - if ($check{fans}) { check_fans(); } - if ($check{power}) { check_powersupplies(); } - if ($check{temperature}) { check_temperatures(); } - if ($check{cpu}) { check_processors(); } - if ($check{voltage}) { check_volts(); } - if ($check{batteries}) { check_batteries(); } - if ($check{pwrmonitor}) { check_pwrmonitoring(); } - if ($check{intrusion}) { check_intrusion(); } - if ($check{alertlog}) { check_alertlog(); } - if ($check{esmlog}) { check_esmlog(); } - if ($check{esmhealth}) { check_esmlog_health(); } -} +# Check global status if applicable +if ($global) { + $globalstatus = check_global(); +} + +# Do multiple selected checks +if ($check{storage}) { check_storage(); } +if ($check{memory}) { check_memory(); } +if ($check{fans}) { check_fans(); } +if ($check{power}) { check_powersupplies(); } 
+if ($check{temp}) { check_temperatures(); } +if ($check{cpu}) { check_processors(); } +if ($check{voltage}) { check_volts(); } +if ($check{batteries}) { check_batteries(); } +if ($check{amperage}) { check_pwrmonitoring(); } +if ($check{intrusion}) { check_intrusion(); } +if ($check{alertlog}) { check_alertlog(); } +if ($check{esmlog}) { check_esmlog(); } +if ($check{esmhealth}) { check_esmlog_health(); } #--------------------------------------------------------------------- @@ -3370,29 +3901,34 @@ #--------------------------------------------------------------------- # Counter variable -my %nagios_level_count = ( - 'OK' => 0, - 'WARNING' => 0, - 'CRITICAL' => 0, - 'UNKNOWN' => 0, - ); +%nagios_alert_count + = ( + 'OK' => 0, + 'WARNING' => 0, + 'CRITICAL' => 0, + 'UNKNOWN' => 0, + ); # Get system information -$opt{'snmp'} ? get_snmp_sysinfo() : get_omreport_sysinfo(); +get_sysinfo(); + +# Get firmware info if requested via option if ($opt{okinfo} >= 1) { get_firmware_info(); } # Close SNMP session -if ($opt{snmp}) { +if ($snmp) { $snmp_session->close; } # Print messages -if ($opt{verbose}) { +if ($opt{debug}) { print " System: $sysinfo{model}\n"; - print " ServiceTag: $sysinfo{serial}\n"; - print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}\n"; + print " ServiceTag: $sysinfo{serial}"; + print q{ } x (25 - length $sysinfo{serial}), "OMSA version: $sysinfo{om}\n"; + print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}"; + print q{ } x (25 - length "$sysinfo{bios} $sysinfo{biosdate}"), "Plugin version: $VERSION\n"; if ($#report_storage >= 0) { print "-----------------------------------------------------------------------------\n"; print " Storage Components \n"; @@ -3401,22 +3937,22 @@ print "---------+----------+--------------------------------------------------------\n"; foreach (@report_storage) { my ($msg, $level, $nexus) = @{$_}; - print q{ } x (8 - length $ERRORCODE{$level}) . "$ERRORCODE{$level} | " + print q{ } x (8 - length $reverse_exitcode{$level}) . 
"$reverse_exitcode{$level} | " . q{ } x (8 - length $nexus) . "$nexus | $msg\n"; - $nagios_level_count{$ERRORCODE{$level}}++; + $nagios_alert_count{$reverse_exitcode{$level}}++; } } if ($#report_chassis >= 0) { print "-----------------------------------------------------------------------------\n"; print " Chassis Components \n"; print "=============================================================================\n"; - print " STATE | ID | MESSAGE TEXT \n"; + print " STATE | ID | MESSAGE TEXT \n"; print "---------+------+------------------------------------------------------------\n"; foreach (@report_chassis) { my ($msg, $level, $nexus) = @{$_}; - print q{ } x (8 - length $ERRORCODE{$level}) . "$ERRORCODE{$level} | " + print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | " . q{ } x (4 - length $nexus) . "$nexus | $msg\n"; - $nagios_level_count{$ERRORCODE{$level}}++; + $nagios_alert_count{$reverse_exitcode{$level}}++; } } if ($#report_other >= 0) { @@ -3427,8 +3963,8 @@ print "---------+-------------------------------------------------------------------\n"; foreach (@report_other) { my ($msg, $level, $nexus) = @{$_}; - print q{ } x (8 - length $ERRORCODE{$level}) . "$ERRORCODE{$level} | $msg\n"; - $nagios_level_count{$ERRORCODE{$level}}++; + print q{ } x (8 - length $reverse_exitcode{$level}) . 
"$reverse_exitcode{$level} | $msg\n"; + $nagios_alert_count{$reverse_exitcode{$level}}++; } } } @@ -3436,66 +3972,91 @@ my $c = 0; # counter to determine linebreaks # Run through each message, sorted by severity level + ALERT: foreach (sort {$a->[1] < $b->[1]} (@report_storage, @report_chassis, @report_other)) { my ($msg, $level, $nexus) = @{ $_ }; - next if $level == $OK; + next ALERT if $level == $E_OK; + + if (defined $opt{only}) { + # If user wants only critical alerts + next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING); + + # If user wants only warning alerts + next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL); + } # Prefix with service tag if specified with option '-i|--info' - $msg = "[$sysinfo{serial}] " . $msg if $opt{info}; + if ($opt{info}) { + if (defined $opt{htmlinfo}) { + $msg = '[<a href="' . warranty_url($sysinfo{serial}) + . "\">$sysinfo{serial}</a>] " . $msg; + } + else { + $msg = "[$sysinfo{serial}] " . $msg; + } + } # Prefix with nagios level if specified with option '--state' - $msg = $ERRORCODE{$level} . ": $msg" if $opt{state}; + $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state}; # Prefix with one-letter nagios level if specified with option '--short-state' - $msg = (substr $ERRORCODE{$level}, 0, 1) . ": $msg" if $opt{shortstate}; + $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate}; ($c++ == 0) ? 
print $msg : print $linebreak, $msg; - $nagios_level_count{$ERRORCODE{$level}}++; + $nagios_alert_count{$reverse_exitcode{$level}}++; } } # Determine our exit code -my $exit_code = $OK; -$exit_code = $UNKNOWN if $nagios_level_count{'UNKNOWN'} > 0; -$exit_code = $WARNING if $nagios_level_count{'WARNING'} > 0; -$exit_code = $CRITICAL if $nagios_level_count{'CRITICAL'} > 0; +$exit_code = $E_OK; +$exit_code = $E_UNKNOWN if $nagios_alert_count{'UNKNOWN'} > 0; +$exit_code = $E_WARNING if $nagios_alert_count{'WARNING'} > 0; +$exit_code = $E_CRITICAL if $nagios_alert_count{'CRITICAL'} > 0; # Global status via SNMP.. extra safety check -if ($globalstatus != $OK and $exit_code == $OK) { +if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) { print "OOPS! Something is wrong with this server, but I don't know what. "; - print "The global system health status is $ERRORCODE{$globalstatus}, "; + print "The global system health status is $reverse_exitcode{$globalstatus}, "; print "but every component check is OK. This may be a bug in the Nagios plugin, "; print "please file a bug report.\n"; - exit $UNKNOWN; + exit $E_UNKNOWN; } # Print OK message -if ($exit_code == $OK && defined $component && !$opt{verbose}) { +if ($exit_code == $E_OK && defined $opt{only} && $opt{only} !~ m{\A critical|warning|chassis \z}xms && !$opt{debug}) { my %okmsg - = ( 'storage' => "all storage components ok, $no_of_pdisks physical drives, $no_of_vdisks logical drives", - 'fans' => $i_count == 0 && $blade ? 'blade system with no fan probes' : "all $i_count fans ok", - 'temperature' => "all $i_count temperatures ok", - 'memory' => "all $i_count memory modules ok", - 'power' => $i_count == 0 ? 'no instrumented power supplies found' : "all $i_count power supplies ok", - 'cpu' => "all $i_count processors ok", - 'voltage' => "all $i_count voltage probes ok", - 'batteries' => $i_count == 0 ? 'no batteries found' : "all $i_count batteries ok", - 'pwrmonitor' => $i_count == 0 ? 
'no power monitoring probes found' : "all $i_count power monitoring probes ok", - 'intrusion' => $i_count == 0 ? 'no intrusion detection probes found' : "all $i_count intrusion detection probes ok", - 'alertlog' => $opt{snmp} ? 'not supported via snmp' : "all alerts: $h_count{Ok} ok, $h_count{'Non-Critical'} warning and $h_count{Critical} critical", - 'esmlog' => "all esm log entries: $h_count{Ok} ok, $h_count{'Non-Critical'} warning and $h_count{Critical} critical", - 'esmhealth' => "ESM log health ok", + = ( 'storage' => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives", + 'fans' => $count{fan} == 0 && $blade ? 'OK - blade system with no fan probes' : "FANS OK - $count{fan} fan probes checked", + 'temp' => "TEMPERATURES OK - $count{temp} temperature probes checked", + 'memory' => "MEMORY OK - $count{dimm} memory modules checked", + 'power' => $count{power} == 0 ? 'OK - no instrumented power supplies found' : "POWER OK - $count{power} power supplies checked", + 'cpu' => "PROCESSORS OK - $count{cpu} processors checked", + 'voltage' => "VOLTAGE OK - $count{volt} voltage probes checked", + 'batteries' => $count{bat} == 0 ? 'OK - no batteries found' : "BATTERIES OK - $count{bat} batteries checked", + 'amperage' => $count{amp} == 0 ? 'OK - no power monitoring probes found' : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked", + 'intrusion' => $count{intr} == 0 ? 'OK - no intrusion detection probes found' : "INTRUSION OK - $count{intr} intrusion detection probes checked", + 'alertlog' => $snmp ? 'OK - not supported via snmp' : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical", + 'esmlog' => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical", + 'esmhealth' => "ESM LOG OK - less than 80% used", ); - print 'OK - ' . 
$okmsg{$component}; + print $okmsg{$opt{only}}; } -elsif ($exit_code == $OK && !$opt{verbose}) { - printf q{OK - System: '%s', SN: '%s', hardware working fine}, $sysinfo{model}, $sysinfo{serial}; +elsif ($exit_code == $E_OK && !$opt{debug}) { + if (defined $opt{htmlinfo}) { + printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine}, + documentation_url($sysinfo{model}), $sysinfo{model}, + warranty_url($sysinfo{serial}), $sysinfo{serial}; + } + else { + printf q{OK - System: '%s', SN: '%s', hardware working fine}, + $sysinfo{model}, $sysinfo{serial}; + } if ($check{storage}) { printf ', %d logical drives, %d physical drives', - $no_of_vdisks, $no_of_pdisks; + $count{vdisk}, $count{pdisk}; } else { print ', not checking storage'; @@ -3503,13 +4064,13 @@ if ($opt{okinfo} >= 1) { print $linebreak; - printf q{----- BIOS: '%s %s'}, $sysinfo{bios}, $sysinfo{biosdate}; + printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate}; if ($sysinfo{rac}) { - printf q{, %s: '%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw}; + printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw}; } if ($sysinfo{bmc}) { - printf q{, BMC: '%s'}, $sysinfo{bmc_fw}; + printf q{, BMC='%s'}, $sysinfo{bmc_fw}; } } @@ -3518,12 +4079,16 @@ my @storageprint = (); foreach my $id (sort keys %{ $sysinfo{controller} }) { chomp $sysinfo{controller}{$id}{driver}; - push @storageprint, sprintf q{----- Storage ctrl %s name: '%s', firmware: '%s', driver: '%s'}, + my $msg = sprintf q{----- Ctrl %s [%s]: Fw='%s', Dr='%s'}, $sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name}, $sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver}; + if (defined $sysinfo{controller}{$id}{storport}) { + $msg .= sprintf q{, Storport: '%s'}, $sysinfo{controller}{$id}{storport}; + } + push @storageprint, $msg; } foreach my $id (sort keys %{ $sysinfo{enclosure} }) { - push @storageprint, sprintf q{----- Storage encl %s name: '%s', firmware: '%s'}, + push @storageprint, 
sprintf q{----- Encl %s [%s]: Fw='%s'}, $sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name}, $sysinfo{enclosure}{$id}->{firmware}; } @@ -3543,8 +4108,15 @@ else { if ($opt{extinfo}) { print $linebreak; - printf '------ SYSTEM: %s, SN: %s', - $sysinfo{model}, $sysinfo{serial}; + if (defined $opt{htmlinfo}) { + printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>', + documentation_url($sysinfo{model}), $sysinfo{model}, + warranty_url($sysinfo{serial}), $sysinfo{serial}; + } + else { + printf '------ SYSTEM: %s, SN: %s', + $sysinfo{model}, $sysinfo{serial}; + } } if (defined $opt{postmsg}) { my $post = undef; @@ -3552,7 +4124,7 @@ open my $POST, '<', $opt{postmsg} or ( print $linebreak and print "ERROR: Couldn't open post message file $opt{postmsg}: $!\n" - and exit $UNKNOWN ); + and exit $E_UNKNOWN ); $post = <$POST>; close $POST; chomp $post; @@ -3568,8 +4140,8 @@ $post =~ s{[%]d}{$sysinfo{biosdate}}gxms; $post =~ s{[%]o}{$sysinfo{osname}}gxms; $post =~ s{[%]r}{$sysinfo{osver}}gxms; - $post =~ s{[%]p}{$no_of_pdisks}gxms; - $post =~ s{[%]l}{$no_of_vdisks}gxms; + $post =~ s{[%]p}{$count{pdisk}}gxms; + $post =~ s{[%]l}{$count{vdisk}}gxms; $post =~ s{[%]n}{$linebreak}gxms; $post =~ s{[%]{2}}{%}gxms; print $post; @@ -3577,683 +4149,39 @@ } } -# Print performance data -if (defined $opt{perfdata} && !$opt{verbose} && %perfdata) { - my $lb = $opt{perfdata} eq 'multiline' ? 
"\n" : q{ }; # line break for perfdata - print q{| }; - print join $lb, map { "'$_'=$perfdata{$_}" } sort keys %perfdata; +# Print any perl warnings that have occured +if (@perl_warnings) { + foreach (@perl_warnings) { + chop @$_; + print "${linebreak}INTERNAL ERROR: @$_"; + } + $exit_code = $E_UNKNOWN; } -print "\n" if !$opt{verbose}; - -# Exit with proper exit code -exit $exit_code; - - -# Man page created with: -# -# pod2man -s 3pm -r "`./check_openmanage -V | head -n 1`" -c 'Nagios plugin' check_openmanage check_openmanage.3pm -# - -__END__ - -=head1 NAME - -check_openmanage - Nagios plugin for checking the hardware status on - Dell servers running OpenManage - -=head1 SYNOPSIS - -check_openmanage [I<OPTION>]... - -=head1 DESCRIPTION - -check_openmanage is a plugin for Nagios which checks the hardware -health of Dell PowerEdge and PowerVault servers. It uses the Dell -OpenManage Server Administrator (OMSA) software to accomplish this -task. check_openmanage can be used with SNMP or NRPE, whichever suits -your needs and particular taste. The plugin checks the health of the -storage subsystem, power supplies, memory modules, temperature probes -etc., and gives an alert if any of the components are faulty or -operate outside normal parameters. - -check_openmanage is designed to be used by either locally (using NRPE) -or remotely (using SNMP). In either mode, the output is (nearly) the -same. Note that checking the alert log is not supported in SNMP mode. - -=head2 Alternate Basename - -=over 4 - -The normal basename is C<check_openmanage>. With this every component -in the server is checked (modifiable via the B<--check> option). You -can create symbolic links C<check_openmanage_COMPONENT> which changes -the behaviour of the plugin. 
The C<COMPONENT> part may be one of - -=over 4 - -=item B<storage> - -Only check storage - -=item B<memory> - -Only check memory modules - -=item B<fans> - -Only check fans - -=item B<power> - -Only check power supplies - -=item B<temperature> - -Only check temperatures - -=item B<cpu> - -Only check processors - -=item B<voltage> - -Only check voltage probes - -=item B<batteries> - -Only check batteries - -=item B<pwrmonitor> - -Only check power usage - -=item B<intrusion> - -Only check chassis intrusion - -=item B<esmhealth> - -Only check ESM log overall health, i.e. fill grade - -=item B<esmlog> - -Only check the event log (ESM) content - -=item B<alertlog> - -Only check the alert log content - -=back - -=back - -=head1 OPTIONS - -=head2 General Options - -=over 4 - -=over 4 -=item -t, --timeout I<SECONDS> +# Reset the WARN signal +$SIG{__WARN__} = $original_sigwarn; -The number of seconds after which the plugin will abort. Default -timeout is 30 seconds if the option is not present. - -=item -g, --global - -Check everything except logs. By default log content and chassis -intrusion sensor are skipped. With this option everything will be -checked except for log contents. - -If used with SNMP, the global system health OID is also probed. This -gives an added security against bugs in the plugin. The plugin will -produce an special error message in cases where 1) the global status -is not OK, and 2) a hardware error has not been detected by the rest -of the plugin. - -If used with omreport, i.e. via NRPE or similar, the output from -C<omreport system> is used to find the global chassis health. Note -that storage health is excluded. Not as good as with SNMP, but it -still means added security against plugin bugs. - -This option negates the C<--check> option described below, for all -checks but the esmlog and alertlog. If used with alternate basenames, -the option has no effect. - -=item -p, --perfdata [I<multline>] - -Collect performance data. 
Performance data collected include -temperatures (in Celcius) and fan speeds (in rpm). On systems that -support it, power consumption is also collected (in Watts). - -If given the argument C<multiline>, the plugin will output the -performance data on multiple lines, for Nagios 3.x and above. - -=item -w, --warning I<STRING> or I<FILE> - -Override the machine-default temperature warning thresholds. Syntax is -C<id1=max[/min],id2=max[/min],...>. The following example sets warning -limits to max 50C for probe 0, and max 45C and min 10C for probe 1: - -check_openmanage -w 0=50,1=45/10 - -The minimum limit can be omitted, if desired. Most often, you are only -interested in setting the maximum thresholds. - -This parameter can be either a string with the limits, or a file -containing the limits string. The option can be specified multiple -times. - -=item -c, --critical I<STRING> or I<FILE> - -Override the machine-default temperature critical thresholds. Syntax -and behaviour is the same as for warning thresholds described above. - -=item -o, --ok-info I<NUMBER> - -This option lets you define how much output you want the plugin to -give when everything is OK, i.e. the verbosity level. The default -value is 0 (one line of output). The output levels are cumulative. - -=over 4 - -=item B<0> - -- Only one line (default) - -=item B<1> - -- BIOS and firmware info on a separate line - -=item B<2> - -- Storage controller and enclosure info on separate lines - -=item B<3> - -- OMSA version on separate line - -=back - -The reason that OMSA version is separated from the rest is that -finding it requires running a really slow omreport command, when the -plugin is run locally via NRPE. - -=item -i, --info - -Prefix any alerts with the service tag. - -=item -e, --extinfo - -Display a short summary of system information (model and service tag) -in case of an alert. - -=item --postmsg I<STRING> or I<FILE> - -User specified post message. 
Useful for displaying arbitrary or -various system information at the end of alerts. The argument is -either a string with the message, or a file containing that -string. You can control the format with the following interpreted -sequences: - -=over 4 - -=item B<%m> - -System model - -=item B<%s> - -Service tag - -=item B<%b> - -BIOS version - -=item B<%d> - -BIOS release date - -=item B<%o> - -Operating system name - -=item B<%r> - -Operating system release - -=item B<%p> - -Number of physical drives - -=item B<%l> - -Number of logical drives - -=item B<%n> - -Line break. Will be a regular line break if run from a TTY, else an -HTML line break. - -=item B<%%> - -A literal C<%> - -=back - -=item --state - -Prefix each alert with its corresponding service state (i.e. warning, -critical etc.). This is useful in case of several alerts from the same -monitored system. - -=item --short-state - -Same as the B<--state> option above, except that the state is -abbreviated to a single letter (W=warning, C=critical etc.). - -=item --linebreak=I<STRING> - -check_openmanage will sometimes report more than one line, e.g. if -there are several alerts. If the script has a TTY, it will use regular -linebreaks. If not (which is the case with NRPE) it will use HTML -linebreaks. Sometimes it can be useful to control what the plugin uses -as a line separator, and this option provides that control. - -The argument is the exact string to be used as the line -separator. There are two exceptions, i.e. two keywords that translates -to the following: - -=over 4 - -=item B<REG> - -Regular linebreaks, i.e. "\n". - -=item B<HTML> - -HTML linebreaks, i.e. "<br/>". - -=back - -This is a rather special option that is normally not needed. The -default behaviour should be sufficient for most users. - -=item -v, --verbose - -Verbose output. Will report status on everything, even if status is -ok. Blacklisted or unchecked components are ignored (i.e. no output). 
- -=item -h, --help - -Display help text. - -=item -m, --man - -Display man page. - -=item -V, --version - -Display version info. - -=back - -=back - -=head2 SNMP Options - -=over 4 - -=over 4 - -=item -s, --snmp - -Trigger SNMP mode. - -=item -H, --hostname I<HOSTNAME> - -The transport address of the destination SNMP device. This argument -is optional and defaults to C<localhost>. - -=item -P, --protocol I<PROTOCOL> - -SNMP protocol version. This option is optional and expects a digit -(i.e. C<1>, C<2> or C<3>) to define the SNMP version. The default is -C<2>, i.e. SNMP version 2c. - -=item -C, --community I<COMMUNITY> - -This option expects a string that is to be used as the SNMP community -name when using SNMP version 1 or 2c. By default the community name -is set to C<public> if the option is not present. - -=item --port I<PORT> - -SNMP port of the remote (monitored) system. Defaults to the well-known -SNMP port 161. - -=item -U, --username I<SECURITYNAME> - -[SNMPv3] The User-based Security Model (USM) used by SNMPv3 requires -that a securityName be specified. This option is required when using -SNMP version 3, and expects a string 1 to 32 octets in lenght. - -=item --authpassword I<PASSWORD>, --authkey I<KEY> - -[SNMPv3] By default a securityLevel of C<noAuthNoPriv> is assumed. If -the --authpassword option is specified, the securityLevel becomes -C<authNoPriv>. The --authpassword option expects a string which is at -least 1 octet in length as argument. - -Optionally, instead of the --authpassword option, the --authkey option -can be used so that a plain text password does not have to be -specified in a script. The --authkey option expects a hexadecimal -string produced by localizing the password with the -authoritativeEngineID for the specific destination device. The -C<snmpkey> utility included with the Net::SNMP distribution can be -used to create the hexadecimal string (see L<snmpkey>). 
- -=item --authprotocol I<ALGORITHM> - -[SNMPv3] Two different hash algorithms are defined by SNMPv3 which can -be used by the Security Model for authentication. These algorithms are -HMAC-MD5-96 C<MD5> (RFC 1321) and HMAC-SHA-96 C<SHA-1> (NIST FIPS PUB -180-1). The default algorithm used by the plugin is HMAC-MD5-96. This -behavior can be changed by using this option. The option expects -either the string C<md5> or C<sha> to be passed as argument to modify -the hash algorithm. - -=item --privpassword I<PASSWORD>, --privkey I<KEY> - -[SNMPv3] By specifying the options --privkey or --privpassword, the -securityLevel associated with the object becomes -C<authPriv>. According to SNMPv3, privacy requires the use of -authentication. Therefore, if either of these two options are present -and the --authkey or --authpassword arguments are missing, the -creation of the object fails. The --privkey and --privpassword -options expect the same input as the --authkey and --authpassword -options respectively. - -=item --privprotocol I<ALGORITHM> - -[SNMPv3] The User-based Security Model described in RFC 3414 defines a -single encryption protocol to be used for privacy. This protocol, -CBC-DES C<DES> (NIST FIPS PUB 46-1), is used by default or if the -string C<des> is passed to the --privprotocol option. The Net::SNMP -module also supports RFC 3826 which describes the use of -CFB128-AES-128 C<AES> (NIST FIPS PUB 197) in the USM. The AES -encryption protocol can be selected by passing C<aes> or C<aes128> to -the --privprotocol option. - -One of the following arguments are required: des, aes, aes128, 3des, -3desde - -=back - -=back - -=head2 Blacklisting - -=over 4 - -=over 4 - -=item -b, --blacklist I<STRING> or I<FILE> - -Blacklist missing and/or failed components, if you do not plan to fix -them. The parameter is either the blacklist string, or a file (that -may or may not exist) containing the string. 
The blacklist string -contains component names with component IDs separated by slash -(/). Blacklisted components are left unchecked. - -TIP: Use the option C<-v> (or C<--verbose>) to get the blacklist ID for -devices. The ID is listed in a separate column in the verbose output. - -=over 9 - -=item B<Syntax:> - -component1=id1[,id2,...]/component2=id1[,id2,...]/... - -=item B<Example:> - -check_openmanage -b ps=0/fan=3,5/pdisk=1:0:0:1 - -=back - -In the example we blacklist powersupply 0, fans 3 and 5, and -physical disk 1:0:0:1. Legal component names include: - -=over 8 - -=item B<ctrl> - -Controller - -=item B<ctrl_fw> - -Suppress the special warning message about old controller -firmware. Use this if you can not or will not upgrade the firmware. - -=item B<ctrl_driver> - -Suppress the special warning message about old controller driver. -Particularly useful on systems where you can not upgrade the driver. - -=item B<pdisk> - -Physical disk. - -=item B<vdisk> - -Logical drive (virtual disk) - -=item B<bat> - -Controller cache battery - -=item B<conn> - -Connector (channel) - -=item B<encl> - -Enclosure - -=item B<encl_fan> - -Enclosure fan - -=item B<encl_ps> - -Enclosure power supply - -=item B<encl_temp> - -Enclosure temperature probe - -=item B<encl_emm> - -Enclosure management module (EMM) - -=item B<dimm> - -Memory module - -=item B<fan> - -Fan - -=item B<ps> - -Powersupply - -=item B<temp> - -Temperature sensor - -=item B<cpu> - -Processor (CPU) - -=item B<volt> - -Voltage probe - -=item B<bp> - -System battery - -=item B<pm> - -Amperage probe (power consumption monitoring) - -=item B<intr> - -Intrusion sensor - -=back - -=back - -=back - -=head2 Check Control - -=over 4 - -=over 4 - -=item --check I<STRING> or I<FILE> - -This parameter allows you to adjust which components that should be -checked at all. This is a rougher approach than blacklisting, which -require that you specify component id or index. 
The parameter should -be either a string containing the adjustments, or a file containing -the string. No errors are raised if the file does not exist. - -Note: This option is ignored with alternate basenames. - -=over 9 - -=item B<Example:> - -check_openmanage --check storage=0,intrusion=1 - -=back - -Legal values are described below, along with the default value. - -=over 4 - -=item B<storage> - -Check storage subsystem (controllers, disks etc.). Default: ON - -=item B<memory> - -Check memory (dimms). Default: ON - -=item B<fans> - -Check chassis fans. Default: ON - -=item B<power> - -Check power supplies. Default: ON - -=item B<temperature> - -Check temperature sensors. Default: ON - -=item B<cpu> - -Check CPUs. Default: ON - -=item B<voltage> - -Check voltage sensors. Default: ON - -=item B<batteries> - -Check system batteries. Default: ON - -=item B<pwrmonitor> - -Check power consumption monitoring. Default: ON - -=item B<intrusion> - -Check chassis intrusion. Default: OFF - -=item B<esmhealth> - -Check the ESM log health, i.e. fill grade. Default: ON - -=item B<esmlog> - -Check the ESM log content. Default: OFF - -=item B<alertlog> - -Check the alert log content. Default: OFF - -=back - -=back - -=back - -=head1 DIAGNOSTICS - -The option C<--verbose> (or C<-v>) can be specified to display all -monitored components. - -=head1 DEPENDENCIES - -If SNMP is requested, the perl module Net::SNMP is -required. Otherwise, only a regular perl distribution is required to -run the script. On the target (monitored) system, Dell Openmanage -Server Administrator (OMSA) must be installed and running. - -=head1 EXIT STATUS - -If no errors are discovered, a value of 0 (OK) is returned. An exit -value of 1 (WARNING) signifies one or more non-critical errors, while -2 (CRITICAL) signifies one or more critical errors. - -The exit value 3 (UNKNOWN) is reserved for errors within the script, -or errors getting values from Dell OMSA. - -=head1 AUTHOR - -Written by Trond H. 
Amundsen <t.h.amundsen@usit.uio.no> - -=head1 BUGS AND LIMITATIONS - -Storage info is not collected or checked on very old PowerEdge models -and/or old OMSA versions, due to limitations in OMSA. The overall -support on those models/versions by this plugin is not well tested. - -=head1 INCOMPATIBILITIES - -The plugin does not work with the Nagios embedded perl interpreter -(ePN). You should specify C<perl /path/to/check_openmanage> in your -Nagios config if you have ePN enabled. - -=head1 REPORTING BUGS - -Report bugs to <t.h.amundsen@usit.uio.no> - -=head1 LICENSE AND COPYRIGHT +# Print performance data +if (defined $opt{perfdata} && !$opt{debug} && %perfdata) { + my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ }; # line break for perfdata + print q{|}; -This nagios plugin comes with ABSOLUTELY NO WARRANTY. -You may redistribute copies of this plugin under the terms of -the GNU General Public License L<http://www.gnu.org/licenses/gpl.html>. + sub perfdata { + my %order + = ( + fan => 0, + pwr => 1, + temp => 2, + enclosure => 3, + ); + return ($order{(split /_/, $a, 2)[0]} cmp $order{(split /_/, $b, 2)[0]}) || $a cmp $b; + } -=head1 SEE ALSO + print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata; +} -L<http://folk.uio.no/trondham/software/check_openmanage.html> +# Print a linebreak at the end +print "\n" if !$opt{debug}; -=cut +# Exit with proper exit code +exit $exit_code; | ||
[+] | Added | check_openmanage-3.5.5.tar.gz/check_openmanage.8 ^ |
@@ -0,0 +1,680 @@ +.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sh \" Subsection heading +.br +.if t .Sp +.ne 5 +.PP +\fB\\$1\fR +.PP +.. +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. | will give a +.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to +.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' +.\" expand to `' in nroff, nothing in troff, for use with C<>. +.tr \(*W-|\(bv\*(Tr +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.\" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.hy 0 +.if n .na +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. 
ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "CHECK_OPENMANAGE 8" +.TH CHECK_OPENMANAGE 8 "2010-01-13" "check_openmanage 3.5.5" "Nagios plugin" +.SH "NAME" +check_openmanage \- Nagios plugin for checking the hardware status on + Dell servers running OpenManage +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +check_openmanage [\fI\s-1OPTION\s0\fR]... 
+.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +check_openmanage is a plugin for Nagios which checks the hardware +health of Dell servers running OpenManage Server Administrator +(\s-1OMSA\s0). The plugin checks the health of the storage subsystem, power +supplies, memory modules, temperature probes etc., and gives an alert +if any of the components are faulty or operate outside normal +parameters. +.PP +check_openmanage is designed to be used either locally (using \s-1NRPE\s0 +or similar) or remotely (using \s-1SNMP\s0). In either mode, the output is +(nearly) the same. Note that checking the alert log is not supported +in \s-1SNMP\s0 mode. +.SH "GENERAL OPTIONS" +.IX Header "GENERAL OPTIONS" +.IP "\-t, \-\-timeout \fI\s-1SECONDS\s0\fR" 4 +.IX Item "-t, --timeout SECONDS" +The number of seconds after which the plugin will abort. Default +timeout is 30 seconds if the option is not present. +.IP "\-p, \-\-perfdata [\fImultiline\fR]" 4 +.IX Item "-p, --perfdata [multiline]" +Collect performance data. Performance data collected include +temperatures (in Celsius) and fan speeds (in rpm). On systems that +support it, power consumption is also collected (in Watts). +.Sp +If given the argument \f(CW\*(C`multiline\*(C'\fR, the plugin will output the +performance data on multiple lines, for Nagios 3.x and above. +.IP "\-w, \-\-warning \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 +.IX Item "-w, --warning STRING or FILE" +Override the machine-default temperature warning thresholds. Syntax is +\&\f(CW\*(C`id1=max[/min],id2=max[/min],...\*(C'\fR. The following example sets warning +limits to max 50C for probe 0, and max 45C and min 10C for probe 1: +.Sp +check_openmanage \-w 0=50,1=45/10 +.Sp +The minimum limit can be omitted, if desired. Most often, you are only +interested in setting the maximum thresholds. +.Sp +This parameter can be either a string with the limits, or a file +containing the limits string. The option can be specified multiple +times.
+.IP "\-c, \-\-critical \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 +.IX Item "-c, --critical STRING or FILE" +Override the machine-default temperature critical thresholds. Syntax +and behaviour is the same as for warning thresholds described above. +.IP "\-o, \-\-ok\-info \fI\s-1NUMBER\s0\fR" 4 +.IX Item "-o, --ok-info NUMBER" +This option lets you define how much output you want the plugin to +give when everything is \s-1OK\s0, i.e. the verbosity level. The default +value is 0 (one line of output). The output levels are cumulative. +.RS 4 +.IP "\fB0\fR" 4 +.IX Item "0" +\&\- Only one line (default) +.IP "\fB1\fR" 4 +.IX Item "1" +\&\- \s-1BIOS\s0 and firmware info on a separate line +.IP "\fB2\fR" 4 +.IX Item "2" +\&\- Storage controller and enclosure info on separate lines +.IP "\fB3\fR" 4 +.IX Item "3" +\&\- \s-1OMSA\s0 version on separate line +.RE +.RS 4 +.Sp +The reason that \s-1OMSA\s0 version is separated from the rest is that +finding it requires running a really slow omreport command, when the +plugin is run locally via \s-1NRPE\s0. +.RE +.IP "\-\-omreport \fI\s-1OMREPORT\s0 \s-1PATH\s0\fR" 4 +.IX Item "--omreport OMREPORT PATH" +Specify full path to omreport, if it is not installed in any of the +regular places. Usually this option is only needed on Windows, if +omreport is not installed on the C: drive. +.IP "\-i, \-\-info" 4 +.IX Item "-i, --info" +Prefix any alerts with the service tag. +.IP "\-e, \-\-extinfo" 4 +.IX Item "-e, --extinfo" +Display a short summary of system information (model and service tag) +in case of an alert. +.IP "\-\-htmlinfo [\fI\s-1CODE\s0\fR]" 4 +.IX Item "--htmlinfo [CODE]" +Using this option will make the servicetag and model name into +clickable \s-1HTML\s0 links in the output. The model name link will point to +the official Dell documentation for that model, while the servicetag +link will point to a website containing support info for that +particular server. 
+.Sp +This option takes an optional argument, which should be your country +code or \f(CW\*(C`me\*(C'\fR for the middle east. If the country code is omitted the +servicetag link will still work, but it will not be specific for your +country or area. Example for Germany: +.Sp +.Vb 1 +\& check_openmanage --htmlinfo de +.Ve +.Sp +If this option is used together with either the \fI\-\-extinfo\fR or +\&\fI\-\-info\fR options, it is particularly useful. Only the most common +country codes are supported at this time. +.IP "\-\-postmsg \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 +.IX Item "--postmsg STRING or FILE" +User specified post message. Useful for displaying arbitrary or +various system information at the end of alerts. The argument is +either a string with the message, or a file containing that +string. You can control the format with the following interpreted +sequences: +.RS 4 +.IP "\fB%m\fR" 4 +.IX Item "%m" +System model +.IP "\fB%s\fR" 4 +.IX Item "%s" +Service tag +.IP "\fB%b\fR" 4 +.IX Item "%b" +\&\s-1BIOS\s0 version +.IP "\fB%d\fR" 4 +.IX Item "%d" +\&\s-1BIOS\s0 release date +.IP "\fB%o\fR" 4 +.IX Item "%o" +Operating system name +.IP "\fB%r\fR" 4 +.IX Item "%r" +Operating system release +.IP "\fB%p\fR" 4 +.IX Item "%p" +Number of physical drives +.IP "\fB%l\fR" 4 +.IX Item "%l" +Number of logical drives +.IP "\fB%n\fR" 4 +.IX Item "%n" +Line break. Will be a regular line break if run from a \s-1TTY\s0, else an +\&\s-1HTML\s0 line break. +.IP "\fB%%\fR" 4 +.IX Item "%%" +A literal \f(CW\*(C`%\*(C'\fR +.RE +.RS 4 +.RE +.IP "\-s, \-\-state" 4 +.IX Item "-s, --state" +Prefix each alert with its corresponding service state (i.e. warning, +critical etc.). This is useful in case of several alerts from the same +monitored system. +.IP "\-\-short\-state" 4 +.IX Item "--short-state" +Same as the \fB\-\-state\fR option above, except that the state is +abbreviated to a single letter (W=warning, C=critical etc.).
+.IP "\-\-linebreak \fI\s-1STRING\s0\fR" 4 +.IX Item "--linebreak STRING" +check_openmanage will sometimes report more than one line, e.g. if +there are several alerts. If the script has a \s-1TTY\s0, it will use regular +linebreaks. If not (which is the case with \s-1NRPE\s0) it will use \s-1HTML\s0 +linebreaks. Sometimes it can be useful to control what the plugin uses +as a line separator, and this option provides that control. +.Sp +The argument is the exact string to be used as the line +separator. There are two exceptions, i.e. two keywords that translates +to the following: +.RS 4 +.IP "\fB\s-1REG\s0\fR" 4 +.IX Item "REG" +Regular linebreaks, i.e. \*(L"\en\*(R". +.IP "\fB\s-1HTML\s0\fR" 4 +.IX Item "HTML" +\&\s-1HTML\s0 linebreaks, i.e. \*(L"<br/>\*(R". +.RE +.RS 4 +.Sp +This is a rather special option that is normally not needed. The +default behaviour should be sufficient for most users. +.RE +.IP "\-d, \-\-debug" 4 +.IX Item "-d, --debug" +Debug output. Will report status on everything, even if status is +ok. Blacklisted or unchecked components are ignored (i.e. no output). +.Sp +\&\s-1NOTE:\s0 This option is intended for diagnostics and debugging purposes +only. Do not use this option from within Nagios, i.e. in the Nagios +config. +.IP "\-h, \-\-help" 4 +.IX Item "-h, --help" +Display help text. +.IP "\-V, \-\-version" 4 +.IX Item "-V, --version" +Display version info. +.SH "SNMP OPTIONS" +.IX Header "SNMP OPTIONS" +.IP "\-H, \-\-hostname \fI\s-1HOSTNAME\s0\fR" 4 +.IX Item "-H, --hostname HOSTNAME" +The transport address of the destination \s-1SNMP\s0 device. Using this +option triggers \s-1SNMP\s0 mode. +.IP "\-P, \-\-protocol \fI\s-1PROTOCOL\s0\fR" 4 +.IX Item "-P, --protocol PROTOCOL" +\&\s-1SNMP\s0 protocol version. This option is optional and expects a digit +(i.e. \f(CW1\fR, \f(CW2\fR or \f(CW3\fR) to define the \s-1SNMP\s0 version. The default is +\&\f(CW2\fR, i.e. \s-1SNMP\s0 version 2c. 
+.IP "\-C, \-\-community \fI\s-1COMMUNITY\s0\fR" 4 +.IX Item "-C, --community COMMUNITY" +This option expects a string that is to be used as the \s-1SNMP\s0 community +name when using \s-1SNMP\s0 version 1 or 2c. By default the community name +is set to \f(CW\*(C`public\*(C'\fR if the option is not present. +.IP "\-\-port \fI\s-1PORT\s0\fR" 4 +.IX Item "--port PORT" +\&\s-1SNMP\s0 port of the remote (monitored) system. Defaults to the well-known +\&\s-1SNMP\s0 port 161. +.IP "\-U, \-\-username \fI\s-1SECURITYNAME\s0\fR" 4 +.IX Item "-U, --username SECURITYNAME" +[SNMPv3] The User-based Security Model (\s-1USM\s0) used by SNMPv3 requires +that a securityName be specified. This option is required when using +\&\s-1SNMP\s0 version 3, and expects a string 1 to 32 octets in length. +.IP "\-\-authpassword \fI\s-1PASSWORD\s0\fR, \-\-authkey \fI\s-1KEY\s0\fR" 4 +.IX Item "--authpassword PASSWORD, --authkey KEY" +[SNMPv3] By default a securityLevel of \f(CW\*(C`noAuthNoPriv\*(C'\fR is assumed. If +the \-\-authpassword option is specified, the securityLevel becomes +\&\f(CW\*(C`authNoPriv\*(C'\fR. The \-\-authpassword option expects a string which is at +least 1 octet in length as argument. +.Sp +Optionally, instead of the \-\-authpassword option, the \-\-authkey option +can be used so that a plain text password does not have to be +specified in a script. The \-\-authkey option expects a hexadecimal +string produced by localizing the password with the +authoritativeEngineID for the specific destination device. The +\&\f(CW\*(C`snmpkey\*(C'\fR utility included with the Net::SNMP distribution can be +used to create the hexadecimal string (see snmpkey). +.IP "\-\-authprotocol \fI\s-1ALGORITHM\s0\fR" 4 +.IX Item "--authprotocol ALGORITHM" +[SNMPv3] Two different hash algorithms are defined by SNMPv3 which can +be used by the Security Model for authentication.
These algorithms are +\&\s-1HMAC\-MD5\-96\s0 \f(CW\*(C`MD5\*(C'\fR (\s-1RFC\s0 1321) and \s-1HMAC\-SHA\-96\s0 \f(CW\*(C`SHA\-1\*(C'\fR (\s-1NIST\s0 \s-1FIPS\s0 \s-1PUB\s0 +180\-1). The default algorithm used by the plugin is \s-1HMAC\-MD5\-96\s0. This +behavior can be changed by using this option. The option expects +either the string \f(CW\*(C`md5\*(C'\fR or \f(CW\*(C`sha\*(C'\fR to be passed as argument to modify +the hash algorithm. +.IP "\-\-privpassword \fI\s-1PASSWORD\s0\fR, \-\-privkey \fI\s-1KEY\s0\fR" 4 +.IX Item "--privpassword PASSWORD, --privkey KEY" +[SNMPv3] By specifying the options \-\-privkey or \-\-privpassword, the +securityLevel associated with the object becomes +\&\f(CW\*(C`authPriv\*(C'\fR. According to SNMPv3, privacy requires the use of +authentication. Therefore, if either of these two options are present +and the \-\-authkey or \-\-authpassword arguments are missing, the +creation of the object fails. The \-\-privkey and \-\-privpassword +options expect the same input as the \-\-authkey and \-\-authpassword +options respectively. +.IP "\-\-privprotocol \fI\s-1ALGORITHM\s0\fR" 4 +.IX Item "--privprotocol ALGORITHM" +[SNMPv3] The User-based Security Model described in \s-1RFC\s0 3414 defines a +single encryption protocol to be used for privacy. This protocol, +CBC-DES \f(CW\*(C`DES\*(C'\fR (\s-1NIST\s0 \s-1FIPS\s0 \s-1PUB\s0 46\-1), is used by default or if the +string \f(CW\*(C`des\*(C'\fR is passed to the \-\-privprotocol option. The Net::SNMP +module also supports \s-1RFC\s0 3826 which describes the use of +\&\s-1CFB128\-AES\-128\s0 \f(CW\*(C`AES\*(C'\fR (\s-1NIST\s0 \s-1FIPS\s0 \s-1PUB\s0 197) in the \s-1USM\s0. The \s-1AES\s0 +encryption protocol can be selected by passing \f(CW\*(C`aes\*(C'\fR or \f(CW\*(C`aes128\*(C'\fR to +the \-\-privprotocol option. 
+.Sp +One of the following arguments are required: des, aes, aes128, 3des, +3desde +.SH "BLACKLISTING" +.IX Header "BLACKLISTING" +.IP "\-b, \-\-blacklist \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 +.IX Item "-b, --blacklist STRING or FILE" +Blacklist missing and/or failed components, if you do not plan to fix +them. The parameter is either the blacklist string, or a file (that +may or may not exist) containing the string. The blacklist string +contains component names with component IDs separated by slash +(/). Blacklisted components are left unchecked. +.Sp +\&\s-1TIP:\s0 Use the option \f(CW\*(C`\-d\*(C'\fR (or \f(CW\*(C`\-\-debug\*(C'\fR) to get the blacklist \s-1ID\s0 for +devices. The \s-1ID\s0 is listed in a separate column in the debug output. +.Sp +\&\s-1NOTE:\s0 If blacklisting is in effect, the global health of the system is +not checked. +.RS 4 +.IP "\fBSyntax:\fR" 9 +.IX Item "Syntax:" +component1=id1[,id2,...]/component2=id1[,id2,...]/... +.Sp +The \s-1ID\s0 part can also be \f(CW\*(C`all\*(C'\fR, in which all components of that type +is blacklisted. +.IP "\fBExample:\fR" 9 +.IX Item "Example:" +check_openmanage \-b ps=0/fan=3,5/pdisk=1:0:0:1/ctrl_driver=all +.RE +.RS 4 +.Sp +In the example we blacklist powersupply 0, fans 3 and 5, physical disk +1:0:0:1, and warnings about out-of-date drivers for all +controllers. Legal component names include: +.IP "\fBctrl\fR" 8 +.IX Item "ctrl" +Storage controller. Note that if a controller is blacklisted, all +components on that controller (such as physical and logical drives) +are blacklisted as well. +.IP "\fBctrl_fw\fR" 8 +.IX Item "ctrl_fw" +Suppress the special warning message about old controller +firmware. Use this if you can not or will not upgrade the firmware. +.IP "\fBctrl_driver\fR" 8 +.IX Item "ctrl_driver" +Suppress the special warning message about old controller driver. +Particularly useful on systems where you can not upgrade the driver. 
+.IP "\fBctrl_stdr\fR" 8 +.IX Item "ctrl_stdr" +Suppress the special warning message about old Storport driver on +Windows. +.IP "\fBpdisk\fR" 8 +.IX Item "pdisk" +Physical disk. +.IP "\fBvdisk\fR" 8 +.IX Item "vdisk" +Logical drive (virtual disk) +.IP "\fBbat\fR" 8 +.IX Item "bat" +Controller cache battery +.IP "\fBbat_charge\fR" 8 +.IX Item "bat_charge" +Ignore warnings related to the controller cache battery charging +cycle, which happens approximately every 40 days on Dell servers. Note +that using this blacklist keyword makes check_openmanage ignore +non-critical cache battery errors. +.IP "\fBconn\fR" 8 +.IX Item "conn" +Connector (channel) +.IP "\fBencl\fR" 8 +.IX Item "encl" +Enclosure +.IP "\fBencl_fan\fR" 8 +.IX Item "encl_fan" +Enclosure fan +.IP "\fBencl_ps\fR" 8 +.IX Item "encl_ps" +Enclosure power supply +.IP "\fBencl_temp\fR" 8 +.IX Item "encl_temp" +Enclosure temperature probe +.IP "\fBencl_emm\fR" 8 +.IX Item "encl_emm" +Enclosure management module (\s-1EMM\s0) +.IP "\fBdimm\fR" 8 +.IX Item "dimm" +Memory module +.IP "\fBfan\fR" 8 +.IX Item "fan" +Fan +.IP "\fBps\fR" 8 +.IX Item "ps" +Powersupply +.IP "\fBtemp\fR" 8 +.IX Item "temp" +Temperature sensor +.IP "\fBcpu\fR" 8 +.IX Item "cpu" +Processor (\s-1CPU\s0) +.IP "\fBvolt\fR" 8 +.IX Item "volt" +Voltage probe +.IP "\fBbp\fR" 8 +.IX Item "bp" +System battery +.IP "\fBamp\fR" 8 +.IX Item "amp" +Amperage probe (power consumption monitoring) +.IP "\fBintr\fR" 8 +.IX Item "intr" +Intrusion sensor +.RE +.RS 4 +.RE +.SH "CHECK CONTROL" +.IX Header "CHECK CONTROL" +.IP "\-\-only \fI\s-1KEYWORD\s0\fR" 4 +.IX Item "--only KEYWORD" +This option can be specified once and expects a keyword. The different +keywords and the behaviour of check_openmanage are described below. +.RS 4 +.IP "\fBcritical\fR" 4 +.IX Item "critical" +Print only critical alerts. With this option any warning alerts are +suppressed. +.IP "\fBwarning\fR" 4 +.IX Item "warning" +Print only warning alerts.
With this option any critical alerts are +suppressed. +.IP "\fBchassis\fR" 4 +.IX Item "chassis" +Check all chassis components and nothing else. +.IP "\fBstorage\fR" 4 +.IX Item "storage" +Only check storage +.IP "\fBmemory\fR" 4 +.IX Item "memory" +Only check memory modules +.IP "\fBfans\fR" 4 +.IX Item "fans" +Only check fans +.IP "\fBpower\fR" 4 +.IX Item "power" +Only check power supplies +.IP "\fBtemp\fR" 4 +.IX Item "temp" +Only check temperatures +.IP "\fBcpu\fR" 4 +.IX Item "cpu" +Only check processors +.IP "\fBvoltage\fR" 4 +.IX Item "voltage" +Only check voltage probes +.IP "\fBbatteries\fR" 4 +.IX Item "batteries" +Only check batteries +.IP "\fBamperage\fR" 4 +.IX Item "amperage" +Only check power usage +.IP "\fBintrusion\fR" 4 +.IX Item "intrusion" +Only check chassis intrusion +.IP "\fBesmhealth\fR" 4 +.IX Item "esmhealth" +Only check \s-1ESM\s0 log overall health, i.e. fill grade +.IP "\fBesmlog\fR" 4 +.IX Item "esmlog" +Only check the event log (\s-1ESM\s0) content +.IP "\fBalertlog\fR" 4 +.IX Item "alertlog" +Only check the alert log content +.RE +.RS 4 +.RE +.IP "\-\-check \fI\s-1STRING\s0\fR or \fI\s-1FILE\s0\fR" 4 +.IX Item "--check STRING or FILE" +This parameter allows you to adjust which components that should be +checked at all. This is a rougher approach than blacklisting, which +require that you specify component id or index. The parameter should +be either a string containing the adjustments, or a file containing +the string. No errors are raised if the file does not exist. +.Sp +Note: This option is ignored with alternate basenames. +.RS 4 +.IP "\fBExample:\fR" 9 +.IX Item "Example:" +check_openmanage \-\-check storage=0,intrusion=1 +.RE +.RS 4 +.Sp +Legal values are described below, along with the default value. +.IP "\fBstorage\fR" 4 +.IX Item "storage" +Check storage subsystem (controllers, disks etc.). Default: \s-1ON\s0 +.IP "\fBmemory\fR" 4 +.IX Item "memory" +Check memory (dimms). 
Default: \s-1ON\s0 +.IP "\fBfans\fR" 4 +.IX Item "fans" +Check chassis fans. Default: \s-1ON\s0 +.IP "\fBpower\fR" 4 +.IX Item "power" +Check power supplies. Default: \s-1ON\s0 +.IP "\fBtemp\fR" 4 +.IX Item "temp" +Check temperature sensors. Default: \s-1ON\s0 +.IP "\fBcpu\fR" 4 +.IX Item "cpu" +Check CPUs. Default: \s-1ON\s0 +.IP "\fBvoltage\fR" 4 +.IX Item "voltage" +Check voltage sensors. Default: \s-1ON\s0 +.IP "\fBbatteries\fR" 4 +.IX Item "batteries" +Check system batteries. Default: \s-1ON\s0 +.IP "\fBamperage\fR" 4 +.IX Item "amperage" +Check amperage probes. Default: \s-1ON\s0 +.IP "\fBintrusion\fR" 4 +.IX Item "intrusion" +Check chassis intrusion. Default: \s-1ON\s0 +.IP "\fBesmhealth\fR" 4 +.IX Item "esmhealth" +Check the \s-1ESM\s0 log health, i.e. fill grade. Default: \s-1ON\s0 +.IP "\fBesmlog\fR" 4 +.IX Item "esmlog" +Check the \s-1ESM\s0 log content. Default: \s-1OFF\s0 +.IP "\fBalertlog\fR" 4 +.IX Item "alertlog" +Check the alert log content. Default: \s-1OFF\s0 +.RE +.RS 4 +.RE +.SH "DIAGNOSTICS" +.IX Header "DIAGNOSTICS" +The option \f(CW\*(C`\-\-debug\*(C'\fR (or \f(CW\*(C`\-d\*(C'\fR) can be specified to display all +monitored components. +.SH "DEPENDENCIES" +.IX Header "DEPENDENCIES" +If \s-1SNMP\s0 is requested, the perl module Net::SNMP is +required. Otherwise, only a regular perl distribution is required to +run the script. On the target (monitored) system, Dell Openmanage +Server Administrator (\s-1OMSA\s0) must be installed and running. +.SH "EXIT STATUS" +.IX Header "EXIT STATUS" +If no errors are discovered, a value of 0 (\s-1OK\s0) is returned. An exit +value of 1 (\s-1WARNING\s0) signifies one or more non-critical errors, while +2 (\s-1CRITICAL\s0) signifies one or more critical errors. +.PP +The exit value 3 (\s-1UNKNOWN\s0) is reserved for errors within the script, +or errors getting values from Dell \s-1OMSA\s0. +.SH "AUTHOR" +.IX Header "AUTHOR" +Written by Trond H. 
Amundsen <t.h.amundsen@usit.uio.no> +.SH "BUGS AND LIMITATIONS" +.IX Header "BUGS AND LIMITATIONS" +Storage info is not collected or checked on very old PowerEdge models +and/or old \s-1OMSA\s0 versions, due to limitations in \s-1OMSA\s0. The overall +support on those models/versions by this plugin is not well tested. +.SH "INCOMPATIBILITIES" +.IX Header "INCOMPATIBILITIES" +The plugin should work with the Nagios embedded perl interpreter +(ePN). However, this is not thoroughly tested. +.SH "REPORTING BUGS" +.IX Header "REPORTING BUGS" +Report bugs to <t.h.amundsen@usit.uio.no> +.SH "LICENSE AND COPYRIGHT" +.IX Header "LICENSE AND COPYRIGHT" +This program is free software: you can redistribute it and/or modify +it under the terms of the \s-1GNU\s0 General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. +.PP +This program is distributed in the hope that it will be useful, but +\&\s-1WITHOUT\s0 \s-1ANY\s0 \s-1WARRANTY\s0; without even the implied warranty of +\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS\s0 \s-1FOR\s0 A \s-1PARTICULAR\s0 \s-1PURPOSE\s0. See the \s-1GNU\s0 +General Public License for more details. +.PP +You should have received a copy of the \s-1GNU\s0 General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +.SH "SEE ALSO" +.IX Header "SEE ALSO" +<http://folk.uio.no/trondham/software/check_openmanage.html> | ||
Added | check_openmanage-3.5.5.tar.gz/check_openmanage.exe ^ | |
[+] | Added | check_openmanage-3.5.5.tar.gz/check_openmanage.php ^ |
@@ -0,0 +1,187 @@ +<?php +# +# PNP4Nagios template for check_openmanage +# Author: Trond Hasle Amundsen +# Contact: t.h.amundsen@usit.uio.no +# Website: http://folk.uio.no/trondham/software/check_openmanage.html +# Date: 2009-08-25 + +# Array with different colors +$colors = array("0022ff", "22ff22", "ff0000", "00aaaa", "ff00ff", + "ffa500", "cc0000", "0000cc", "0080C0", "8080C0", + "FF0080", "800080", "688e23", "408080", "808000", + "000000", "00FF00", "0080FF", "FF8000", "800000", + "FB31FB"); + +# Color for power usage in watts +$PWRcolor = "66FF00"; + +# Counters +$count = 0; # general counter +$f = 0; # fan probe counter +$t = 0; # temp probe counter +$a = 0; # amp probe counter +$e = 0; # enclosure counter + +# Flags +$visited_fan = 0; +$visited_temp = 0; +$visited_pwr = 0; + +# Enclosure id +$enclosure_id = ''; + +# Default title +$def_title = 'Dell OpenManage'; + +# Loop through the performance data +foreach ($DS as $i) { + + # AMPERAGE PROBE (Watts) + if(preg_match('/^pwr_mon_/',$NAME[$i]) && $UNIT[$i] == 'W') { + $NAME[$i] = preg_replace('/^pwr_mon_\d+_/', '', $NAME[$i]); + $NAME[$i] = preg_replace('/_/', ' ', $NAME[$i]); + + ++$count; + $ds_name[$count] = "Power Consumption"; + $vlabel = "Watt"; + + $title = $ds_name[$count]; + + $opt[$count] = "--slope-mode --vertical-label \"$vlabel\" --title \"$def_title: $title\" "; + + $def[$count] = "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + $def[$count] .= "AREA:var$i#$PWRcolor:\"$NAME[$i]\" " ; + $def[$count] .= "LINE:var$i#000000: " ; + $def[$count] .= "GPRINT:var$i:LAST:\"%6.0lf $UNIT[$i] last \" "; + $def[$count] .= "GPRINT:var$i:MAX:\"%6.0lf $UNIT[$i] max \" "; + $def[$count] .= "GPRINT:var$i:AVERAGE:\"%6.2lf $UNIT[$i] avg \\n\" "; + } + + # AMPERAGE PROBES (Ampere) + if(preg_match('/^pwr_mon_/',$NAME[$i]) && $UNIT[$i] == 'A') { + $NAME[$i] = preg_replace('/^pwr_mon_\d+_/', '', $NAME[$i]); + $NAME[$i] = preg_replace('/_/', ' ', $NAME[$i]); + $NAME[$i] = preg_replace('/ current \d+$/', '', $NAME[$i]); + 
$NAME[$i] = preg_replace('/ps/', 'PowerSupply', $NAME[$i]); + + if ($visited_pwr == 0) { + ++$count; + $visited_pwr = 1; + } + $ds_name[$count] = "Amperage Probes"; + $vlabel = "Ampere"; + + $title = $ds_name[$count]; + + $opt[$count] = "-X0 --lower-limit 0 --slope-mode --vertical-label \"$vlabel\" --title \"$def_title: $title\" "; + if(isset($def[$count])){ + $def[$count] .= "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + }else{ + $def[$count] = "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + } + $def[$count] .= "LINE:var$i#".$colors[$a].":\"$NAME[$i]\" " ; + $def[$count] .= "AREA:var$i#".$colors[$a++]."20: " ; + $def[$count] .= "GPRINT:var$i:LAST:\"%4.2lf $UNIT[$i] last \" "; + $def[$count] .= "GPRINT:var$i:MAX:\"%4.2lf $UNIT[$i] max \" "; + $def[$count] .= "GPRINT:var$i:AVERAGE:\"%4.4lf $UNIT[$i] avg \\n\" "; + } + + # FANS (RPMs) + if(preg_match('/^fan_/',$NAME[$i])){ + if ($visited_fan == 0) { + ++$count; + $visited_fan = 1; + } + + $NAME[$i] = preg_replace('/^fan_\d+_/', '', $NAME[$i]); + $NAME[$i] = preg_replace('/_rpm$/', '', $NAME[$i]); + $NAME[$i] = preg_replace('/_/', ' ', $NAME[$i]); + + $ds_name[$count] = "Fan Speed"; + + $opt[$count] = "-X0 --slope-mode --vertical-label \"RPMs\" --title \"$def_title: Fan Speeds\" "; + if(isset($def[$count])){ + $def[$count] .= "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + }else{ + $def[$count] = "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + } + $def[$count] .= "LINE:var$i#".$colors[$f++].":\"$NAME[$i]\" " ; + $def[$count] .= "GPRINT:var$i:LAST:\"%6.0lf RPM last \" "; + $def[$count] .= "GPRINT:var$i:MAX:\"%6.0lf RPM max \" "; + $def[$count] .= "GPRINT:var$i:AVERAGE:\"%6.2lf RPM avg \\n\" "; + } + + # TEMPERATURES (Celcius) + if(preg_match('/^temp_/',$NAME[$i])){ + if ($visited_temp == 0) { + ++$count; + $visited_temp = 1; + } + $NAME[$i] = preg_replace('/^temp_\d+_/', '', $NAME[$i]); + $NAME[$i] = preg_replace('/_/', ' ', $NAME[$i]); + + $ds_name[$count] = "Chassis Temperatures"; + + $warnThresh = "INF"; + $critThresh = "INF"; + + if 
($WARN[$i] != "") { + $warnThresh = $WARN[$i]; + } + if ($CRIT[$i] != "") { + $critThresh = $CRIT[$i]; + } + + $opt[$count] = "--slope-mode --vertical-label \"Celcius\" --title \"$def_title: Chassis Temperatures\" "; + if(isset($def[$count])){ + $def[$count] .= "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + }else{ + $def[$count] = "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + } + $def[$count] .= "LINE:var$i#".$colors[$t++].":\"$NAME[$i]\" " ; + + $def[$count] .= "GPRINT:var$i:LAST:\"%6.0lf $UNIT[$i] last \" "; + $def[$count] .= "GPRINT:var$i:MAX:\"%6.0lf $UNIT[$i] max \" "; + $def[$count] .= "GPRINT:var$i:AVERAGE:\"%6.2lf $UNIT[$i] avg \\n\" "; + } + + # ENCLOSURE TEMPERATURES (Celcius) + if(preg_match('/^enclosure_(?<id>.+?)_temp_\d+$/', $NAME[$i], $matches)){ + $this_id = $matches['id']; + + if ($enclosure_id != $this_id) { + $e = 0; + ++$count; + $enclosure_id = $this_id; + } + $NAME[$i] = preg_replace('/^enclosure_.+?_temp_(\d+)$/', 'Probe $1', $NAME[$i]); + + $ds_name[$count] = "Enclosure $enclosure_id Temperatures"; + + $warnThresh = "INF"; + $critThresh = "INF"; + + if ($WARN[$i] != "") { + $warnThresh = $WARN[$i]; + } + if ($CRIT[$i] != "") { + $critThresh = $CRIT[$i]; + } + + $opt[$count] = "--slope-mode --vertical-label \"Celcius\" --title \"$def_title: Enclosure $enclosure_id Temperatures\" "; + + if(isset($def[$count])){ + $def[$count] .= "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + }else{ + $def[$count] = "DEF:var$i=$rrdfile:$DS[$i]:AVERAGE " ; + } + $def[$count] .= "LINE:var$i#".$colors[$e++].":\"$NAME[$i]\" " ; + + $def[$count] .= "GPRINT:var$i:LAST:\"%6.0lf $UNIT[$i] last \" "; + $def[$count] .= "GPRINT:var$i:MAX:\"%6.0lf $UNIT[$i] max \" "; + $def[$count] .= "GPRINT:var$i:AVERAGE:\"%6.2lf $UNIT[$i] avg \\n\" "; + } + +} +?> | ||
[+] | Added | check_openmanage-3.5.5.tar.gz/check_openmanage.pod ^ |
@@ -0,0 +1,685 @@ +# Man page created with: +# +# pod2man -s 8 -r "`./check_openmanage -V | head -n 1`" -c 'Nagios plugin' check_openmanage.pod check_openmanage.8 +# +# $Id: check_openmanage.pod 16225 2010-01-13 15:15:08Z trondham $ + +=head1 NAME + +check_openmanage - Nagios plugin for checking the hardware status on + Dell servers running OpenManage + +=head1 SYNOPSIS + +check_openmanage [I<OPTION>]... + +=head1 DESCRIPTION + +check_openmanage is a plugin for Nagios which checks the hardware +health of Dell servers running OpenManage Server Administrator +(OMSA). The plugin checks the health of the storage subsystem, power +supplies, memory modules, temperature probes etc., and gives an alert +if any of the components are faulty or operate outside normal +parameters. + +check_openmanage is designed to be used either locally (using NRPE +or similar) or remotely (using SNMP). In either mode, the output is +(nearly) the same. Note that checking the alert log is not supported +in SNMP mode. + +=head1 GENERAL OPTIONS + +=over 4 + +=item -t, --timeout I<SECONDS> + +The number of seconds after which the plugin will abort. Default +timeout is 30 seconds if the option is not present. + +=item -p, --perfdata [I<multiline>] + +Collect performance data. Performance data collected include +temperatures (in Celsius) and fan speeds (in rpm). On systems that +support it, power consumption is also collected (in Watts). + +If given the argument C<multiline>, the plugin will output the +performance data on multiple lines, for Nagios 3.x and above. + +=item -w, --warning I<STRING> or I<FILE> + +Override the machine-default temperature warning thresholds. Syntax is +C<id1=max[/min],id2=max[/min],...>. The following example sets warning +limits to max 50C for probe 0, and max 45C and min 10C for probe 1: + +check_openmanage -w 0=50,1=45/10 + +The minimum limit can be omitted, if desired. Most often, you are only +interested in setting the maximum thresholds. 
+ +This parameter can be either a string with the limits, or a file +containing the limits string. The option can be specified multiple +times. + +=item -c, --critical I<STRING> or I<FILE> + +Override the machine-default temperature critical thresholds. Syntax +and behaviour are the same as for warning thresholds described above. + +=item -o, --ok-info I<NUMBER> + +This option lets you define how much output you want the plugin to +give when everything is OK, i.e. the verbosity level. The default +value is 0 (one line of output). The output levels are cumulative. + +=over 4 + +=item B<0> + +- Only one line (default) + +=item B<1> + +- BIOS and firmware info on a separate line + +=item B<2> + +- Storage controller and enclosure info on separate lines + +=item B<3> + +- OMSA version on separate line + +=back + +The reason that OMSA version is separated from the rest is that +finding it requires running a really slow omreport command, when the +plugin is run locally via NRPE. + +=item --omreport I<OMREPORT PATH> + +Specify full path to omreport, if it is not installed in any of the +regular places. Usually this option is only needed on Windows, if +omreport is not installed on the C: drive. + +=item -i, --info + +Prefix any alerts with the service tag. + +=item -e, --extinfo + +Display a short summary of system information (model and service tag) +in case of an alert. + +=item --htmlinfo [I<CODE>] + +Using this option will make the servicetag and model name into +clickable HTML links in the output. The model name link will point to +the official Dell documentation for that model, while the servicetag +link will point to a website containing support info for that +particular server. + +This option takes an optional argument, which should be your country +code or C<me> for the middle east. If the country code is omitted the +servicetag link will still work, but it will not be specific for your +country or area. 
Example for Germany: + + check_openmanage --htmlinfo de + +If this option is used together with either the I<--extinfo> or +I<--info> options, it is particularly useful. Only the most common +country codes are supported at this time. + +=item --postmsg I<STRING> or I<FILE> + +User specified post message. Useful for displaying arbitrary or +various system information at the end of alerts. The argument is +either a string with the message, or a file containing that +string. You can control the format with the following interpreted +sequences: + +=over 4 + +=item B<%m> + +System model + +=item B<%s> + +Service tag + +=item B<%b> + +BIOS version + +=item B<%d> + +BIOS release date + +=item B<%o> + +Operating system name + +=item B<%r> + +Operating system release + +=item B<%p> + +Number of physical drives + +=item B<%l> + +Number of logical drives + +=item B<%n> + +Line break. Will be a regular line break if run from a TTY, else an +HTML line break. + +=item B<%%> + +A literal C<%> + +=back + +=item -s, --state + +Prefix each alert with its corresponding service state (i.e. warning, +critical etc.). This is useful in case of several alerts from the same +monitored system. + +=item --short-state + +Same as the B<--state> option above, except that the state is +abbreviated to a single letter (W=warning, C=critical etc.). + +=item --linebreak I<STRING> + +check_openmanage will sometimes report more than one line, e.g. if +there are several alerts. If the script has a TTY, it will use regular +linebreaks. If not (which is the case with NRPE) it will use HTML +linebreaks. Sometimes it can be useful to control what the plugin uses +as a line separator, and this option provides that control. + +The argument is the exact string to be used as the line +separator. There are two exceptions, i.e. two keywords that translate +to the following: + +=over 4 + +=item B<REG> + +Regular linebreaks, i.e. "\n". + +=item B<HTML> + +HTML linebreaks, i.e. "<br/>". 
+ +=back + +This is a rather special option that is normally not needed. The +default behaviour should be sufficient for most users. + +=item -d, --debug + +Debug output. Will report status on everything, even if status is +ok. Blacklisted or unchecked components are ignored (i.e. no output). + +NOTE: This option is intended for diagnostics and debugging purposes +only. Do not use this option from within Nagios, i.e. in the Nagios +config. + +=item -h, --help + +Display help text. + +=item -V, --version + +Display version info. + +=back + +=head1 SNMP OPTIONS + +=over 4 + +=item -H, --hostname I<HOSTNAME> + +The transport address of the destination SNMP device. Using this +option triggers SNMP mode. + +=item -P, --protocol I<PROTOCOL> + +SNMP protocol version. This option is optional and expects a digit +(i.e. C<1>, C<2> or C<3>) to define the SNMP version. The default is +C<2>, i.e. SNMP version 2c. + +=item -C, --community I<COMMUNITY> + +This option expects a string that is to be used as the SNMP community +name when using SNMP version 1 or 2c. By default the community name +is set to C<public> if the option is not present. + +=item --port I<PORT> + +SNMP port of the remote (monitored) system. Defaults to the well-known +SNMP port 161. + +=item -U, --username I<SECURITYNAME> + +[SNMPv3] The User-based Security Model (USM) used by SNMPv3 requires +that a securityName be specified. This option is required when using +SNMP version 3, and expects a string 1 to 32 octets in length. + +=item --authpassword I<PASSWORD>, --authkey I<KEY> + +[SNMPv3] By default a securityLevel of C<noAuthNoPriv> is assumed. If +the --authpassword option is specified, the securityLevel becomes +C<authNoPriv>. The --authpassword option expects a string which is at +least 1 octet in length as argument. + +Optionally, instead of the --authpassword option, the --authkey option +can be used so that a plain text password does not have to be +specified in a script. 
The --authkey option expects a hexadecimal +string produced by localizing the password with the +authoritativeEngineID for the specific destination device. The +C<snmpkey> utility included with the Net::SNMP distribution can be +used to create the hexadecimal string (see L<snmpkey>). + +=item --authprotocol I<ALGORITHM> + +[SNMPv3] Two different hash algorithms are defined by SNMPv3 which can +be used by the Security Model for authentication. These algorithms are +HMAC-MD5-96 C<MD5> (RFC 1321) and HMAC-SHA-96 C<SHA-1> (NIST FIPS PUB +180-1). The default algorithm used by the plugin is HMAC-MD5-96. This +behavior can be changed by using this option. The option expects +either the string C<md5> or C<sha> to be passed as argument to modify +the hash algorithm. + +=item --privpassword I<PASSWORD>, --privkey I<KEY> + +[SNMPv3] By specifying the options --privkey or --privpassword, the +securityLevel associated with the object becomes +C<authPriv>. According to SNMPv3, privacy requires the use of +authentication. Therefore, if either of these two options are present +and the --authkey or --authpassword arguments are missing, the +creation of the object fails. The --privkey and --privpassword +options expect the same input as the --authkey and --authpassword +options respectively. + +=item --privprotocol I<ALGORITHM> + +[SNMPv3] The User-based Security Model described in RFC 3414 defines a +single encryption protocol to be used for privacy. This protocol, +CBC-DES C<DES> (NIST FIPS PUB 46-1), is used by default or if the +string C<des> is passed to the --privprotocol option. The Net::SNMP +module also supports RFC 3826 which describes the use of +CFB128-AES-128 C<AES> (NIST FIPS PUB 197) in the USM. The AES +encryption protocol can be selected by passing C<aes> or C<aes128> to +the --privprotocol option. 
+ +One of the following arguments is required: des, aes, aes128, 3des, +3desde + +=back + +=head1 BLACKLISTING + +=over 4 + +=item -b, --blacklist I<STRING> or I<FILE> + +Blacklist missing and/or failed components, if you do not plan to fix +them. The parameter is either the blacklist string, or a file (that +may or may not exist) containing the string. The blacklist string +contains component names with component IDs separated by slash +(/). Blacklisted components are left unchecked. + +TIP: Use the option C<-d> (or C<--debug>) to get the blacklist ID for +devices. The ID is listed in a separate column in the debug output. + +NOTE: If blacklisting is in effect, the global health of the system is +not checked. + +=over 9 + +=item B<Syntax:> + +component1=id1[,id2,...]/component2=id1[,id2,...]/... + +The ID part can also be C<all>, in which case all components of that type +are blacklisted. + +=item B<Example:> + +check_openmanage -b ps=0/fan=3,5/pdisk=1:0:0:1/ctrl_driver=all + +=back + +In the example we blacklist powersupply 0, fans 3 and 5, physical disk +1:0:0:1, and warnings about out-of-date drivers for all +controllers. Legal component names include: + +=over 8 + +=item B<ctrl> + +Storage controller. Note that if a controller is blacklisted, all +components on that controller (such as physical and logical drives) +are blacklisted as well. + +=item B<ctrl_fw> + +Suppress the special warning message about old controller +firmware. Use this if you can not or will not upgrade the firmware. + +=item B<ctrl_driver> + +Suppress the special warning message about old controller driver. +Particularly useful on systems where you can not upgrade the driver. + +=item B<ctrl_stdr> + +Suppress the special warning message about old Storport driver on +Windows. + +=item B<pdisk> + +Physical disk. 
+ +=item B<vdisk> + +Logical drive (virtual disk) + +=item B<bat> + +Controller cache battery + +=item B<bat_charge> + +Ignore warnings related to the controller cache battery charging +cycle, which happens approximately every 40 days on Dell servers. Note +that using this blacklist keyword makes check_openmanage ignore +non-critical cache battery errors. + +=item B<conn> + +Connector (channel) + +=item B<encl> + +Enclosure + +=item B<encl_fan> + +Enclosure fan + +=item B<encl_ps> + +Enclosure power supply + +=item B<encl_temp> + +Enclosure temperature probe + +=item B<encl_emm> + +Enclosure management module (EMM) + +=item B<dimm> + +Memory module + +=item B<fan> + +Fan + +=item B<ps> + +Powersupply + +=item B<temp> + +Temperature sensor + +=item B<cpu> + +Processor (CPU) + +=item B<volt> + +Voltage probe + +=item B<bp> + +System battery + +=item B<amp> + +Amperage probe (power consumption monitoring) + +=item B<intr> + +Intrusion sensor + +=back + +=back + +=head1 CHECK CONTROL + +=over 4 + +=item --only I<KEYWORD> + +This option can be specified once and expects a keyword. The different +keywords and the behaviour of check_openmanage are described below. + +=over 4 + +=item B<critical> + +Print only critical alerts. With this option any warning alerts are +suppressed. + +=item B<warning> + +Print only warning alerts. With this option any critical alerts are +suppressed. + +=item B<chassis> + +Check all chassis components and nothing else. + +=item B<storage> + +Only check storage + +=item B<memory> + +Only check memory modules + +=item B<fans> + +Only check fans + +=item B<power> + +Only check power supplies + +=item B<temp> + +Only check temperatures + +=item B<cpu> + +Only check processors + +=item B<voltage> + +Only check voltage probes + +=item B<batteries> + +Only check batteries + +=item B<amperage> + +Only check power usage + +=item B<intrusion> + +Only check chassis intrusion + +=item B<esmhealth> + +Only check ESM log overall health, i.e. 
fill grade + +=item B<esmlog> + +Only check the event log (ESM) content + +=item B<alertlog> + +Only check the alert log content + +=back + +=item --check I<STRING> or I<FILE> + +This parameter allows you to adjust which components should be +checked at all. This is a rougher approach than blacklisting, which +requires that you specify component id or index. The parameter should +be either a string containing the adjustments, or a file containing +the string. No errors are raised if the file does not exist. + +Note: This option is ignored with alternate basenames. + +=over 9 + +=item B<Example:> + +check_openmanage --check storage=0,intrusion=1 + +=back + +Legal values are described below, along with the default value. + +=over 4 + +=item B<storage> + +Check storage subsystem (controllers, disks etc.). Default: ON + +=item B<memory> + +Check memory (dimms). Default: ON + +=item B<fans> + +Check chassis fans. Default: ON + +=item B<power> + +Check power supplies. Default: ON + +=item B<temp> + +Check temperature sensors. Default: ON + +=item B<cpu> + +Check CPUs. Default: ON + +=item B<voltage> + +Check voltage sensors. Default: ON + +=item B<batteries> + +Check system batteries. Default: ON + +=item B<amperage> + +Check amperage probes. Default: ON + +=item B<intrusion> + +Check chassis intrusion. Default: ON + +=item B<esmhealth> + +Check the ESM log health, i.e. fill grade. Default: ON + +=item B<esmlog> + +Check the ESM log content. Default: OFF + +=item B<alertlog> + +Check the alert log content. Default: OFF + +=back + +=back + +=head1 DIAGNOSTICS + +The option C<--debug> (or C<-d>) can be specified to display all +monitored components. + +=head1 DEPENDENCIES + +If SNMP is requested, the perl module Net::SNMP is +required. Otherwise, only a regular perl distribution is required to +run the script. On the target (monitored) system, Dell Openmanage +Server Administrator (OMSA) must be installed and running. 
+ +=head1 EXIT STATUS + +If no errors are discovered, a value of 0 (OK) is returned. An exit +value of 1 (WARNING) signifies one or more non-critical errors, while +2 (CRITICAL) signifies one or more critical errors. + +The exit value 3 (UNKNOWN) is reserved for errors within the script, +or errors getting values from Dell OMSA. + +=head1 AUTHOR + +Written by Trond H. Amundsen <t.h.amundsen@usit.uio.no> + +=head1 BUGS AND LIMITATIONS + +Storage info is not collected or checked on very old PowerEdge models +and/or old OMSA versions, due to limitations in OMSA. The overall +support on those models/versions by this plugin is not well tested. + +=head1 INCOMPATIBILITIES + +The plugin should work with the Nagios embedded perl interpreter +(ePN). However, this is not thoroughly tested. + +=head1 REPORTING BUGS + +Report bugs to <t.h.amundsen@usit.uio.no> + +=head1 LICENSE AND COPYRIGHT + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see L<http://www.gnu.org/licenses/>. + +=head1 SEE ALSO + +L<http://folk.uio.no/trondham/software/check_openmanage.html> + +=cut | ||
[+] | Added | check_openmanage-3.5.5.tar.gz/check_openmanage.spec ^ |
@@ -0,0 +1,141 @@ +Summary: Nagios plugin to monitor hardware health on Dell servers +Name: check_openmanage +Version: 3.5.5 +Release: 1%{?dist} +License: GPL +Packager: Trond Hasle Amundsen <t.h.amundsen@usit.uio.no> +Group: Applications/System +BuildRoot: %{_tmppath}/%{name}-%{version}-root +URL: http://folk.uio.no/trondham/software/%{name}.html +Source0: http://folk.uio.no/trondham/software/files/%{name}-%{version}.tar.gz +BuildRequires: perl + +Requires: perl >= 5.6.0 +Requires: perl(POSIX) +Requires: perl(Getopt::Long) + +%description +check_openmanage is a plugin for Nagios which checks the hardware +health of Dell servers running OpenManage Server Administrator +(OMSA). The plugin can be used remotely with SNMP or locally with +NRPE, check_by_ssh or similar, whichever suits your needs and +particular taste. The plugin checks the health of the storage +subsystem, power supplies, memory modules, temperature probes etc., +and gives an alert if any of the components are faulty or operate +outside normal parameters. + +%prep +%setup -q + +%build +pod2man -s 8 -r "%{name} %{version}" -c "Nagios plugin" %{name}.pod %{name}.8 +gzip %{name}.8 + +%install +mkdir -p %{buildroot}/%{_libdir}/nagios/plugins +mkdir -p %{buildroot}/%{_mandir}/man8 +install -p -m 0755 %{name} %{buildroot}/%{_libdir}/nagios/plugins +install -m 0644 %{name}.8.gz %{buildroot}/%{_mandir}/man8 + +%clean +rm -rf %{buildroot} + +%files +%defattr(-, root, root, -) +%doc README COPYING CHANGES +%{_libdir}/nagios/plugins/%{name} +%attr(0755, root, root) %{_mandir}/man8/%{name}.8.gz + + +%changelog +* Fri Jan 22 2010 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.5.5-1 +- Version 3.5.5 + +* Wed Jan 13 2010 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.5.4-1 +- Version 3.5.4 + +* Thu Dec 17 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.5.3-1 +- Version 3.5.3 + +* Tue Nov 17 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.5.2-1 +- Version 3.5.2 + +* Thu Oct 22 2009 Trond H. 
Amundsen <t.h.amundsen@usit.uio.no> - 3.5.1-1 +- Version 3.5.1 + +* Tue Oct 13 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.5.0-1 +- Version 3.5.0 +- New location for the manual page (section 3 -> 8) + +* Fri Aug 7 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.9-1 +- Version 3.4.9 + +* Fri Jul 31 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.8-1 +- Version 3.4.8 + +* Fri Jul 24 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.7-1 +- Version 3.4.7 + +* Tue Jul 7 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.6-1 +- Version 3.4.6 + +* Mon Jun 22 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.5-1 +- Version 3.4.5 + +* Mon Jun 22 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.4-1 +- Version 3.4.4 + +* Thu Jun 11 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.3-1 +- Version 3.4.3 + +* Wed Jun 3 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.2-1 +- Version 3.4.2 + +* Wed May 27 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.1-1 +- Version 3.4.1 + +* Mon May 25 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.4.0-1 +- Version 3.4.0 + +* Tue May 5 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.3.2-1 +- Version 3.3.2 + +* Tue Apr 28 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.3.1-1 +- Version 3.3.1 + +* Tue Apr 7 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.3.0-1 +- Version 3.3.0 + +* Sun Mar 29 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.7-1 +- Version 3.2.7 + +* Thu Mar 5 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.6-1 +- Version 3.2.6 + +* Tue Feb 24 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.5-1 +- Version 3.2.5 +- take 64bit (other libdir) into consideration + +* Tue Feb 17 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.4-1 +- Version 3.2.4 + +* Mon Feb 9 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.3-1 +- Version 3.2.3 + +* Tue Feb 3 2009 Trond H. 
Amundsen <t.h.amundsen@usit.uio.no> - 3.2.2-1 +- Version 3.2.2 + +* Tue Feb 3 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.1-1 +- Version 3.2.1 + +* Tue Jan 27 2009 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.2.0-1 +- Version 3.2.0 + +* Sat Dec 20 2008 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.0.2-1 +- Version 3.0.2 + +* Thu Dec 4 2008 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 3.0.0-1 +- Version 3.0.0 + +* Wed Nov 19 2008 Trond H. Amundsen <t.h.amundsen@usit.uio.no> - 2.1.0-0 +- first RPM release | ||
[+] | Changed | check_openmanage-3.5.5.tar.gz/install.bat ^ |
@@ -16,16 +16,4 @@ echo %dir% :COPY -copy check_openmanage %dir% -copy check_openmanage %dir%\check_openmanage_alertlog -copy check_openmanage %dir%\check_openmanage_batteries -copy check_openmanage %dir%\check_openmanage_cpu -copy check_openmanage %dir%\check_openmanage_esmlog -copy check_openmanage %dir%\check_openmanage_esmhealth -copy check_openmanage %dir%\check_openmanage_fans -copy check_openmanage %dir%\check_openmanage_intrusion -copy check_openmanage %dir%\check_openmanage_memory -copy check_openmanage %dir%\check_openmanage_power -copy check_openmanage %dir%\check_openmanage_pwrmonitor -copy check_openmanage %dir%\check_openmanage_storage -copy check_openmanage %dir%\check_openmanage_temperature +copy check_openmanage.exe %dir% | ||
[+] | Changed | check_openmanage-3.5.5.tar.gz/install.sh ^ |
@@ -1,20 +1,25 @@ #!/bin/sh +# Find correct lib dir if [ "`uname -m`" = "x86_64" ]; then - def_install_dir=/usr/lib64/nagios/plugins/contrib + libdir=/usr/lib64 else - def_install_dir=/usr/lib/nagios/plugins/contrib + libdir=/usr/lib fi -def_mandir=/usr/share/man/man3 +# Default install locations +def_plugindir=${libdir}/nagios/plugins +def_mandir=/usr/share/man/man8 + +# Find install locations if [ "$1" = "-q" ]; then - install_dir=$def_install_dir + plugindir=$def_plugindir mandir=$def_mandir else - echo -n "Plugin dir [$def_install_dir]: " - read install_dir - if [ "$install_dir" = "" ]; then - install_dir=$def_install_dir + echo -n "Plugin dir [$def_plugindir]: " + read plugindir + if [ "$plugindir" = "" ]; then + plugindir=$def_plugindir fi echo -n "Man page dir [$def_mandir]: " read mandir @@ -23,14 +28,16 @@ fi fi -if [ -d $install_dir ]; then +# Error if plugin dir doesn't exist +if [ -d $plugindir ]; then : else - echo "ERROR: Plugin directory $install_dir doesn't exist," + echo "ERROR: Plugin directory $plugindir doesn't exist," echo "ERROR: or is not a directory" exit 1 fi +# Error if man dir doesn't exist if [ -d $mandir ]; then : else @@ -39,25 +46,9 @@ exit 1 fi -# The script and symlinks -cp check_openmanage $install_dir -cd $install_dir -ln -sf check_openmanage check_openmanage_alertlog -ln -sf check_openmanage check_openmanage_batteries -ln -sf check_openmanage check_openmanage_cpu -ln -sf check_openmanage check_openmanage_esmlog -ln -sf check_openmanage check_openmanage_esmhealth -ln -sf check_openmanage check_openmanage_fans -ln -sf check_openmanage check_openmanage_intrusion -ln -sf check_openmanage check_openmanage_memory -ln -sf check_openmanage check_openmanage_power -ln -sf check_openmanage check_openmanage_pwrmonitor -ln -sf check_openmanage check_openmanage_storage -ln -sf check_openmanage check_openmanage_temperature - -# The man page -cd - >/dev/null -cp check_openmanage.3pm $mandir +# Install +install -p -m 0755 check_openmanage 
$plugindir +install -m 0644 check_openmanage.8 $mandir # Done echo "done." |