#!/usr/bin/perl -w
# Source: file check_lsi_raid of package nagios-plugins-lsi (revision 2)
# =============================================================================
# check_lsi_raid: Nagios/Icinga plugin to check LSI Raid Controller status
# -----------------------------------------------------------------------------
# Created as part of a semester project at the University of Applied Sciences
# Hagenberg (http://www.fh-ooe.at/en/hagenberg-campus/)
#
# Copyright (c) 2013-2016:
# Georg Schoenberger (gschoenberger@thomas-krenn.com)
# Grubhofer Martin (s1110239013@students.fh-hagenberg.at)
# Scheipner Alexander (s1110239032@students.fh-hagenberg.at)
# Werner Sebastian (s1110239038@students.fh-hagenberg.at)
# Jonas Meurer (jmeurer@inet.de)
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
# ==============================================================================
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case);
use File::Which;

# Verbosity level, plugin version and the banner printed by displayHelp.
our $VERBOSITY = 0;
our $VERSION = "2.5";
our $NAME = "check_lsi_raid: Nagios/Icinga plugin to check LSI Raid Controller status";

# Default RAID controller thresholds: temperature (printed by displayUsage
# with a 'C' suffix) and correctable memory error count.
our $C_TEMP_WARNING = 85;
our $C_TEMP_CRITICAL = 95;
our $C_MEM_CORRECTABLE_WARNING = 0;
our $C_MEM_CORRECTABLE_CRITICAL = 0;

# Default temperature thresholds for physical devices (PD), the BBU and the
# CacheVault module (CV).
our $PD_TEMP_WARNING = 40;
our $PD_TEMP_CRITICAL = 45;
our $BBU_TEMP_WARNING = 50;
our $BBU_TEMP_CRITICAL = 60;
our $CV_TEMP_WARNING = 70;
our $CV_TEMP_CRITICAL = 85;

# Per-disk warning thresholds for media (M), other (O), predictive failure (P),
# shield counter (S) and BBM (B) errors.
our ($IGNERR_M, $IGNERR_O, $IGNERR_P, $IGNERR_S, $IGNERR_B) = (0, 0, 0, 0, 0);

# If true, physical device commands are built without an enclosure part.
our $NOENCLOSURES = 0;
# If true, a logical device cache mode without 'WB' is not reported.
our $NOWRITEBACKOK = 0;
# Controller number used when building '/c<num>' command parts.
our $CONTROLLER = 0;

# Plugin exit codes.
use constant {
    STATE_OK => 0,
    STATE_WARNING => 1,
    STATE_CRITICAL => 2,
    STATE_UNKNOWN => 3,
};

# Header maps to parse logical and physical devices. $LDMAP is pointed at
# @map_a or @map_cc_a by getLogicalDevices, depending on the column count of
# the logical device table header.
our $LDMAP;
our @map_a = ('DG/VD','TYPE','State','Access','Consist','Cache','sCC','Size');
our @map_cc_a = ('DG/VD','TYPE','State','Access','Consist','Cache','Cac','sCC','Size');
our @pdmap_a = ('EID:Slt','DID','State','DG','Size','Intf','Med','SED','PI','SeSz','Model','Sp');

# Print command line usage to stdout.
# The default thresholds shown in the text are read from the file-level
# threshold variables at call time.
sub displayUsage {
    # Kept as a separate print so the trailing blank after 'Usage:' is explicit.
    print "Usage: \n";
    print <<"END_USAGE";
 [ -h | --help ]
Display this help page
 [ -v | -vv | -vvv | --verbose ]
Sets the verbosity level.
No -v is the normal single line output for Nagios/Icinga, -v is a
more detailed version but still usable in Nagios. -vv is a
multiline output for debugging configuration errors or more
detailed information. -vvv is for plugin problem diagnosis.
For further information please visit:
http://nagiosplug.sourceforge.net/developer-guidelines.html#AEN39
 [ -V | --version ]
Displays the plugin and, if available, the version of StorCLI.
 [ -C <num> | --controller <num> ]
Specifies a controller number, defaults to 0.
 [ -EID <ids> | --enclosure <ids> ]
Specifies one or more enclosure numbers, per default all enclosures. Takes either
an integer as additional argument or a comma separated list,
e.g. '0,1,2'. With --noenclosures enclosures can be disabled.
 [ -LD <ids> | --logicaldevice <ids>]
Specifies one or more logical devices, defaults to all. Takes either an
integer as additional argument or a comma separated list e.g. '0,1,2'.
 [ -PD <ids> | --physicaldevice <ids> ]
Specifies one or more physical devices, defaults to all. Takes either an
integer as additional argument or a comma separated list e.g. '0,1,2'.
 [ -Tw <temp> | --temperature-warn <temp> ]
Specifies the RAID controller temperature warning threshold, the default
threshold is ${C_TEMP_WARNING}C.
 [ -Tc <temp> | --temperature-critical <temp> ]
Specifies the RAID controller temperature critical threshold, the default
threshold is ${C_TEMP_CRITICAL}C.
 [ -PDTw <temp> | --physicaldevicetemperature-warn <temp> ]
Specifies the disk temperature warning threshold, the default threshold
is ${PD_TEMP_WARNING}C.
 [ -PDTc <temp> | --physicaldevicetemperature-critical <temp> ]
Specifies the disk temperature critical threshold, the default threshold
is ${PD_TEMP_CRITICAL}C.
 [ -BBUTw <temp> | --bbutemperature-warning <temp> ]
Specifies the BBU temperature warning threshold, default threshold
is ${BBU_TEMP_WARNING}C.
 [ -BBUTc <temp> | --bbutemperature-critical <temp> ]
Specifies the BBU temperature critical threshold, default threshold
is ${BBU_TEMP_CRITICAL}C.
 [ -CVTw <temp> | --cvtemperature-warning <temp> ]
Specifies the CV temperature warning threshold, default threshold
is ${CV_TEMP_WARNING}C.
 [ -CVTc <temp> | --cvtemperature-critical <temp> ]
Specifies the CV temperature critical threshold, default threshold
is ${CV_TEMP_CRITICAL}C.
 [ -Im <count> | --ignore-media-errors <count> ]
Specifies the warning threshold for media errors per disk, the default
threshold is ${IGNERR_M}.
 [ -Io <count> | --ignore-other-errors <count> ]
Specifies the warning threshold for other errors per disk, the default
threshold is ${IGNERR_O}.
 [ -Ip <count> | --ignore-predictive-fail-count <count> ]
Specifies the warning threshold for predictive failure analysis errors per disk, the default
threshold is ${IGNERR_P}.
 [ -Is <count> | --ignore-shield-counter <count> ]
Specifies the warning threshold for shield counter per disk, the default
threshold is ${IGNERR_S}.
 [ -Ib <count> | --ignore-bbm-counter <count> ]
Specifies the warning threshold for bbm errors per disk, the default
threshold is ${IGNERR_B}.
 [ -p <path> | --path <path>]
Specifies the path to StorCLI, per default uses the tool 'which' to get
the StorCLI path and also checks for binaries in /opt/MegaRAID/storcli.
 [ -b <0/1/2> | --BBU <0/1/2> ]
Check if a BBU or a CacheVault module is present. One must be present unless
'-b 0' is defined. This ensures that for a given controller a BBU/CV must be
present per default. '-b 2' checks if one is present, but does not exit
CRITICAL when there is no BBU/CV.
 [ --noenclosures <0/1> ]
Specifies if enclosures are present or not. 0 means enclosures are
present (default), 1 states no enclosures are used (no 'eall' in
storcli commands).
 [ --nowritebackok <0/1> ]
Specifies if a WriteThrough Cache configuration is ok or not. 0 means WriteThrough is
considered Critical (default), 1 states WriteThrough is ok.
 [ --nosudo ]
Turn off using sudo.
 [ --nocleanlogs ]
Do not clean storcli logs after running storcli commands.
END_USAGE
}

# Displays a short help text for the user: plugin banner and version, a short
# description, the full usage (via displayUsage) and contact/example notes.
# Does not return; exits with STATE_UNKNOWN.
sub displayHelp {
    print $NAME . "\n";
    print "Plugin version: " . $VERSION . "\n";
    print <<'END_INTRO';
Copyright (C) 2013-2015 Thomas-Krenn.AG
Current updates available at
https://github.com/thomas-krenn/check_lsi_raid.git
This Nagios/Icinga Plugin checks LSI RAID controllers for controller,
physical device, logical device, BBU and CV warnings and errors.
In order for this plugin to work properly you need to add the nagios
user to your sudoers file (or create a new one in /etc/sudoers.d/).
END_INTRO
    displayUsage();
    # Non-interpolating here-doc, so the '@' in the mail address needs no escape.
    print <<'END_FOOTER';
Further information about this plugin can be found at:
http://www.thomas-krenn.com/de/wiki/LSI_RAID_Monitoring_Plugin and
http://www.thomas-krenn.com/de/wiki/LSI_RAID_Monitoring_Plugin
Please send an email to the tk-monitoring plugin-user mailing list:
tk-monitoring-plugins-user@lists.thomas-krenn.com
if you have questions regarding use of this software, to submit patches, or
suggest improvements.
Example usage:
* If StorCli can be found with 'which' or in /opt/MegaRAID/storcli
* check_lsi_raid
* check_lsi_raid -p /opt/MegaRAID/storcli/storcli64
* check_lsi_raid -p /opt/MegaRAID/storcli/storcli64 -C 1
END_FOOTER
    exit(STATE_UNKNOWN);
}

# Prints the StorCLI version lines, if a storcli path is given, and exits with
# STATE_OK. Every output line of '<storcli> -v' that starts with whitespace
# followed by 'StorCli' is trimmed and printed, followed by one newline.
# @param storcli The path to storcli command utility (may be undef)
# @param writelogs If false, 'nolog' is appended to the storcli command
sub displayVersion {
    my ($storcli, $writelogs) = @_;
    if (defined $storcli) {
        my $command = $storcli . ' -v';
        $command .= ' nolog' unless $writelogs;
        my @versionOutput = `$command`;
        foreach my $versionLine (@versionOutput) {
            next unless $versionLine =~ /^\s+StorCli.*/;
            $versionLine =~ s/^\s+|\s+$//g;
            print $versionLine;
        }
        print "\n";
    }
    exit(STATE_OK);
}

# Checks if a storcli call was successful, i.e. if the line 'Status = Success'
# is present in the command output. Only the first line starting with 'Status'
# is evaluated.
# @param output The output of the storcli command as array reference
# @return 1 on success (or if the output reports 'Failure 46', see below),
# 0 if not. 0 is also returned when the output has no 'Status' line at all.
sub checkCommandStatus {
    my $output_ref = shift;
    foreach my $line (@{$output_ref}) {
        next unless $line =~ /^Status/;
        # Tolerate trailing whitespace or a CR before the newline.
        return 1 if $line =~ /^Status = Success\s*$/;
        # Return 46 means a drive is not attached, this is a valid failure
        return 1 if grep { /Fail(ed|ure)\s+46/i } @{$output_ref};
        return 0;
    }
    # No status line found: report failure explicitly instead of falling off
    # the end of the sub with an unspecified value.
    return 0;
}

# Runs '<storcli> show time' and reports whether the call succeeded. Can be
# used to check if the controller number is a correct one.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli command
# @return 1 on success, 0 if not (result of checkCommandStatus)
sub getControllerTime {
    my ($storcli, $writelogs) = @_;
    my $command = $storcli . ' show time';
    $command .= ' nolog' unless $writelogs;
    my @timeOutput = `$command`;
    return (checkCommandStatus(\@timeOutput));
}

# Get the status of the raid controller by parsing '<storcli>/c<num> show all'.
# The controller number is taken from the global $CONTROLLER, not from the
# controller part of the given storcli string.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli command
# @param commands_a An array to push the used command to
# @return A hash reference, each key a value of the raid controller info.
# Exits with STATE_UNKNOWN if the command cannot be built or fails.
sub getControllerInfo {
    my ($storcli, $writelogs, $commands_a) = @_;

    # Everything in front of the '/c<num>' part is reused as command prefix.
    my ($prefix) = $storcli =~ /^(.*)\/c[0-9]+/;
    if (!defined $prefix) {
        print "Invalid StorCLI command! ($storcli)\n";
        exit(STATE_UNKNOWN);
    }
    my $command = $prefix . '/c' . $CONTROLLER . ' show all';
    $command .= ' nolog' unless $writelogs;

    push @{$commands_a}, $command;
    my @output = `$command`;
    # $? is non-zero for a non-zero exit code, death by signal and a command
    # that could not be started at all.
    if ($? != 0) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    my %foundController_h;
    foreach my $line (@output) {
        next unless $line =~ /\=/;
        # Limit 2 keeps '=' characters inside the value and guarantees that
        # the value is defined even for 'key =' lines.
        my ($key, $value) = split(/=/, $line, 2);
        $key =~ s/^\s+|\s+$//g;
        $value =~ s/^\s+|\s+$//g;
        $foundController_h{$key} = $value;
    }
    return \%foundController_h;
}

# Checks the status of the raid controller: ROC temperature, drive counters
# (degraded, offline, critical, failed) and memory error counters.
# @param statusLevel_a The status level array, elem 0 is a reference to the
# current status, elem 1 the warning sensors, elem 2 the critical sensors,
# elem 3 the verbose information for the sensors.
# @param foundController The hash of controller infos, created by getControllerInfo
sub getControllerStatus {
    my @statusLevel_a = @{(shift)};
    my %foundController = %{(shift)};
    my $status = '';

    # Returns the leading number of a controller value, or undef if the value
    # is missing or does not start with a number.
    my $leadingNumber = sub {
        my $value = shift;
        return undef unless defined $value;
        return $value =~ /^\s*(-?[0-9]+(?:\.[0-9]+)?)/ ? $1 : undef;
    };

    # NOTE(review): checkThreshs is defined outside this block; it is used
    # here as "true while the value is within the threshold" - confirm.
    my $rocTemp = $leadingNumber->($foundController{'ROC temperature(Degree Celsius)'});
    if (defined $rocTemp) {
        if (!(checkThreshs($rocTemp, $C_TEMP_CRITICAL))) {
            $status = 'Critical';
            push @{$statusLevel_a[2]}, 'ROC_Temperature';
        }
        elsif (!(checkThreshs($rocTemp, $C_TEMP_WARNING))) {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, 'ROC_Temperature';
        }
        $statusLevel_a[3]->{'ROC_Temperature'} = $rocTemp;
    }

    # Counters that raise a fixed severity as soon as they are non-zero:
    # [ controller info key, sensor name, statusLevel_a index, severity ]
    my @counterChecks = (
        ['Degraded', 'CTR_Degraded_drives', 1, 'Warning'],
        ['Offline', 'CTR_Offline_drives', 1, 'Warning'],
        ['Critical Disks', 'CTR_Critical_disks', 2, 'Critical'],
        ['Failed Disks', 'CTR_Failed_disks', 2, 'Critical'],
        ['Memory Uncorrectable Errors', 'CTR_Memory_Uncorrectable_errors', 2, 'Critical'],
    );
    foreach my $check (@counterChecks) {
        my ($key, $sensor, $level, $severity) = @{$check};
        my $count = $leadingNumber->($foundController{$key});
        next unless defined $count && $count != 0;
        $status = $severity unless $status eq 'Critical';
        push @{$statusLevel_a[$level]}, $sensor;
        $statusLevel_a[3]->{$sensor} = $foundController{$key};
    }

    # Correctable memory errors are compared against configurable thresholds.
    my $memCorrectable = $foundController{'Memory Correctable Errors'};
    my $memCount = $leadingNumber->($memCorrectable);
    if (defined $memCount && $memCount != 0) {
        if (!(checkThreshs($memCorrectable, $C_MEM_CORRECTABLE_CRITICAL))) {
            $status = 'Critical';
            push @{$statusLevel_a[2]}, 'CTR_Memory_correctable_errors';
        }
        elsif (!(checkThreshs($memCorrectable, $C_MEM_CORRECTABLE_WARNING))) {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, 'CTR_Memory_correctable_errors';
        }
        $statusLevel_a[3]->{'CTR_Memory_correctable_errors'} = $memCorrectable;
    }

    # Merge the local result into the overall status; never downgrade Critical.
    if ($status eq '') {
        $statusLevel_a[3]->{'CTR_Status'} = 'OK';
        return;
    }
    if ($status eq 'Critical') {
        ${$statusLevel_a[0]} = 'Critical';
    }
    elsif (${$statusLevel_a[0]} ne 'Critical') {
        ${$statusLevel_a[0]} = 'Warning';
    }
    $statusLevel_a[3]->{'CTR_Status'} = $status;
    return;
}

# Checks which logical devices are present for the given controller and parses
# the logical devices to a list of hashes. Each hash represents a logical device
# with its values from the output.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli command
# @param logDevices Reference to a list of desired logical device numbers, an
# empty list selects all logical devices
# @param action The storcli action to check, 'all' or 'init'
# @param commands_a An array to push the used command to
# @return A reference to a list of hashes, each hash is one logical device.
# For 'all' the keys are 'ld' plus the entries of the header map selected via
# $LDMAP (@map_a or @map_cc_a); for 'init' the keys are 'ld' and 'init'.
# Exits with STATE_UNKNOWN if the command fails or the device table cannot be
# mapped to a known header layout.
sub getLogicalDevices {
    my $storcli = shift;
    my $writelogs = shift;
    my @logDevices = @{(shift)};
    my $action = shift;
    my $commands_a = shift;

    my $command = $storcli . '/v' . (@logDevices ? join(',', @logDevices) : 'all');
    $command .= " show $action";
    $command .= ' nolog' unless $writelogs;
    push @{$commands_a}, $command;

    my @output = `$command`;
    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    my @foundDevs;
    if ($action eq "all") {
        my $currBlock;
        foreach my $line (@output) {
            # A line like '/c0/v0 :' starts the block of one logical device
            if ($line =~ /^\/(c[0-9]*\/v[0-9]*).*/) {
                $currBlock = $1;
                next;
            }
            next unless defined($currBlock);

            if ($line =~ /^DG\/VD[[:blank:]]+TYPE.*/) {
                # The number of header columns decides which map applies
                my @header = split(' ', $line);
                if (scalar(@header) == 9) {
                    $LDMAP = \@map_a;
                }
                elsif (scalar(@header) == 10) {
                    $LDMAP = \@map_cc_a;
                }
                next;
            }
            if ($line =~ /^\d+\/\d+\s+\w+\d\s+\w+.*/) {
                # Without a known header layout the columns cannot be named
                if (!defined($LDMAP)) {
                    print "Unable to parse logical device table! ($command)\n";
                    exit(STATE_UNKNOWN);
                }
                my @fields = split(' ', $line);
                # The current block is the c0/v0 name
                my %lineValues_h = ('ld' => $currBlock);
                @lineValues_h{@{$LDMAP}} = @fields[0 .. $#{$LDMAP}];
                push @foundDevs, \%lineValues_h;
            }
        }
    }
    elsif ($action eq "init") {
        foreach my $line (@output) {
            $line =~ s/^\s+|\s+$//g;#trim line
            next unless $line =~ /^([0-9]+)\s+INIT.*$/;
            my $vdNum = 'c' . $CONTROLLER . '/v' . $1;
            # Only initializations that are in progress are reported
            next if $line =~ /Not in progress/i;
            my @vals = split('\s+', $line);
            push @foundDevs, { 'ld' => $vdNum, 'init' => $vals[2] };
        }
    }
    return \@foundDevs;
}

# Checks the status of the logical devices. May be called several times with
# the same status level array (e.g. once per getLogicalDevices action); a
# 'Critical' LD_Status of an earlier call is never downgraded.
# @param statusLevel_a The status level array, elem 0 is a reference to the
# current status, elem 1 the warning sensors, elem 2 the critical sensors,
# elem 3 the verbose information for the sensors.
# @param foundLDs The array of logical devices, created by getLogicalDevices
sub getLDStatus {
    my @statusLevel_a = @{(shift)};
    my @foundLDs = @{(shift)};
    my $status = '';
    foreach my $LD (@foundLDs) {
        my $ldName = $LD->{'ld'};
        # Any state other than optimal is critical
        if (exists($LD->{'State'}) && $LD->{'State'} ne 'Optl') {
            $status = 'Critical';
            push @{$statusLevel_a[2]}, $ldName . '_State';
            $statusLevel_a[3]->{$ldName . '_State'} = $LD->{'State'};
        }
        # A cache mode without 'WB' is critical unless $NOWRITEBACKOK is set
        if (!$NOWRITEBACKOK && exists($LD->{'Cache'})
            && index($LD->{'Cache'}, 'WB') == -1) {
            $status = 'Critical';
            push @{$statusLevel_a[2]}, $ldName . '_Cache';
            $statusLevel_a[3]->{$ldName . '_Cache'} = $LD->{'Cache'};
        }
        # Devices of type 'Cac1' are exempt from the consistency check
        if (exists($LD->{'Consist'}) && $LD->{'Consist'} ne 'Yes'
            && (!defined($LD->{'TYPE'}) || $LD->{'TYPE'} ne 'Cac1')) {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $ldName . '_Consist';
            $statusLevel_a[3]->{$ldName . '_Consist'} = $LD->{'Consist'};
        }
        # An 'init' key is only present while an initialization is running
        if (exists($LD->{'init'})) {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $ldName . '_Init';
            $statusLevel_a[3]->{$ldName . '_Init'} = $LD->{'init'};
        }
    }

    if ($status eq '') {
        # Keep the result of a previous call, if there is one
        if (!exists($statusLevel_a[3]->{'LD_Status'})) {
            $statusLevel_a[3]->{'LD_Status'} = 'OK';
        }
        return;
    }
    if ($status eq 'Critical') {
        ${$statusLevel_a[0]} = 'Critical';
    }
    elsif (${$statusLevel_a[0]} ne 'Critical') {
        ${$statusLevel_a[0]} = 'Warning';
    }
    # Do not overwrite a 'Critical' of an earlier call with a 'Warning'
    my $previous = $statusLevel_a[3]->{'LD_Status'};
    unless ($status eq 'Warning' && defined($previous) && $previous eq 'Critical') {
        $statusLevel_a[3]->{'LD_Status'} = $status;
    }
    return;
}

# Checks which physical devices are present for the given controller and parses
# the physical devices to a list of hashes. Each hash represents a physical device
# with its values from the output.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli command
# @param enclosures Reference to a list of desired enclosure numbers, an empty
# list selects all enclosures (ignored if $NOENCLOSURES is set)
# @param physDevices Reference to a list of desired physical device numbers, an
# empty list selects all physical devices
# @param action The storcli action to check, 'all', 'initialization' or 'rebuild'
# @param commands_a An array to push the used command to
# @return A reference to a list of hashes, each hash is one physical device.
# Check pdmap_a for valid hash keys of the 'all' action. Every drive reported
# with 'Failure 46' is added with 'Detailed Status' set to 'Failure-46'.
# Exits with STATE_WARNING if no drive is found and with STATE_UNKNOWN if the
# command fails otherwise.
sub getPhysicalDevices {
    my $storcli = shift;
    my $writelogs = shift;
    my @enclosures = @{(shift)};
    my @physDevices = @{(shift)};
    my $action = shift;
    my $commands_a = shift;

    my $command = $storcli;
    if (!$NOENCLOSURES) {
        $command .= '/e' . (@enclosures ? join(',', @enclosures) : 'all');
    }
    $command .= '/s' . (@physDevices ? join(',', @physDevices) : 'all');
    $command .= " show $action";
    $command .= ' nolog' unless $writelogs;
    push @{$commands_a}, $command;

    my @output = `$command`;
    if (!checkCommandStatus(\@output)) {
        if (grep { /No drive found/i } @output) {
            print "Warning (CTR Warn) [No storage attached] ($command)\n";
            exit(STATE_WARNING);
        }
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    my @foundDevs;
    if ($action eq "all") {
        my $currBlock;
        my $line_ref;
        foreach my $line (@output) {
            # A line like 'Drive /c0/e252/s0 :' starts the block of one drive
            if ($line =~ /^Drive \/(c[0-9]*\/e[0-9]*\/s[0-9]*) \:$/) {
                $currBlock = $1;
                $line_ref = {};
                next;
            }
            next unless defined($currBlock);

            # If a drive is not in a group, a - is at the DG column
            if ($line =~ /^\d+\:\d+\s+\d+\s+\w+\s+[0-9-F]+.*/) {
                my @fields = split(' ', $line);
                # The current block is the c0/e252/s0 name
                $line_ref->{'pd'} = $currBlock;
                my $pos = 0;
                foreach my $column (@pdmap_a) {
                    my $value = $fields[$pos];
                    $pos++;
                    if ($column eq 'Size') {
                        # The size unit may be a separate token
                        my $unit = $fields[$pos];
                        if (defined($unit) && ($unit eq 'GB' || $unit eq 'TB')) {
                            $value .= $unit;
                            $pos++;
                        }
                    }
                    elsif ($column eq 'Model') {
                        # Model should be the next to last element; if it is
                        # not, the model name consists of two tokens
                        if (($pos + 1) != scalar(@fields) && defined($fields[$pos])) {
                            $value .= ' ' . $fields[$pos];
                            $pos++;
                        }
                    }
                    $line_ref->{$column} = $value;
                }
            }
            if ($line =~ /^(Shield Counter|Media Error Count|Other Error Count|BBM Error Count|Drive Temperature|Predictive Failure Count|S\.M\.A\.R\.T alert flagged by drive)\s\=\s+(.*)$/) {
                $line_ref->{$1} = $2;
            }
            # If the last value is parsed, set up for the next device
            if (exists($line_ref->{'S.M.A.R.T alert flagged by drive'})) {
                push @foundDevs, $line_ref;
                undef $currBlock;
                undef $line_ref;
            }
        }
    }
    elsif ($action eq 'rebuild' || $action eq 'initialization') {
        my $key = ($action eq 'rebuild') ? 'rebuild' : 'init';
        foreach my $line (@output) {
            $line =~ s/^\s+|\s+$//g;#trim line
            next unless $line =~ /^\/c$CONTROLLER\/.*/;
            # Only operations that are in progress are reported
            next if $line =~ /Not in progress/i;
            my @vals = split('\s+', $line);
            # Strip the leading '/' from the drive path
            push @foundDevs, { 'pd' => substr($vals[0], 1), $key => $vals[1] };
        }
    }

    # Now we check if drives are not attached, error code 46. Every matching
    # line is recorded, not only the first one.
    my $failed_pattern = 'c[0-9]*\/e[0-9]*\/s[0-9]*\s+Failure\s+46';
    foreach my $failedLine (grep { /$failed_pattern/ } @output) {
        next unless $failedLine =~ /(c[0-9]*\/e[0-9]*\/s[0-9]*)/;
        push @foundDevs, { 'pd' => $1, 'Detailed Status' => 'Failure-46' };
    }
    return \@foundDevs;
}

# Checks the status of the physical devices. May be called several times with
# the same status level array (e.g. once per getPhysicalDevices action); a
# 'Critical' PD_Status of an earlier call is never downgraded.
# @param statusLevel_a The status level array, elem 0 is a reference to the
# current status, elem 1 the warning sensors, elem 2 the critical sensors,
# elem 3 the verbose information for the sensors.
# @param foundPDs The array of physical devices, created by getPhysicalDevices
sub getPDStatus {
    my @statusLevel_a = @{(shift)};
    my @foundPDs = @{(shift)};
    my $status = '';

    # Drive states that are not reported
    my %healthyState = map { $_ => 1 } ('Onln', 'UGood', 'GHS', 'DHS', 'JBOD');
    # Error counters that warn when above their threshold:
    # [ device key, threshold, sensor suffix ]
    my @counterChecks = (
        ['Shield Counter', $IGNERR_S, '_Shield_counter'],
        ['Media Error Count', $IGNERR_M, '_Media_error_count'],
        ['Other Error Count', $IGNERR_O, '_Other_error_count'],
        ['BBM Error Count', $IGNERR_B, '_BBM_error_count'],
        ['Predictive Failure Count', $IGNERR_P, '_Predictive_failure_count'],
    );

    foreach my $PD (@foundPDs) {
        my $pdName = $PD->{'pd'};
        if (exists($PD->{'State'}) && !$healthyState{$PD->{'State'}}) {
            $status = 'Critical';
            push @{$statusLevel_a[2]}, $pdName . '_State';
            $statusLevel_a[3]->{$pdName . '_State'} = $PD->{'State'};
        }
        foreach my $check (@counterChecks) {
            my ($key, $threshold, $suffix) = @{$check};
            next unless exists($PD->{$key});
            # A threshold of -1 switches the 'other errors' check off
            next if $key eq 'Other Error Count' && $threshold == -1;
            next unless $PD->{$key} > $threshold;
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $pdName . $suffix;
            $statusLevel_a[3]->{$pdName . $suffix} = $PD->{$key};
        }
        if (exists($PD->{'S.M.A.R.T alert flagged by drive'})
            && $PD->{'S.M.A.R.T alert flagged by drive'} ne 'No') {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $pdName . '_SMART_flag';
        }
        if (exists($PD->{'Detailed Status'})
            && $PD->{'Detailed Status'} eq 'Failure-46') {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $pdName . '_Detailed_status';
        }
        if (exists($PD->{'DG'}) && $PD->{'DG'} eq 'F') {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $pdName . '_DG';
            $statusLevel_a[3]->{$pdName . '_DG'} = $PD->{'DG'};
        }
        if (exists($PD->{'Drive Temperature'})) {
            my $temp = $PD->{'Drive Temperature'};
            # 'N/A' and a reading of exactly 0C are not evaluated. The capture
            # is only used when the pattern really matched.
            # NOTE(review): checkThreshs is defined outside this block; it is
            # used here as "true while the value is within the threshold".
            if ($temp ne 'N/A' && $temp ne '0C (32.00 F)' && $temp =~ /^([0-9]+)C/) {
                my $celsius = $1;
                if (!(checkThreshs($celsius, $PD_TEMP_CRITICAL))) {
                    $status = 'Critical';
                    push @{$statusLevel_a[2]}, $pdName . '_Drive_Temperature';
                }
                elsif (!(checkThreshs($celsius, $PD_TEMP_WARNING))) {
                    $status = 'Warning' unless $status eq 'Critical';
                    push @{$statusLevel_a[1]}, $pdName . '_Drive_Temperature';
                }
                $statusLevel_a[3]->{$pdName . '_Drive_Temperature'} = $celsius;
            }
        }
        # 'init' and 'rebuild' keys are only present while the operation runs
        if (exists($PD->{'init'})) {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $pdName . '_Init';
            $statusLevel_a[3]->{$pdName . '_Init'} = $PD->{'init'};
        }
        if (exists($PD->{'rebuild'})) {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $pdName . '_Rebuild';
            $statusLevel_a[3]->{$pdName . '_Rebuild'} = $PD->{'rebuild'};
        }
    }

    if ($status eq '') {
        # Keep the result of a previous call, if there is one
        if (!exists($statusLevel_a[3]->{'PD_Status'})) {
            $statusLevel_a[3]->{'PD_Status'} = 'OK';
        }
        return;
    }
    if ($status eq 'Critical') {
        ${$statusLevel_a[0]} = 'Critical';
    }
    elsif (${$statusLevel_a[0]} ne 'Critical') {
        ${$statusLevel_a[0]} = 'Warning';
    }
    # Do not overwrite a 'Critical' of an earlier call with a 'Warning'
    my $previous = $statusLevel_a[3]->{'PD_Status'};
    unless ($status eq 'Warning' && defined($previous) && $previous eq 'Critical') {
        $statusLevel_a[3]->{'PD_Status'} = $status;
    }
    return;
}

# Checks the status of the BBU, parses 'bbu show status' for the given controller.
# While a learn cycle is active, the battery state, the remaining capacity and
# the 'fully discharged' flag are not reported.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli command
# @param statusLevel_a The status level array, elem 0 is a reference to the
# current status, elem 1 the warning sensors, elem 2 the critical sensors,
# elem 3 the verbose information for the sensors.
# @param commands_a An array to push the used command to
# Exits with STATE_UNKNOWN if the storcli command fails.
sub getBBUStatus {
    my $storcli = shift;
    my $writelogs = shift;
    my @statusLevel_a = @{(shift)};
    my $commands_a = shift;

    my $command = "$storcli /bbu show status";
    $command .= ' nolog' unless $writelogs;
    push @{$commands_a}, $command;

    my @output = `$command`;
    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    my $status = '';
    # Records one failed sensor with its severity and verbose value
    my $raise = sub {
        my ($severity, $sensor, $value) = @_;
        if ($severity eq 'Critical') {
            $status = 'Critical';
            push @{$statusLevel_a[2]}, $sensor;
        }
        else {
            $status = 'Warning' unless $status eq 'Critical';
            push @{$statusLevel_a[1]}, $sensor;
        }
        $statusLevel_a[3]->{$sensor} = $value;
    };

    # First pass: find out if a learn cycle is active. Once the firmware
    # status header was seen, all following lines are inspected.
    my $learn_cycle_active = 0;
    my $currBlock;
    foreach my $line (@output) {
        if ($line =~ /^(BBU_Firmware_Status)/) {
            $currBlock = $1;
            next;
        }
        next unless defined($currBlock) && $currBlock eq 'BBU_Firmware_Status';
        $line =~ s/^\s+|\s+$//g;#trim line
        if ($line =~ /^Learn Cycle Active/ && $line =~ /([a-zA-Z\/]*)$/ && $1 eq "Yes") {
            $learn_cycle_active = 1;
        }
    }

    # Flag lines per output block. 'value' extracts the flag from the end of
    # the line, each check is
    # [ line prefix, healthy values, severity, sensor name, skip on learn cycle ]
    my %flagChecks = (
        'BBU_Firmware_Status' => {
            'value' => qr/([a-zA-Z]*)$/,
            'checks' => [
                [qr/^Temperature/, ['OK'], 'Critical', 'BBU_Firmware_temperature', 0],
                [qr/^Voltage/, ['OK'], 'Warning', 'BBU_Voltage', 0],
                [qr/^I2C Errors Detected/, ['No'], 'Critical', 'BBU_Firmware_I2C_errors', 0],
                [qr/^Battery Pack Missing/, ['No'], 'Critical', 'BBU_Pack_missing', 0],
                [qr/^Replacement required/, ['No'], 'Critical', 'BBU_Replacement_required', 0],
                [qr/^Remaining Capacity Low/, ['No'], 'Warning', 'BBU_Remaining_capacity_low', 1],
                [qr/^Pack is about to fail \& should be replaced/, ['No'], 'Critical', 'BBU_Should_be_replaced', 0],
            ],
        },
        'GasGaugeStatus' => {
            'value' => qr/([a-zA-Z\/]*)$/,
            'checks' => [
                [qr/^Fully Discharged/, ['No', 'N/A'], 'Critical', 'BBU_GasGauge_discharged', 1],
                [qr/^Over Temperature/, ['No', 'N/A'], 'Warning', 'BBU_GasGauge_over_temperature', 0],
                [qr/^Over Charged/, ['No', 'N/A'], 'Critical', 'BBU_GasGauge_over_charged', 0],
            ],
        },
    );

    # Second pass: evaluate the lines of every block
    undef $currBlock;
    foreach my $line (@output) {
        if ($line =~ /^(BBU_Info|BBU_Firmware_Status|GasGaugeStatus)/) {
            $currBlock = $1;
            next;
        }
        next unless defined($currBlock);
        $line =~ s/^\s+|\s+$//g;#trim line

        if ($currBlock eq 'BBU_Info') {
            if ($line =~ /^Battery State/) {
                my ($state) = $line =~ /([a-zA-Z]*)$/;
                if (!$learn_cycle_active && $state ne 'Optimal') {
                    $raise->('Warning', 'BBU_State', $state);
                }
            }
            elsif ($line =~ /^Temperature/ && $line =~ /([0-9]+) C$/) {
                # NOTE(review): checkThreshs is defined outside this block; it
                # is used here as "true while the value is within the threshold".
                my $celsius = $1;
                if (!(checkThreshs($celsius, $BBU_TEMP_CRITICAL))) {
                    $status = 'Critical';
                    push @{$statusLevel_a[2]}, 'BBU_Temperature';
                }
                elsif (!(checkThreshs($celsius, $BBU_TEMP_WARNING))) {
                    $status = 'Warning' unless $status eq 'Critical';
                    push @{$statusLevel_a[1]}, 'BBU_Temperature';
                }
                $statusLevel_a[3]->{'BBU_Temperature'} = $celsius;
            }
        }
        elsif (exists($flagChecks{$currBlock})) {
            my $block = $flagChecks{$currBlock};
            foreach my $check (@{$block->{'checks'}}) {
                my ($prefix, $healthy, $severity, $sensor, $skipOnLearn) = @{$check};
                next unless $line =~ $prefix;
                my ($value) = $line =~ $block->{'value'};
                my $isHealthy = grep { $value eq $_ } @{$healthy};
                if (!$isHealthy && !($skipOnLearn && $learn_cycle_active)) {
                    $raise->($severity, $sensor, $value);
                }
                # Only the first matching prefix is evaluated per line
                last;
            }
        }
    }

    # Merge the local result into the overall status; never downgrade Critical.
    if ($status eq '') {
        $statusLevel_a[3]->{'BBU_Status'} = 'OK';
        return;
    }
    if ($status eq 'Critical') {
        ${$statusLevel_a[0]} = 'Critical';
    }
    elsif (${$statusLevel_a[0]} ne 'Critical') {
        ${$statusLevel_a[0]} = 'Warning';
    }
    $statusLevel_a[3]->{'BBU_Status'} = $status;
    return;
}

# Checks the status of the CacheVault (CV) module by parsing the output of
# 'cv show status' for the given controller. Prints an error and exits with
# STATE_UNKNOWN if checkCommandStatus rejects the command output.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli command
# @param statusLevel_a The status level array, elem 0 is the current status,
# elem 1 the warning sensors, elem 2 the critical sensors, elem 3 the verbose
# information for the sensors.
# @param commands_a An array to push the used command to
sub getCVStatus {
    my $storcli = shift;
    my $writelogs = shift;
    my @statusLevel_a = @{(shift)};
    my $commands_a = shift;

    my $command = $storcli." /cv show status";
    if(!$writelogs) { $command .= ' nolog'; }
    push @{$commands_a}, $command;

    my @output = `$command`;
    if(!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }
    # Without any output lines there is nothing to evaluate or to record
    return if(!@output);

    my $status = '';
    my $currBlock;
    foreach my $line (@output) {
        # A block header switches the section the following lines belong to
        if($line =~ /^(Cachevault_Info|Firmware_Status)/){
            $currBlock = $1;
            next;
        }
        next if(!defined($currBlock));
        $line =~ s/^\s+|\s+$//g;#trim line

        if($currBlock eq 'Cachevault_Info' && $line =~ /^State/){
            # Property and value are separated by at least two blanks; a line
            # without a value is reported as 'Unknown' instead of undef.
            my @vals = split(/\s{2,}/, $line);
            my $state = defined($vals[1]) ? $vals[1] : 'Unknown';
            if($state ne "Optimal") {
                $status = 'Warning' unless $status eq 'Critical';
                push @{$statusLevel_a[1]}, 'CV_State';
                $statusLevel_a[3]->{'CV_State'} = $state;
            }
        }
        elsif($currBlock eq 'Cachevault_Info' && $line =~ /^Temperature/){
            # Only evaluate the temperature when it could be parsed: after a
            # failed match $1 does not hold a temperature value.
            if($line =~ /([0-9]+) C$/){
                my $temperature = $1;
                if(!(checkThreshs($temperature, $CV_TEMP_CRITICAL))){
                    $status = 'Critical';
                    push @{$statusLevel_a[2]}, 'CV_Temperature';
                }
                elsif(!(checkThreshs($temperature, $CV_TEMP_WARNING))){
                    $status = 'Warning' unless $status eq 'Critical';
                    push @{$statusLevel_a[1]}, 'CV_Temperature';
                }
                $statusLevel_a[3]->{'CV_Temperature'} = $temperature;
            }
        }
        elsif($currBlock eq 'Firmware_Status' && $line =~ /^Replacement required/){
            # The pattern also matches an empty tail, so it always succeeds
            my ($required) = $line =~ /([a-zA-Z0-9]*)$/;
            if($required ne "No") {
                $status = 'Critical';
                push @{$statusLevel_a[2]},'CV_Replacement_required';
            }
            $statusLevel_a[3]->{'CV_Replacement_required'} = $required;
        }
    }

    # Merge the CV result into the overall status once all lines are parsed.
    # A 'Warning' never downgrades an overall status that is already 'Critical'.
    if($status eq ''){
        $statusLevel_a[3]->{'CV_Status'} = 'OK';
        return;
    }
    if($status ne 'Warning' || ${$statusLevel_a[0]} ne 'Critical'){
        ${$statusLevel_a[0]} = ($status eq 'Warning') ? 'Warning' : 'Critical';
    }
    $statusLevel_a[3]->{'CV_Status'} = $status;
    return;
}
992
993
# Checks whether a BBU and/or a CV module is present by running
# '/bbu show' and '/cv show' and evaluating each output with checkCommandStatus.
# @param storcli The path to storcli command utility, followed by the controller
# number, e.g. 'storcli64 /c0'.
# @param writelogs If false, 'nolog' is appended to the storcli commands
# @return A tuple (bbu, cv), e.g. (0,0), where 0 means module is not present,
# 1 present
sub checkBBUorCVIsPresent{
    my ($storcli, $writelogs) = @_;
    my %present;
    # Probe the BBU first, then the CV, as the two commands are independent
    foreach my $module ('bbu', 'cv'){
        my $command = $storcli." /".$module." show";
        $command .= ' nolog' unless $writelogs;
        my @output = `$command`;
        $present{$module} = checkCommandStatus(\@output) ? 1 : 0;
    }
    return ($present{'bbu'}, $present{'cv'});
}
1013
1014
# Checks if a given value is in a specified range, the range must follow the
# nagios development guidelines:
# http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
# The general form is '[@][start:][end]':
#   - 'end' alone means the range 0..end
#   - 'start:' means start..infinity
#   - '~' as start means negative infinity
#   - a leading '@' inverts the check (the value must NOT be inside the range)
# Numbers may be integers or decimals. Range borders are inclusive.
# Prints an error and exits with STATE_UNKNOWN if the pattern is invalid.
# @param value The given value to check the pattern for, expected to be numeric
# @param pattern The pattern specifying the threshold range, e.g. '10:', '@10:20'
# @return 0 if the value is outside the range, 1 if the value satisfies the range
sub checkThreshs{
    my $value = shift;
    my $pattern = shift;
    my $number = qr/-?[0-9]+(?:\.[0-9]+)?/;
    my ($inverted, $start, $end);

    # At least one of start and end has to be given, '@' or '' alone is invalid
    if(defined($pattern) && $pattern =~ /^(\@?)(?:(\~|$number)\:)?($number)?$/
        && (defined($2) || defined($3))){
        ($inverted, $start, $end) = ($1 eq '@', $2, $3);
    }
    else{
        my $shown = defined($pattern) ? $pattern : '';
        print "Invalid temperature parameter! ($shown)\n";
        exit(STATE_UNKNOWN);
    }

    # An omitted start is 0, '~' disables the lower border
    my $below = 0;
    if(!defined($start)){
        $below = ($value < 0);
    }
    elsif($start ne '~'){
        $below = ($value < $start);
    }
    # An omitted end disables the upper border
    my $above = defined($end) ? ($value > $end) : 0;
    my $inRange = !($below || $above);

    if($inverted){
        return $inRange ? 0 : 1;
    }
    return $inRange ? 1 : 0;
}
1054
1055
# Builds the plugin output for one status level.
# For 'Critical' the string starts with the checked controller parts in
# parentheses: if the overall status is 'OK' all parts that are 'OK' are listed,
# otherwise only the failed parts together with the first four characters of
# their status. After that every sensor of the requested level is appended as
# '[sensor = level]', with the sensor value added if $VERBOSITY is set.
# @param level The desired level to get the status string for. Either 'Warning'
# or 'Critical'.
# @param statusLevel_a The status level array, elem 0 is the current status,
# elem 1 the warning sensors, elem 2 the critical sensors, elem 3 the verbose
# information for the sensors, elem 4 the used storcli commands.
# @return The created status string
sub getStatusString{
    my ($level, $statusLevel_r) = @_;
    my @statusLevel_a = @{$statusLevel_r};

    # Any other level than 'Warning' or 'Critical' yields an empty string
    my %sensorIndex_h = ('Warning' => 1, 'Critical' => 2);
    return "" unless exists($sensorIndex_h{$level});

    my @sensors_a = @{$statusLevel_a[$sensorIndex_h{$level}]};
    my $isCritical = ($level eq 'Critical');
    my $output = "";

    # The controller parts are only added once, with the 'Critical' call.
    # level comes from the method call, not the real status level
    if($isCritical){
        my $overallOk = (${$statusLevel_a[0]} eq 'OK');
        my @parts_a;
        foreach my $part ('CTR', 'LD', 'PD', 'BBU', 'CV'){
            my $key = $part.'_Status';
            # Skip parts that were not checked
            next unless exists($statusLevel_a[3]->{$key});
            my $partStatus = $statusLevel_a[3]->{$key};
            if($overallOk){
                push @parts_a, $part if $partStatus eq 'OK';
            }
            elsif($partStatus ne 'OK'){
                push @parts_a, $part.' '.substr($partStatus, 0, 4);
            }
        }
        $output = '('.join(', ', @parts_a).')';
    }

    # Warning sensors only get a separating blank if no critical sensors
    # were printed in front of them
    if(@sensors_a && ($isCritical || !@{$statusLevel_a[2]})){
        $output .= ' ';
    }

    # Print which sensors are Warn or Crit
    foreach my $sensor (@sensors_a){
        my $detail = "";
        if($VERBOSITY && exists($statusLevel_a[3]->{$sensor})){
            $detail = " (".$statusLevel_a[3]->{$sensor}.")";
        }
        $output .= "[".$sensor." = ".$level.$detail."]";
    }
    return $output;
}
1122
1123
# Builds the multi-line verbose output: the used storcli commands, the critical
# sensors (overall status 'Critical' only) and the warning sensors (overall
# status not 'OK'). With $VERBOSITY == 3 the controller, LD, PD and BBU/CV
# details are appended as well.
# @param statusLevel_a The status level array, elem 0 is the current status,
# elem 1 the warning sensors, elem 2 the critical sensors, elem 3 the verbose
# information for the sensors, elem 4 the used storcli commands.
# @param controllerToCheck Controller parsed by getControllerInfo
# @param LDDevicesToCheck LDs parsed by getLogicalDevices
# @param LDInitToCheck LDs parsed by getLogicalDevices init
# @param PDDevicesToCheck PDs parsed by getPhysicalDevices
# @param PDInitToCheck PDs parsed by getPhysicalDevices init
# @param PDRebuildToCheck PDs parsed by getPhysicalDevices rebuild
# @return The created verbosity string
sub getVerboseString{
    my ($statusLevel_r, $controller_r, $lds_r, $ldInits_r, $pds_r, $pdInits_r, $pdRebuilds_r) = @_;
    my @statusLevel_a = @{$statusLevel_r};
    my %controller_h = %{$controller_r};
    my @lds_a = @{$lds_r};
    my @ldInits_a = @{$ldInits_r};
    my @pds_a = @{$pds_r};
    my @pdInits_a = @{$pdInits_r};
    my @pdRebuilds_a = @{$pdRebuilds_r};

    my $text = "Used storcli commands:\n";
    foreach my $usedCommand (@{$statusLevel_a[4]}){
        $text .= '- '.$usedCommand."\n";
    }

    # Formats one sensor section: the heading followed by one line per sensor,
    # including the sensor value if one was recorded
    my $sensorSection = sub {
        my ($heading, $sensors_r) = @_;
        my $section = $heading;
        foreach my $sensor (@{$sensors_r}){
            $section .= "\t- ".$sensor;
            if(exists($statusLevel_a[3]->{$sensor})){
                $section .= ' ('.$statusLevel_a[3]->{$sensor}.')';
            }
            $section .= "\n";
        }
        return $section;
    };
    if(${$statusLevel_a[0]} eq 'Critical'){
        $text .= $sensorSection->("Critical sensors:\n", $statusLevel_a[2]);
    }
    if(${$statusLevel_a[0]} ne 'OK'){
        $text .= $sensorSection->("Warning sensors:\n", $statusLevel_a[1]);
    }

    # Everything below is only part of the highest verbosity level
    return $text unless $VERBOSITY == 3;

    $text .= "CTR information:\n";
    $text .= "\t- ".$controller_h{'Model'}.":\n";
    foreach my $field ('Serial Number', 'Firmware Package Build', 'Mfg Date',
        'Revision No', 'Bios Version', 'Firmware Version'){
        $text .= "\t\t- ".$field.'='.$controller_h{$field}."\n";
    }
    if(exists($controller_h{'ROC temperature'})){
        $text .= "\t\t- ".'ROC temperature='.$controller_h{'ROC temperature'}."\n";
    }

    $text .= "LD information:\n";
    foreach my $ld (@lds_a){
        $text .= "\t- ".$ld->{'ld'}.":\n";
        foreach my $property (sort(keys(%{$ld}))){
            $text .= "\t\t- ".$property.'='.$ld->{$property}."\n";
        }
        # Add the init progress entries belonging to this LD
        foreach my $ldInit (grep { $_->{'ld'} eq $ld->{'ld'} } @ldInits_a){
            $text .= "\t\t- init=".$ldInit->{'init'}."\n";
        }
    }

    $text .= "PD information:\n";
    foreach my $pd (@pds_a){
        $text .= "\t- ".$pd->{'pd'}.":\n";
        foreach my $property (sort(keys(%{$pd}))){
            $text .= "\t\t- ".$property.'='.$pd->{$property}."\n";
        }
        # Add the init and rebuild progress entries belonging to this PD
        foreach my $pdInit (grep { $_->{'pd'} eq $pd->{'pd'} } @pdInits_a){
            $text .= "\t\t- init=".$pdInit->{'init'}."\n";
        }
        foreach my $pdRebuild (grep { $_->{'pd'} eq $pd->{'pd'} } @pdRebuilds_a){
            $text .= "\t\t- rebuild=".$pdRebuild->{'rebuild'}."\n";
        }
    }

    # BBU and CV values are taken from the verbose hash, a module is listed
    # if its status entry exists
    foreach my $type ('BBU', 'CV'){
        next unless exists($statusLevel_a[3]->{$type.'_Status'});
        $text .= $type." information:\n";
        foreach my $stat (sort(keys(%{$statusLevel_a[3]}))){
            next unless $stat =~ /^$type.+$/;
            $text .= "\t\t- $stat=".$statusLevel_a[3]->{$stat}."\n";
        }
    }
    return $text;
}
1228
1229
# Get the performance string for the current check. The values are taken from
# the verbose hash in the status level array: every entry whose key contains
# 'temperature' (case-insensitive) and whose value is numeric is added as
# 'key=value', in sorted key order. For the ROC, drive, BBU and CV temperature
# the configured warning and critical thresholds are appended as ';warn;crit'.
# Entries without a numeric value are skipped: status flags such as
# 'BBU_Firmware_temperature' also match the key pattern but hold a word, which
# is not valid performance data.
# @param statusLevel_a The current status level array
# @return The created performance string, undef if there is no value to report
sub getPerfString{
    my @statusLevel_a = @{(shift)};
    my %verboseValues_h = %{$statusLevel_a[3]};
    my @entries_a;
    foreach my $key (sort (keys(%verboseValues_h))){
        next unless $key =~ /temperature/i;
        my $value = $verboseValues_h{$key};
        # A number, optionally followed by a unit of measurement
        next unless defined($value) && $value =~ /^-?[0-9]+(?:\.[0-9]+)?[a-zA-Z%]*$/;

        my $entry = $key.'='.$value;
        if($key =~ /ROC_Temperature$/){
            $entry .= ';'.$C_TEMP_WARNING.';'.$C_TEMP_CRITICAL;
        }
        elsif($key =~ /Drive_Temperature$/){
            $entry .= ';'.$PD_TEMP_WARNING.';'.$PD_TEMP_CRITICAL;
        }
        elsif($key eq 'BBU_Temperature'){
            $entry .= ';'.$BBU_TEMP_WARNING.';'.$BBU_TEMP_CRITICAL;
        }
        elsif($key eq 'CV_Temperature'){
            $entry .= ';'.$CV_TEMP_WARNING.';'.$CV_TEMP_CRITICAL;
        }
        push @entries_a, $entry;
    }
    my $perf_str;
    $perf_str = join(' ', @entries_a) if @entries_a;
    return $perf_str;
}
1257
1258
# Program entry point: parses the command line, locates the storcli/perccli
# executable (prefixed with sudo for non-root users unless --nosudo is given),
# runs the BBU/CV, controller, LD and PD checks and prints the status line,
# the performance data and, for verbosity 2 and 3, the verbose output.
# Exits with the Nagios state matching the overall status.
MAIN: {
    my ($storcli, $sudo, $noSudo, $noCleanlogs, $version, $exitCode);
    # Create default sensor arrays and push them to status level
    my @statusLevel_a ;
    my $status_str = 'OK';
    my $warnings_a = [];
    my $criticals_a = [];
    my $verboseValues_h = {};
    my $verboseCommands_a = [];
    push @statusLevel_a, \$status_str;
    push @statusLevel_a, $warnings_a;
    push @statusLevel_a, $criticals_a;
    push @statusLevel_a, $verboseValues_h;
    push @statusLevel_a, $verboseCommands_a;
    # Per default use a BBU
    my $bbu = 1;
    my @enclosures;
    my @logDevices;
    my @physDevices;
    my $platform = $^O;

    if( !(GetOptions(
        'h|help' => sub {displayHelp();},
        'v|verbose' => sub {$VERBOSITY = 1 },
        'vv' => sub {$VERBOSITY = 2},
        'vvv' => sub {$VERBOSITY = 3},
        'V|version' => \$version,
        'C|controller=i' => \$CONTROLLER,
        'EID|enclosure=s' => \@enclosures,
        'LD|logicaldevice=s' => \@logDevices,
        'PD|physicaldevice=s' => \@physDevices,
        'Tw|temperature-warn=s' => \$C_TEMP_WARNING,
        'Tc|temperature-critical=s' => \$C_TEMP_CRITICAL,
        'Mcw|mem-correctable-warn=s' => \$C_MEM_CORRECTABLE_WARNING,
        'Mcc|mem-correctable-critical=s' => \$C_MEM_CORRECTABLE_CRITICAL,
        'PDTw|physicaldevicetemperature-warn=s' => \$PD_TEMP_WARNING,
        'PDTc|physicaldevicetemperature-critical=s' => \$PD_TEMP_CRITICAL,
        'BBUTw|bbutemperature-warning=s' => \$BBU_TEMP_WARNING,
        'BBUTc|bbutemperature-critical=s' => \$BBU_TEMP_CRITICAL,
        'CVTw|cvtemperature-warning=s' => \$CV_TEMP_WARNING,
        'CVTc|cvtemperature-critical=s' => \$CV_TEMP_CRITICAL,
        'Im|ignore-media-errors=i' => \$IGNERR_M,
        'Io|ignore-other-errors=i' => \$IGNERR_O,
        'Ip|ignore-predictive-fail-count=i' => \$IGNERR_P,
        'Is|ignore-shield-counter=i' => \$IGNERR_S,
        'Ib|ignore-bbm-counter=i' => \$IGNERR_B,
        'p|path=s' => \$storcli,
        'b|BBU=i' => \$bbu,
        'noenclosures=i' => \$NOENCLOSURES,
        'nowritebackok=i' => \$NOWRITEBACKOK,
        'nosudo' => \$noSudo,
        'nocleanlogs' => \$noCleanlogs
    ))){
        print $NAME . " Version: " . $VERSION ."\n";
        displayUsage();
        exit(STATE_UNKNOWN);
    }
    if(defined($version)){ print $NAME . "\nVersion: ". $VERSION . "\n"; }
    # Check storcli tool
    if(!defined($storcli)){
        if($platform eq 'linux' || $platform eq 'solaris'){
            # Search order: storcli in the PATH and in its default install
            # directory first, perccli only as a fallback. The first hit wins,
            # so a storcli that was found is never replaced by the perccli
            # lookup.
            my @searches = (
                [['storcli', 'storcli64'],
                 ['/opt/MegaRAID/storcli/storcli64', '/opt/MegaRAID/storcli/storcli']],
                [['perccli', 'perccli64'],
                 ['/opt/MegaRAID/perccli/perccli64']],
            );
            SEARCH: foreach my $search (@searches){
                my ($names, $paths) = @{$search};
                foreach my $name (@{$names}){
                    $storcli = which($name);
                    last SEARCH if defined($storcli);
                }
                foreach my $path (@{$paths}){
                    if(-x $path){
                        $storcli = $path;
                        last SEARCH;
                    }
                }
            }
        }
        else{
            $storcli = which('storcli.exe');
            if(!defined($storcli)){
                $storcli = which('storcli64.exe');
            }
        }
    }
    if(!defined($storcli)){
        print "Error: cannot find storcli executable.\n";
        print "Ensure storcli is in your path, or use the '-p <storcli path>' switch!\n";
        exit(STATE_UNKNOWN);
    }
    if($platform eq 'linux' || $platform eq 'solaris') {
        # sudo is only needed (and only looked up) if the plugin does not
        # already run as root. which() returns undef if sudo is not found.
        if(!defined($noSudo) && $> != 0){
            $sudo = which('sudo');
            if(!defined($sudo)){
                print "Error: cannot find sudo executable.\n";
                exit(STATE_UNKNOWN);
            }
            $storcli = $sudo.' '.$storcli;
        }
    }
    # Print storcli version if available
    if(defined($version)){ displayVersion($storcli, $noCleanlogs) }
    # Prepare storcli command
    $storcli .= " /c$CONTROLLER";
    # Check if the controller number can be used
    if(!getControllerTime($storcli, $noCleanlogs)){
        print "Error: invalid controller number, controller not found!\n";
        exit(STATE_UNKNOWN);
    }
    # Prepare command line arrays, each option may be given several times
    # and may hold a comma separated list
    @enclosures = split(/,/,join(',', @enclosures));
    @logDevices = split(/,/,join(',', @logDevices));
    @physDevices = split(/,/,join(',', @physDevices));
    # Check if the BBU param is correct
    if(($bbu != 2) && ($bbu != 1) && ($bbu != 0)) {
        print "Error: invalid BBU/CV parameter, must be 0, 1 or 2!\n";
        exit(STATE_UNKNOWN);
    }
    my ($bbuPresent,$cvPresent) = (0,0);
    if($bbu == 1 || $bbu == 2){
        ($bbuPresent,$cvPresent) = checkBBUorCVIsPresent($storcli, $noCleanlogs);
        # Only exit with CRITICAL if bbu param is 1
        if($bbuPresent == 0 && $cvPresent == 0 && $bbu == 1){
            ${$statusLevel_a[0]} = 'Critical';
            push @{$criticals_a}, 'BBU/CV_Present';
            $statusLevel_a[3]->{'BBU_Status'} = 'Critical';
            $statusLevel_a[3]->{'CV_Status'} = 'Critical';
        }
    }
    if($bbuPresent == 1){getBBUStatus($storcli, $noCleanlogs, \@statusLevel_a, $verboseCommands_a); }
    if($cvPresent == 1){ getCVStatus($storcli, $noCleanlogs, \@statusLevel_a, $verboseCommands_a); }

    my $controllerToCheck = getControllerInfo($storcli, $noCleanlogs, $verboseCommands_a);
    my $LDDevicesToCheck = getLogicalDevices($storcli, $noCleanlogs, \@logDevices, 'all', $verboseCommands_a);
    my $LDInitToCheck = getLogicalDevices($storcli, $noCleanlogs, \@logDevices, 'init', $verboseCommands_a);
    my $PDDevicesToCheck = getPhysicalDevices($storcli, $noCleanlogs, \@enclosures, \@physDevices, 'all', $verboseCommands_a);
    my $PDInitToCheck = getPhysicalDevices($storcli, $noCleanlogs, \@enclosures, \@physDevices, 'initialization', $verboseCommands_a);
    my $PDRebuildToCheck = getPhysicalDevices($storcli, $noCleanlogs, \@enclosures, \@physDevices, 'rebuild', $verboseCommands_a);

    getControllerStatus(\@statusLevel_a, $controllerToCheck);
    getLDStatus(\@statusLevel_a, $LDDevicesToCheck);
    getLDStatus(\@statusLevel_a, $LDInitToCheck);
    getPDStatus(\@statusLevel_a, $PDDevicesToCheck);
    getPDStatus(\@statusLevel_a, $PDInitToCheck);
    getPDStatus(\@statusLevel_a, $PDRebuildToCheck);

    # Status line: overall status, checked parts and critical sensors, then
    # the warning sensors
    print ${$statusLevel_a[0]}." ";
    print getStatusString("Critical",\@statusLevel_a);
    print getStatusString("Warning",\@statusLevel_a);
    my $perf_str = getPerfString(\@statusLevel_a);
    if($perf_str){
        print "|".$perf_str;
    }
    if($VERBOSITY == 2 || $VERBOSITY == 3){
        print "\n".getVerboseString(\@statusLevel_a, $controllerToCheck, $LDDevicesToCheck, $LDInitToCheck,
            $PDDevicesToCheck, $PDInitToCheck, $PDRebuildToCheck)
    }
    $exitCode = STATE_OK;
    if(${$statusLevel_a[0]} eq "Critical"){
        $exitCode = STATE_CRITICAL;
    }
    if(${$statusLevel_a[0]} eq "Warning"){
        $exitCode = STATE_WARNING;
    }
    exit($exitCode);
}
1434