File check_3ware_raid of Package nagios-plugins-3ware
x
1
#!/usr/bin/env python
2
#
3
# Copyright Hari Sekhon 2007
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
#
19
20
"""Nagios plugin to test the state of all 3ware raid arrays and/or drives
21
on all 3ware controllers on the local machine. Requires the tw_cli program
22
written by 3ware, which should be called tw_cli_64 if running on a 64-bit
23
system. May be remotely executed via any of the standard remote nagios
24
execution mechanisms"""
25
26
__version__ = 0.9

# Standard Nagios return codes, bound in a single statement
OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3
33
34
import os
35
import re
36
import sys
37
try:
38
from subprocess import Popen, PIPE, STDOUT
39
except ImportError:
40
print "Failed to import subprocess module.",
41
print "Perhaps you are using a version of python older than 2.4?"
42
sys.exit(CRITICAL)
43
from optparse import OptionParser
44
45
# Directory containing this plugin. abspath() is applied first because
# dirname() alone returns "" when the script is started by a bare file name
# (e.g. "python check_3ware_raid"), and a path built as SRCDIR + "/name"
# would then point at the filesystem root instead of the plugin directory.
SRCDIR = os.path.dirname(os.path.abspath(sys.argv[0]))
46
47
48
def end(status, message, disks=False):
    """Prints the plugin result line and exits with the matching code

    status  - OK, WARNING or CRITICAL; any other value is reported
              as UNKNOWN
    message - text printed after the status prefix
    disks   - when true the line is prefixed with "DISKS" rather
              than "RAID"

    This function does not return, every path ends in sys.exit()"""

    labels = {OK: "OK", WARNING: "WARNING", CRITICAL: "CRITICAL"}

    if status not in labels:
        # UNKNOWN lines carry no RAID/DISKS prefix
        print("UNKNOWN: %s" % message)
        sys.exit(UNKNOWN)

    if disks:
        check = "DISKS"
    else:
        check = "RAID"

    print("%s %s: %s" % (check, labels[status], message))
    sys.exit(status)
67
68
69
# Refuse to run unless the effective user is root
if os.geteuid() != 0:
    end(UNKNOWN, "You must be root to run this plugin")

# Machine hardware name as reported by uname
ARCH = os.uname()[4]

# Choose the 3ware utility binary for this architecture; it is looked up
# in the directory held in SRCDIR
if ARCH == "x86_64":
    BIN = SRCDIR + "/tw_cli_64"
elif re.match("i[3456]86", ARCH):
    BIN = SRCDIR + "/tw_cli"
else:
    end(UNKNOWN, "architecture is not x86 or x86_64, cannot run 3ware utility")

# The chosen binary has to exist and be executable before anything is run
if not os.path.exists(BIN):
    end(UNKNOWN,
        "3ware utility for this architecture '%s' cannot be found" % BIN)
elif not os.access(BIN, os.X_OK):
    end(UNKNOWN, "3ware utility '%s' is not executable" % BIN)
87
88
89
def run(cmd):
    """Runs one command through the 3ware utility and returns its output

    The utility (BIN) is started without arguments and cmd is written to
    its standard input; its stderr is merged into stdout.

    cmd - the command text to send, for example "show"

    Returns the output as a list of lines with the first three and the
    last two lines removed. Every failure ends the plugin with an UNKNOWN
    status through end(), so this function only returns on success"""

    import errno

    if cmd is None or cmd == "":
        end(UNKNOWN, "internal python error - "
                     + "no cmd supplied for 3ware utility")

    try:
        process = Popen(BIN, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    except OSError:
        # sys.exc_info() is used to fetch the exception so that the
        # handler does not depend on version specific "except" syntax
        error = sys.exc_info()[1]
        # str(error) reads "[Errno 2] No such file or directory", so the
        # error number is tested instead of comparing the message text
        if error.errno == errno.ENOENT:
            end(UNKNOWN, "Cannot find 3ware utility '%s'" % BIN)
        else:
            end(UNKNOWN, "error trying to run 3ware utility - %s" % error)

    # poll() is None while the process runs, so only a process that has
    # already exited with a non-zero code is caught here
    if process.poll():
        end(UNKNOWN, "3ware utility process ended prematurely")

    try:
        stdout = process.communicate(cmd)[0]
    except OSError:
        end(UNKNOWN, "unable to communicate with 3ware utility - %s"
                     % sys.exc_info()[1])

    if not stdout:
        end(UNKNOWN, "No output from 3ware utility")

    output = str(stdout).split("\n")

    # The length is checked first because output consisting of a single
    # line has no second element to compare
    if len(output) > 1 and output[1] == "No controller found.":
        end(UNKNOWN, "No 3ware controllers were found on this machine")

    if process.returncode != 0:
        flattened = str(stdout).replace("\n", " ")
        end(UNKNOWN, "3ware utility returned an exit code of %s - %s"
                     % (process.returncode, flattened))

    # Drop the three leading and two trailing lines around the result rows
    return output[3:-2]
129
130
131
def test_all(verbosity, warn_true=False, no_summary=False, show_drives=False):
    """Checks the arrays and then the drives and merges both results

    Returns a (status, message) tuple whose status is the higher of the
    array status and the drive status. If the arrays are not OK and
    show_drives is not set, the drives are not tested and the array
    result is returned on its own"""

    array_result, array_message = test_arrays(verbosity, warn_true, no_summary)

    # An array problem is reported alone unless drive details were requested
    if not show_drives and array_result != OK:
        return array_result, array_message

    drive_result, drive_message = test_drives(verbosity, no_summary)

    result = max(array_result, drive_result)

    if drive_result == OK:
        # Healthy drives are only mentioned when explicitly asked for
        if show_drives:
            message = array_message + ", " + drive_message
        else:
            message = array_message
    elif array_result == OK:
        message = "Arrays OK but... " + drive_message
    else:
        message = array_message + ", " + drive_message

    return result, message
158
159
160
def test_arrays(verbosity, warn_true=False, no_summary=False):
    """Tests all the raid arrays on all the 3ware controllers on
    the local machine

    verbosity  - at 3 or above the unit status lines of each controller
                 are printed before they are evaluated
    warn_true  - when set, arrays that are rebuilding, verifying or
                 initializing raise the status to WARNING
    no_summary - when set, the "[N arrays checked on M controllers]"
                 text is not appended to the message

    Returns a (status, message) tuple"""

    # States that describe an operation in progress rather than a failure,
    # mapped to the wording used for them in the message
    in_progress = {"REBUILDING": "Rebuild ",
                   "VERIFY-PAUSED": "Verify ",
                   "VERIFYING": "Verify ",
                   "INITIALIZING": "Initializing "}

    # Blank lines are skipped, splitting them would raise an IndexError
    controllers = [line.split()[0] for line in run("show") if line.strip()]

    status = OK
    message = ""
    number_arrays = 0
    arrays_not_ok = 0
    number_controllers = len(controllers)
    for controller in controllers:
        unit_lines = run("/%s show unitstatus" % controller)
        # verbosity may be None when the caller did not set it
        if verbosity and verbosity >= 3:
            for unit_line in unit_lines:
                print(unit_line)
            print("")

        for unit_line in unit_lines:
            fields = unit_line.split()
            if not fields:
                # a blank line is not an array, neither count nor parse it
                continue
            number_arrays += 1
            state = fields[2]
            if state == "OK":
                continue

            unit = int(fields[0][1:])
            raid = fields[1]
            if state in in_progress:
                # NOTE(review): the fourth column is taken as the completion
                # percentage for every in-progress state - confirm this
                # matches the output of the tw_cli version in use
                percent_complete = fields[3]
                message += "Array %s status is '%s'(%s on adapter %s) - " \
                           % (unit, state, raid, controller[1:])
                message += in_progress[state]
                message += "Status: %s%% complete, " % percent_complete
                if warn_true:
                    arrays_not_ok += 1
                    # never lower a CRITICAL that was set by another array
                    if status == OK:
                        status = WARNING
            else:
                arrays_not_ok += 1
                message += "Array %s status is '%s'" % (unit, state)
                message += "(%s on adapter %s), " % (raid, controller[1:])
                status = CRITICAL

    message = message.rstrip(", ")

    message = add_status_summary(status, message, arrays_not_ok, "arrays")

    if not no_summary:
        message = add_checked_summary(message,
                                      number_arrays,
                                      number_controllers,
                                      "arrays")

    return status, message
226
227
228
def test_drives(verbosity, no_summary=False):
    """Tests all the drives on all the 3ware raid controllers
    on the local machine

    verbosity  - at 3 or above the drive status lines of each controller
                 are printed before they are evaluated
    no_summary - when set, the "[N drives checked on M controllers]"
                 text is not appended to the message

    Returns a (status, message) tuple; the status is CRITICAL as soon as
    one drive is in a state other than "OK" or "NOT-PRESENT" """

    # Blank lines are skipped, splitting them would raise an IndexError
    controllers = [line.split()[0] for line in run("show") if line.strip()]

    status = OK
    message = ""
    number_drives = 0
    drives_not_ok = 0
    number_controllers = len(controllers)
    for controller in controllers:
        drive_lines = run("/%s show drivestatus" % controller)

        # verbosity may be None when the caller did not set it
        if verbosity and verbosity >= 3:
            for drive_line in drive_lines:
                print(drive_line)
            print("")

        for drive_line in drive_lines:
            fields = drive_line.split()
            if not fields:
                # a blank line is not a drive, neither count nor parse it
                continue
            number_drives += 1
            state = fields[1]
            if state in ("OK", "NOT-PRESENT"):
                continue

            drives_not_ok += 1
            # strip the one letter prefixes so only the numbers are shown
            drive = fields[0]
            if drive.startswith("d"):
                drive = drive[1:]
            array = fields[2]
            if array.startswith("u"):
                array = array[1:]
            message += "Status of drive in port "
            message += "%s is '%s'(Array %s on adapter %s), " \
                       % (drive, state, array, controller[1:])
            status = CRITICAL

    message = message.rstrip(", ")

    message = add_status_summary(status, message, drives_not_ok, "drives")

    if not no_summary:
        message = add_checked_summary(message,
                                      number_drives,
                                      number_controllers,
                                      "drives")

    return status, message
278
279
280
def add_status_summary(status, message, number_failed, device):
    """Returns the message with a status summary placed in front of it

    device is "arrays" or "drives"; the singular form is used when
    number_failed is 1 and any other device name is replaced by a
    placeholder text"""

    singular = {"arrays": "array", "drives": "drive"}

    if device not in singular:
        device = "[unknown devices, please check code]"
    elif number_failed == 1:
        device = singular[device]

    if status != OK:
        return "%s %s not OK - " % (number_failed, device) + message

    # The " - " separator is only wanted when details follow the summary
    if message == "":
        return "All %s OK" % device + message
    return "All %s OK - " % device + message
302
303
304
def add_checked_summary(message, number_devices, number_controllers, device):
    """Returns the message with a note of what was checked appended to it

    The note has the form " [N devices checked on M controllers]". device
    is "arrays" or "drives"; singular wording is used for a count of 1 and
    any other device name is replaced by a placeholder text"""

    singular = {"arrays": "array", "drives": "drive"}

    if device not in singular:
        device = "[unknown devices, please check code]"
    elif number_devices == 1:
        device = singular[device]

    controller = "controllers"
    if number_controllers == 1:
        controller = "controller"

    checked = " [%s %s checked on %s %s]" % (number_devices, device,
                                             number_controllers, controller)
    return message + checked
326
327
328
def main():
    """Parses command line options and calls the function to
    test the arrays/drives

    Positional arguments and invalid switch combinations (-a with -d,
    -a with -s, -d with -w) print the usage and exit UNKNOWN. -V prints
    the version and exits OK. Otherwise the selected test is run and the
    plugin exits through end() with the result of that test"""

    parser = OptionParser()

    parser.add_option("-a",
                      "--arrays-only",
                      action="store_true",
                      dest="arrays_only",
                      help="Only test the arrays. By default both arrays "
                           "and drives are checked")

    parser.add_option("-d",
                      "--drives-only",
                      action="store_true",
                      dest="drives_only",
                      help="Only test the drives. By default both arrays "
                           "and drives are checked")

    parser.add_option("-n",
                      "--no-summary",
                      action="store_true",
                      dest="no_summary",
                      help="Do not display the number of arrays/drives "
                           "checked. By default the number of arrays and "
                           "drives checked are printed at the end of the "
                           "line. This is useful information and helps to "
                           "know that they are detected properly")

    parser.add_option("-s",
                      "--show-drives",
                      action="store_true",
                      dest="show_drives",
                      help="Show drive status. By default drives are "
                           "checked as well as arrays, but there is no "
                           "output regarding them unless there is a "
                           "problem. Use this if you want drive details as "
                           "well when there is an array problem (default "
                           "behaviour is to only show the array problem to "
                           "avoid too much cluttering information), "
                           "or if you want to see the drive information "
                           "even when all drives are ok")

    parser.add_option("-w",
                      "--warn-rebuilding",
                      action="store_true",
                      dest="warn_true",
                      help="Warn when an array is Rebuilding, Initializing "
                           "or Verifying. You might want to do this to keep "
                           "a closer eye on things. Also, these conditions "
                           "can affect performance so you might want to "
                           "know this is going on. Default is to not warn "
                           "during these states as they are not usually "
                           "problems")

    # default=0 so that verbosity is always a number; without it the option
    # is None when -v is absent and later ">= 3" tests compare None to an int
    parser.add_option("-v",
                      "--verbose",
                      action="count",
                      dest="verbosity",
                      default=0,
                      help="Verbose mode. Good for testing plugin. By default "
                           "only one result line is printed as per Nagios "
                           "standards")

    parser.add_option("-V",
                      "--version",
                      action="store_true",
                      dest="version",
                      help="Print version number and exit")

    (options, args) = parser.parse_args()

    def usage_error(text):
        """Prints text followed by the option help, then exits UNKNOWN"""
        print(text)
        parser.print_help()
        sys.exit(UNKNOWN)

    # The plugin takes switches only, no positional arguments
    if args:
        parser.print_help()
        sys.exit(UNKNOWN)

    arrays_only = options.arrays_only
    drives_only = options.drives_only
    no_summary = options.no_summary
    show_drives = options.show_drives
    warn_true = options.warn_true
    verbosity = options.verbosity

    if options.version:
        print(__version__)
        sys.exit(OK)

    # Each text ends in "\n" so a blank line separates it from the help
    if arrays_only and drives_only:
        usage_error("You cannot use the -a and -d switches together, "
                    "they are mutually exclusive\n")
    elif arrays_only and show_drives:
        usage_error("You cannot use the -a and -s switches together\n"
                    "No drive information can be printed "
                    "if you only check arrays\n")
    elif drives_only and warn_true:
        usage_error("You cannot use the -d and -w switches together\n"
                    "Array warning states are invalid "
                    "when testing only drives\n")

    if arrays_only:
        result, output = test_arrays(verbosity, warn_true, no_summary)
    elif drives_only:
        result, output = test_drives(verbosity, no_summary)
        # a drives-only result is labelled "DISKS" instead of "RAID"
        end(result, output, True)
    else:
        result, output = test_all(verbosity, warn_true, no_summary,
                                  show_drives)

    end(result, output)
441
442
443
# Run the check only when executed as a script, not when imported
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # An interrupted check is reported with the CRITICAL exit code
        print("Caught Control-C...")
        sys.exit(CRITICAL)
449