Logoj0ke.net Open Build Service > Projects > internetx > nagios-plugins-3ware > check_3ware_raid
Sign Up | Log In

File check_3ware_raid of Package nagios-plugins-3ware

x
 
1
#!/usr/bin/env python
2
#
3
#   Copyright Hari Sekhon 2007
4
#
5
#   This program is free software; you can redistribute it and/or modify
6
#   it under the terms of the GNU General Public License as published by
7
#   the Free Software Foundation; either version 2 of the License, or
8
#   (at your option) any later version.
9
#
10
#   This program is distributed in the hope that it will be useful,
11
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
#   GNU General Public License for more details.
14
#
15
#   You should have received a copy of the GNU General Public License
16
#   along with this program; if not, write to the Free Software
17
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
# 
19
20
"""Nagios plugin to test the state of all 3ware raid arrays and/or drives
21
   on all 3ware controllers on the local machine. Requires the tw_cli program
22
   written by 3ware, which should be called tw_cli_64 if running on a 64-bit
23
   system. May be remotely executed via any of the standard remote nagios
24
   execution mechanisms"""
25
26
# Plugin version; printed by the -V/--version command line switch
__version__ = 0.9
27
28
# Standard Nagios return codes, used both as the process exit status
# and as the internal result of every check
OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3
33
34
import os
35
import re
36
import sys
37
try:
38
    from subprocess import Popen, PIPE, STDOUT
39
except ImportError:
40
    print "Failed to import subprocess module.",
41
    print "Perhaps you are using a version of python older than 2.4?"
42
    sys.exit(CRITICAL)
43
from optparse import OptionParser
44
45
# Directory part of the path this script was started with; the 3ware
# utilities (tw_cli / tw_cli_64) are looked up in this directory.
# It is the empty string when the script is started by bare file name.
SRCDIR = os.path.dirname(sys.argv[0])
46
47
48
def end(status, message, disks=False):
    """Print the single Nagios result line and terminate the plugin.

    status  -- Nagios return code; any value other than OK, WARNING or
               CRITICAL is reported as UNKNOWN
    message -- text printed after the status label
    disks   -- when True the line is prefixed with "DISKS" instead of
               "RAID" (the UNKNOWN line carries no prefix at all)

    This function never returns: it always calls sys.exit() with the
    return code that matches the printed status."""

    if disks == True:
        check = "DISKS"
    else:
        check = "RAID"

    # Known states paired with their output template; the first entry
    # equal to status prints its line and exits with that code.
    known_states = (
        (OK,       "%s OK: %s"),
        (WARNING,  "%s WARNING: %s"),
        (CRITICAL, "%s CRITICAL: %s"),
    )
    for code, template in known_states:
        if status == code:
            print(template % (check, message))
            sys.exit(code)

    # Anything unrecognised is reported as UNKNOWN
    print("UNKNOWN: %s" % message)
    sys.exit(UNKNOWN)
67
68
69
# The plugin refuses to run unless the effective user is root.
if os.geteuid() != 0:
    end(UNKNOWN, "You must be root to run this plugin")

# Machine hardware name as reported by uname(), e.g. "i686" or "x86_64"
ARCH = os.uname()[4]

# Pick the 3ware utility that matches the architecture. SRCDIR is the empty
# string when the plugin is started by bare file name (for example
# "python check_3ware_raid"); falling back to the current directory keeps
# the path from collapsing to "/tw_cli" in the filesystem root.
if re.match("i[3456]86", ARCH):
    BIN = os.path.join(SRCDIR or os.curdir, "tw_cli")
elif ARCH == "x86_64":
    BIN = os.path.join(SRCDIR or os.curdir, "tw_cli_64")
else:
    end(UNKNOWN, "architecture is not x86 or x86_64, cannot run 3ware utility")

# Fail early, with a clear message, when the utility is missing ...
if not os.path.exists(BIN):
    end(UNKNOWN, "3ware utility for this architecture '%s' cannot be found" \
                                                                          % BIN)

# ... or present but not executable.
if not os.access(BIN, os.X_OK):
    end(UNKNOWN, "3ware utility '%s' is not executable" % BIN)
87
88
89
def run(cmd):
    """Feed one command to the 3ware utility and return its result lines.

    The utility named by BIN is started without arguments and cmd is
    written to its standard input; standard error is merged into standard
    output.  The output is split into lines, the first three and the last
    two lines are dropped, and the remaining lines are returned as a list
    of strings.

    cmd -- command text for the utility, e.g. "show"

    Every failure (no command supplied, the utility cannot be started or
    talked to, it prints nothing, it reports that no controller was found,
    or it exits with a non-zero code) terminates the plugin with an
    UNKNOWN status through end()."""

    # Local import: errno is only needed to classify the start-up failure
    import errno

    if cmd is None or cmd == "":
        end(UNKNOWN, "internal python error - " \
                   + "no cmd supplied for 3ware utility")

    try:
        process = Popen(BIN, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    except OSError:
        # sys.exc_info() is used instead of a bound exception name so the
        # handler syntax does not depend on the interpreter version.
        error = sys.exc_info()[1]
        # str(OSError) looks like "[Errno 2] No such file or directory",
        # so a missing binary has to be recognised through errno rather
        # than by comparing the message text.
        if error.errno == errno.ENOENT:
            end(UNKNOWN, "Cannot find 3ware utility '%s'" % BIN)
        else:
            end(UNKNOWN, "error trying to run 3ware utility - %s" % error)

    # poll() is None while the process runs; a non-zero value here means
    # it has already exited with an error before receiving any input.
    if process.poll():
        end(UNKNOWN, "3ware utility process ended prematurely")

    try:
        # stderr is redirected into stdout, so only the first item matters
        stdout = process.communicate(cmd)[0]
    except OSError:
        end(UNKNOWN, "unable to communicate with 3ware utility - %s" \
                                                        % sys.exc_info()[1])

    if not stdout:
        end(UNKNOWN, "No output from 3ware utility")

    output = str(stdout).split("\n")

    # Guard the index: a one-line output has no second line to inspect
    if len(output) > 1 and output[1] == "No controller found.":
        end(UNKNOWN, "No 3ware controllers were found on this machine")

    if process.returncode != 0:
        details = str(stdout).replace("\n", " ")
        end(UNKNOWN, "3ware utility returned an exit code of %s - %s" \
                                                 % (process.returncode, details))

    # Drop the three leading and two trailing lines around the data rows
    return output[3:-2]
129
130
131
def test_all(verbosity, warn_true=False, no_summary=False, show_drives=False):
    """Check the arrays and then the drives, merging both results.

    Returns a (status, message) tuple.  A non-OK array result is returned
    on its own unless show_drives is set.  Otherwise the drives are
    tested as well, the higher of the two statuses is returned, and the
    drive message is included when a drive is not OK or show_drives is
    set."""

    array_result, array_message = test_arrays(verbosity, warn_true, no_summary)
    arrays_ok = array_result == OK

    # Array trouble alone is reported unless drive details were requested
    if not (arrays_ok or show_drives):
        return array_result, array_message

    drive_result, drive_message = test_drives(verbosity, no_summary)

    # The more severe of the two results decides the overall status
    result = max(array_result, drive_result)

    combined = array_message + ", " + drive_message
    if drive_result == OK:
        if show_drives:
            message = combined
        else:
            message = array_message
    elif arrays_ok:
        message = "Arrays OK but... " + drive_message
    else:
        message = combined

    return result, message
158
159
160
def test_arrays(verbosity, warn_true=False, no_summary=False):
    """Check the state of every unit (array) on every controller.

    verbosity  -- when 3 or more, the raw unit status lines are printed
    warn_true  -- when set, units that are rebuilding, verifying or
                  initializing count as not OK and raise a WARNING
    no_summary -- when set, the "[N arrays checked on M controllers]"
                  suffix is left out

    Returns a (status, message) tuple.  Any unit state other than OK or
    one of the rebuild/verify/initialize states makes the status
    CRITICAL."""

    # First column of every line returned by "show" names a controller
    controllers = [entry.split()[0] for entry in run("show")]

    # States that describe work in progress, with the label used for them
    progress_labels = {
        "REBUILDING":    "Rebuild ",
        "VERIFY-PAUSED": "Verify ",
        "VERIFYING":     "Verify ",
        "INITIALIZING":  "Initializing ",
    }

    status = OK
    fragments = []
    number_arrays = 0
    arrays_not_ok = 0

    for controller in controllers:
        unit_lines = run("/%s show unitstatus" % controller)

        if verbosity >= 3:
            for unit_line in unit_lines:
                print(unit_line)
            print("")

        adapter = controller[1:]
        for unit_line in unit_lines:
            number_arrays += 1
            fields = unit_line.split()
            state = fields[2]
            if state == "OK":
                continue

            # Unit names carry a one character prefix before the number
            unit = int(fields[0][1:])
            raid = fields[1]

            if state in progress_labels:
                percent_complete = fields[3]
                fragments.append(
                    "Array %s status is '%s'(%s on adapter %s) - " \
                                          % (unit, state, raid, adapter))
                fragments.append(progress_labels[state])
                fragments.append("Status: %s%% complete, " % percent_complete)
                if warn_true:
                    arrays_not_ok += 1
                    # Never downgrade an earlier CRITICAL to WARNING
                    if status == OK:
                        status = WARNING
            else:
                arrays_not_ok += 1
                fragments.append("Array %s status is '%s'" % (unit, state))
                fragments.append("(%s on adapter %s), " % (raid, adapter))
                status = CRITICAL

    # Remove the separator left behind by the last fragment
    message = "".join(fragments).rstrip(", ")

    message = add_status_summary(status, message, arrays_not_ok, "arrays")

    if not no_summary:
        message = add_checked_summary(message,
                                      number_arrays,
                                      len(controllers),
                                      "arrays")

    return status, message
226
227
228
def test_drives(verbosity, no_summary=False):
    """Check the state of every drive on every controller.

    verbosity  -- when 3 or more, the raw drive status lines are printed
    no_summary -- when set, the "[N drives checked on M controllers]"
                  suffix is left out

    Returns a (status, message) tuple.  Drives reported as OK or
    NOT-PRESENT are accepted; any other state makes the status
    CRITICAL."""

    # First column of every line returned by "show" names a controller
    controllers = [entry.split()[0] for entry in run("show")]

    status = OK
    fragments = []
    number_drives = 0
    drives_not_ok = 0

    for controller in controllers:
        drive_lines = run("/%s show drivestatus" % controller)
        number_drives += len(drive_lines)

        if verbosity >= 3:
            for drive_line in drive_lines:
                print(drive_line)
            print("")

        adapter = controller[1:]
        for drive_line in drive_lines:
            fields = drive_line.split()
            state = fields[1]
            if state in ("OK", "NOT-PRESENT"):
                continue

            drives_not_ok += 1

            # Strip the "d" / "u" prefixes so only the numbers are shown
            drive = fields[0]
            if drive.startswith("d"):
                drive = drive[1:]
            array = fields[2]
            if array.startswith("u"):
                array = array[1:]

            fragments.append("Status of drive in port ")
            fragments.append("%s is '%s'(Array %s on adapter %s), " \
                                        % (drive, state, array, adapter))
            status = CRITICAL

    # Remove the separator left behind by the last fragment
    message = "".join(fragments).rstrip(", ")

    message = add_status_summary(status, message, drives_not_ok, "drives")

    if not no_summary:
        message = add_checked_summary(message,
                                      number_drives,
                                      len(controllers),
                                      "drives")

    return status, message
278
279
280
def add_status_summary(status, message, number_failed, device):
    """Return message with a status summary placed in front of it.

    status        -- result of the check; OK selects the "All ... OK"
                     wording, anything else the "N ... not OK" wording
    message       -- detail text gathered by the check, may be empty
    number_failed -- count of devices that are not OK; a count of
                     exactly one switches to the singular device name
    device        -- "arrays" or "drives"; any other value is replaced
                     by a placeholder asking to check the code"""

    singular = {"arrays": "array", "drives": "drive"}

    if device not in singular:
        device = "[unknown devices, please check code]"
    elif number_failed == 1:
        device = singular[device]

    if status != OK:
        return ("%s %s not OK - " % (number_failed, device)) + message

    # With no detail text the separating dash is left out
    if message == "":
        return ("All %s OK" % device) + message
    return ("All %s OK - " % device) + message
302
303
304
def add_checked_summary(message, number_devices, number_controllers, device):
    """Return message with a note of what was checked appended to it.

    message            -- text to extend
    number_devices     -- how many arrays or drives were looked at; a
                          count of exactly one uses the singular name
    number_controllers -- how many controllers were looked at
    device             -- "arrays" or "drives"; any other value is
                          replaced by a placeholder asking to check the
                          code

    The appended text has the form " [N devices checked on M
    controllers]"."""

    singular = {"arrays": "array", "drives": "drive"}

    if device not in singular:
        device = "[unknown devices, please check code]"
    elif number_devices == 1:
        device = singular[device]

    controller = "controllers"
    if number_controllers == 1:
        controller = "controller"

    suffix = " [%s %s checked on %s %s]" % (number_devices, device,
                                            number_controllers, controller)
    return message + suffix
326
327
328
def _build_parser():
    """Return the OptionParser that describes the plugin's switches."""

    parser = OptionParser()

    parser.add_option("-a",
                      "--arrays-only",
                      action="store_true",
                      dest="arrays_only",
                      help="Only test the arrays. By default both arrays "
                           "and drives are checked")

    parser.add_option("-d",
                      "--drives-only",
                      action="store_true",
                      dest="drives_only",
                      help="Only test the drives. By default both arrays "
                           "and drives are checked")

    parser.add_option("-n",
                      "--no-summary",
                      action="store_true",
                      dest="no_summary",
                      help="Do not display the number of arrays/drives "
                           "checked. By default the number of arrays and "
                           "drives checked are printed at the end of the "
                           "line. This is useful information and helps to "
                           "know that they are detected properly")

    parser.add_option("-s",
                      "--show-drives",
                      action="store_true",
                      dest="show_drives",
                      help="Show drive status. By default drives are "
                           "checked as well as arrays, but there is no "
                           "output regarding them unless there is a "
                           "problem. Use this if you want drive details as "
                           "well when there is an array problem (default "
                           "behaviour is to only show the array problem to "
                           "avoid too much cluttering information), "
                           "or if you want to see the drive information "
                           "even when all drives are ok")

    parser.add_option("-w",
                      "--warn-rebuilding",
                      action="store_true",
                      dest="warn_true",
                      help="Warn when an array is Rebuilding, Initializing "
                           "or Verifying. You might want to do this to keep "
                           "a closer eye on things. Also, these conditions "
                           "can affect performance so you might want to "
                           "know this is going on. Default is to not warn "
                           "during these states as they are not usually "
                           "problems")

    # default=0 so that verbosity is always a number; without it the
    # count action leaves None behind when -v is not given and the later
    # "verbosity >= 3" tests compare None with an integer.
    parser.add_option("-v",
                      "--verbose",
                      action="count",
                      dest="verbosity",
                      default=0,
                      help="Verbose mode. Good for testing plugin. By default"
                           " only one result line is printed as per Nagios"
                           " standards")

    parser.add_option("-V",
                      "--version",
                      action="store_true",
                      dest="version",
                      help="Print version number and exit")

    return parser


def main():
    """Parse the command line and run the requested array/drive checks.

    Positional arguments and contradictory switch combinations print the
    help text and exit with UNKNOWN.  -V prints the version and exits
    with OK.  Otherwise the selected check is run and its result is
    handed to end(), which prints the result line and exits."""

    parser = _build_parser()
    (options, args) = parser.parse_args()

    # The plugin takes switches only
    if args:
        parser.print_help()
        sys.exit(UNKNOWN)

    arrays_only = options.arrays_only
    drives_only = options.drives_only
    no_summary  = options.no_summary
    show_drives = options.show_drives
    warn_true   = options.warn_true
    verbosity   = options.verbosity

    if options.version:
        print(__version__)
        sys.exit(OK)

    # Reject switch combinations that contradict each other
    if arrays_only and drives_only:
        print("You cannot use the -a and -d switches together, they are "
              "mutually exclusive\n")
        parser.print_help()
        sys.exit(UNKNOWN)
    elif arrays_only and show_drives:
        print("You cannot use the -a and -s switches together")
        print("No drive information can be printed if you only check arrays\n")
        parser.print_help()
        sys.exit(UNKNOWN)
    elif drives_only and warn_true:
        print("You cannot use the -d and -w switches together")
        print("Array warning states are invalid when testing only drives\n")
        parser.print_help()
        sys.exit(UNKNOWN)

    if arrays_only:
        result, output = test_arrays(verbosity, warn_true, no_summary)
    elif drives_only:
        result, output = test_drives(verbosity, no_summary)
        # Third argument switches the result line prefix to "DISKS"
        end(result, output, True)
    else:
        result, output = test_all(verbosity, warn_true, no_summary, show_drives)

    end(result, output)
441
442
443
# Script entry point: run the checks, and report a Control-C during the
# run as a CRITICAL result instead of showing a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Caught Control-C...")
        sys.exit(CRITICAL)
449