Changes of Revision 29
[-] | Changed | icinga-mod_gearman.changes |
1
2 ------------------------------------------------------------------- 3 +Fri Aug 19 20:13:25 UTC 2011 - cs@linux-administrator.com 4 + 5 +- update to release 1.0.9 6 + 7 +------------------------------------------------------------------- 8 Sun Jul 31 18:39:34 UTC 2011 - cs@linux-administrator.com 9 10 - update to release 1.0.8 11 |
||
[-] | Changed | icinga-mod_gearman.spec ^ |
10 1
2 %define monitor icinga 3 %define pkgname mod_gearman 4 Name: %{monitor}-%{pkgname} 5 -Version: 1.0.8 6 +Version: 1.0.9 7 Release: 1 8 Group: Application/System 9 License: GPLv3 10 |
||
[+] | Deleted | mod_gearman-1.0.8.tar.bz2/etc/mod_gearman.conf ^ |
@@ -1,163 +0,0 @@ -############################################################################### -# -# mod_gearman - distribute checks with gearman -# -# Copyright (c) 2010 Sven Nierlein -# -# Sample Worker / NEB Module Config -# -############################################################################### - -# use debug to increase the verbosity of the module. -# Possible values are: -# 0 = only errors -# 1 = debug messages -# 2 = trace messages -# 3 = trace and all gearman related logs are going to stdout. -# Default is 0. -debug=0 - - -# sets the addess of your gearman job server. Can be specified -# more than once to add more server. -server=localhost:4730 - - -# sets the address of your 2nd (duplicate) gearman job server. Can -# be specified more than once o add more servers. -#dupserver=<host>:<port> - - -# defines if the module should distribute execution of -# eventhandlers. -eventhandler=yes - - -# defines if the module should distribute execution of -# service checks. -services=yes - - -# defines if the module should distribute execution of -# host checks. -hosts=yes - - -# sets a list of hostgroups which will go into seperate -# queues. Either specify a comma seperated list or use -# multiple lines. -#hostgroups=name1 -#hostgroups=name2,name3 - - -# sets a list of servicegroups which will go into seperate -# queues. -#servicegroups=name1,name2,name3 - -# Set this to 'no' if you want Mod-Gearman to only take care about -# servicechecks. No hostchecks will be processed by Mod-Gearman. Use -# this option to disable hostchecks and still have the possibility to -# use hostgroups for easy configuration of your services. -# If set to yes, you still have to define which hostchecks should be -# processed by either using 'hosts' or the 'hostgroups' option. -# Default is Yes. -do_hostchecks=yes - -# enables or disables encryption. It is strongly -# advised to not disable encryption. Anybody will be -# able to inject packages to your worker. 
-# Encryption is enabled by default and you have to -# explicitly disable it. -# When using encryption, you will either have to -# specify a shared password with key=... or a -# keyfile with keyfile=... -# Default is On. -encryption=yes - - -# A shared password which will be used for -# encryption of data pakets. Should be at least 8 -# bytes long. Maximum length is 32 characters. -key=should_be_changed - - -# The shared password will be read from this file. -# Use either key or keyfile. Only the first 32 -# characters will be used. -#keyfile=/path/to/secret.file - - -############################################################################### -# -# NEB Module Config -# -# the following settings are for the neb module only and -# will be ignored by the worker. -# -############################################################################### - -# sets a list of hostgroups which will not be executed -# by gearman. They are just passed through. -# Default is none -localhostgroups= - - -# sets a list of servicegroups which will not be executed -# by gearman. They are just passed through. -# Default is none -localservicegroups= - - -# Number of result worker threads. Usually one is -# enough. You may increase the value if your -# result queue is not processed fast enough. -# Default: 1 -result_workers=1 - - -# defines if the module should distribute perfdata -# to gearman. -# Note: processing of perfdata is not part of -# mod_gearman. You will need additional worker for -# handling performance data. For example: pnp4nagios -# Performance data is just written to the gearman -# queue. -# Default no -perfdata=no - - -############################################################################### -# -# Worker Config -# -# the following settings are for the worker only and -# will be ignored by the neb module. -# -############################################################################### - -# Path to the pidfile. 
Usually set by the init script -#pidfile=/usr/local/var/mod_gearman/mod_gearman_worker.pid - -# Path to the logfile. -logfile=/usr/local/var/mod_gearman/mod_gearman_worker.log - -# Minimum number of worker processes which should -# run at any time. -min-worker=1 - -# Maximum number of worker processes which should -# run at any time. You may set this equal to -# min-worker setting to disable dynamic starting of -# workers. When setting this to 1, all services from -# this worker will be executed one after another. -max-worker=20 - -# Time after which an idling worker exists -# This parameter controls how fast your waiting workers will -# exit if there are no jobs waiting. -idle-timeout=30 - -# Controls the amount of jobs a worker will do before he exits -# Use this to control how fast the amount of workers will go down -# after high load times -max-jobs=50 | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/Changes ^ |
@@ -1,5 +1,15 @@ This file documents the revision history for mod_gearman. +1.0.9 Mon Aug 15 16:05:23 CEST 2011 + - nicer error messages for send_multi when zero results transmitted + - fixed sigsegv when reloading core in combination with exports options + - added optional workaround for plugins exiting with rc 25 because + of duplicate execution + - removed version output to stderr while reloading/starting the core + - check server definition for duplicates + - replace died workers to maintain the worker population + - better timeout handling when using fork_on_exec=off + 1.0.8 Fri Jul 22 22:21:34 CEST 2011 - use identifier for error messages if set - fixed ld options (fixes debian bug #632431) thanks Ilya Barygin | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/README ^ |
@@ -5,17 +5,18 @@ ------------------- http://labs.consol.de/nagios/mod-gearman[Mod_Gearman] is an easy way -of distributing active Nagios checks across your network and to increase -nagios scalability. Mod-Gearman can even help to reduce the load on a -single nagios host, because its much smaller and more efficient in -executing checks. It consists of three parts: +of distributing active Nagios checks across your network and +increasing nagios scalability. Mod-Gearman can even help to reduce the +load on a single nagios host, because its much smaller and more +efficient in executing checks. It consists of three parts: -* There is a NEB module which resides in the Nagios core and adds servicechecks, hostchecks -and eventhandler to a Gearman queue. +* There is a NEB module which resides in the Nagios core and adds servicechecks, + hostchecks and eventhandler to a Gearman queue. * The counterpart is one or more worker clients executing the checks. -Worker can be configured to only run checks for specific host- or -servicegroups. -* You need at least one http://gearman.org[Gearman Job Server] running + Worker can be configured to only run checks for specific host- or + servicegroups. +* And you need at least one http://gearman.org[Gearman Job Server] + running. http://labs.consol.de/wp-content/uploads/2010/09/Mod-Gearman.pdf[Have a look at the slides from the Nagios Workshop 2011 in Hannover] @@ -24,8 +25,7 @@ Download -------- -* Latest stable release http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.8.tar.gz[version - 1.0.8] from on July 22 2011 +* Latest stable release http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.9.tar.gz[version 1.0.9] from August 16 2011 * Mod Gearman is available for download at: http://labs.consol.de/nagios/mod-gearman * Source is available at GitHub: http://github.com/sni/mod_gearman * Older versions are available in the <<_archive,download archive>>. 
@@ -35,12 +35,13 @@ Support & Questions ------------------- -* Mod-Gearman has been succesfully tested with Nagios 3.2.3 and Icinga - 1.2.0 running on Lib-Gearman 0.14. There are no known bugs at the moment. +* Mod-Gearman has been successfully tested with Nagios >3.2.3 and Icinga + >1.2.0 running on Lib-Gearman >0.14. There are no known bugs at the moment. Let me know if you find one. * https://groups.google.com/group/mod_gearman[google groups mailinglist] * http://labs.consol.de/lang/de/forum/#/categories/mod-gearman[support forum] * Debian users may be interested in the http://labs.consol.de/nagios/mod-gearman/mod-gearman-quickstart-guide/[quickstart guide]. +* Debian users may also be interested in the official packages: http://packages.debian.org/source/wheezy/mod-gearman How does it work @@ -249,15 +250,20 @@ /usr/sbin/gearmand -t 10 -j 0 -------------------------------------- -or a supplied init script (extras/gearmand-init). +or a supplied init script (extras/gearmand-init). Command line +arguments have changed in recent gearman versions and you should now +use something like: +-------------------------------------- +/usr/sbin/gearmand --threads=10 --job-retries=0 +-------------------------------------- Patch Nagios ^^^^^^^^^^^^ -NOTE: The needed patch is already included since Nagios 3.2.2. Use the patch if you -use an older version. +NOTE: The needed patch is already applied to Nagios 3.2.2. Use the patch if you +have an older version. It is not possible to distribute eventhandler with Nagios versions prior 3.2.2. Just apply the patch from the ./extras/patches directory @@ -583,8 +589,9 @@ ==== fork_on_exec:: -Use this option to disable an extra fork for each plugin execution. This option -will reduce the load on the worker host. Default: yes +Use this option to disable an extra fork for each plugin execution. +Disabling this option will reduce the load on the worker host, but may +cause trouble with unclean plugins. 
Default: yes + ==== fork_on_exec=no @@ -599,6 +606,19 @@ dupserver=logserver:4730,logserver2:4730 ==== +workaround_rc_25:: +Duplicate jobs from gearmand sometimes result in exit code 25 of +plugins because they are executed twice and get killed because of +using the same resource. Sending results (when exit code is 25) +will be skipped with this enabled. +Only needed if you experience problems with plugins exiting with exit +code 25 randomly. Default is off. ++ +==== + workaround_rc_25=off +==== + + Queue Names @@ -808,6 +828,7 @@ Archive ------- +* http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.9.tar.gz[version 1.0.9 - August 16 2011] * http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.8.tar.gz[version 1.0.8 - July 22 2011] * http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.7.tar.gz[version 1.0.7 - July 03 2011] * http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.6.tar.gz[version 1.0.6 - June 04 2011] | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/common/gearman.c ^ |
@@ -112,6 +112,8 @@ x++; } + current_client_dup = client; + return GM_OK; } @@ -150,6 +152,9 @@ } assert(x != 0); + + current_client = client; + return GM_OK; } | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/common/utils.c ^ |
@@ -24,8 +24,11 @@ #include "utils.h" #include "crypt.h" #include "base64.h" +#include "gearman.h" pid_t current_child_pid = 0; +char temp_buffer1[GM_BUFFERSIZE]; +char temp_buffer2[GM_BUFFERSIZE]; /* escapes newlines in a string */ char *escape_newlines(char *rawbuf) { @@ -224,6 +227,8 @@ opt->spawn_rate = GM_DEFAULT_SPAWN_RATE; opt->identifier = NULL; + opt->workaround_rc_25 = GM_DISABLED; + opt->host = NULL; opt->service = NULL; @@ -288,7 +293,6 @@ int parse_args_line(mod_gm_opt_t *opt, char * arg, int recursion_level) { char *key; char *value; - char temp_buffer[GM_BUFFERSIZE]; gm_log( GM_LOG_TRACE, "parse_args_line(%s, %d)\n", arg, recursion_level); @@ -371,6 +375,13 @@ opt->active = parse_yes_or_no(value, GM_ENABLED); return(GM_OK); } + + /* workaround_rc_25 */ + else if ( !strcmp( key, "workaround_rc_25" ) ) { + opt->workaround_rc_25 = parse_yes_or_no(value, GM_ENABLED); + return(GM_OK); + } + else if ( value == NULL ) { gm_log( GM_LOG_ERROR, "unknown switch '%s'\n", key ); return(GM_OK); @@ -561,16 +572,9 @@ else if ( !strcmp( key, "server" ) ) { char *servername; while ( (servername = strsep( &value, "," )) != NULL ) { - servername = trim(servername); - if ( strcmp( servername, "" ) ) { - if(strcspn(servername, ":") == 0) { - temp_buffer[0]='\x0'; - snprintf( temp_buffer,sizeof( temp_buffer )-1, "localhost%s", servername); - temp_buffer[sizeof( temp_buffer )-1]='\x0'; - opt->server_list[opt->server_num] = strdup(temp_buffer); - } else { - opt->server_list[opt->server_num] = strdup(servername); - } + char * new_server = get_param_server(servername, opt->server_list, opt->server_num); + if(new_server != NULL) { + opt->server_list[opt->server_num] = new_server; opt->server_num++; } } @@ -580,9 +584,9 @@ else if ( !strcmp( key, "dupserver" ) ) { char *servername; while ( (servername = strsep( &value, "," )) != NULL ) { - servername = trim(servername); - if ( strcmp( servername, "" ) ) { - opt->dupserver_list[opt->dupserver_num] = strdup(servername); + char * 
new_server = get_param_server(servername, opt->dupserver_list, opt->dupserver_num); + if(new_server != NULL) { + opt->dupserver_list[opt->dupserver_num] = new_server; opt->dupserver_num++; } } @@ -845,7 +849,7 @@ free(opt->local_hostgroups_list[i]); for(i=0;i<opt->local_servicegroups_num;i++) free(opt->local_servicegroups_list[i]); - for(i=0;i<=GM_NEBTYPESSIZE;i++) { + for(i=0;i<GM_NEBTYPESSIZE;i++) { for(j=0;j<opt->exports[i]->elem_number;j++) { free(opt->exports[i]->name[j]); } @@ -1247,7 +1251,9 @@ bufdup = strdup(buffer); snprintf( buffer, sizeof( buffer )-1, "CRITICAL: Return code of %d is out of bounds. (worker: %s)\n%s\n", (int)(return_code), identifier, bufdup); free(bufdup); - return_code = STATE_CRITICAL; + if(return_code != 25 && mod_gm_opt->workaround_rc_25 == GM_DISABLED) { + return_code = STATE_CRITICAL; + } } exec_job->output = strdup(buffer); @@ -1324,6 +1330,10 @@ pid_t pid = getpid(); gm_log( GM_LOG_TRACE, "check_alarm_handler(%i)\n", sig ); + if(current_job != NULL && mod_gm_opt->fork_on_exec == GM_DISABLED) { + send_timeout_result(current_job); + gearman_job_send_complete(current_gearman_job, NULL, 0); + } if(current_child_pid > 0) { gm_log( GM_LOG_TRACE, "send SIGINT to %d\n", current_child_pid); @@ -1807,4 +1817,169 @@ } return; +} + +/* extract server from string and check for duplicates */ +char * get_param_server(char * servername, char * server_list[GM_LISTSIZE], int server_num) { + char temp_buffer[GM_BUFFERSIZE]; + char * new_server; + int i; + servername = trim(servername); + + if ( ! strcmp( servername, "" ) ) { + return NULL; + } + + if(strcspn(servername, ":") == 0) { + temp_buffer[0]='\x0'; + snprintf( temp_buffer,sizeof( temp_buffer )-1, "localhost%s", servername); + temp_buffer[sizeof( temp_buffer )-1]='\x0'; + new_server = strdup(temp_buffer); + } else { + new_server = strdup(servername); + } + + // check for duplicates + for(i=0;i<server_num;i++) { + if ( ! 
strcmp( new_server, server_list[i] ) ) { + gm_log( GM_LOG_ERROR, "duplicate definition of server: %s\n", new_server); + free(new_server); + return NULL; + } + } + + return new_server; +} + +void send_timeout_result(gm_job_t * exec_job) { + struct timeval end_time; + char buffer[GM_BUFFERSIZE]; + buffer[0] = '\x0'; + + gm_log( GM_LOG_TRACE, "send_timeout_result()\n"); + + gettimeofday(&end_time, NULL); + exec_job->finish_time = end_time; + + exec_job->return_code = 2; + exec_job->early_timeout = 1; + if ( !strcmp( exec_job->type, "service" ) ) + snprintf( buffer, sizeof( buffer ) -1, "(Service Check Timed Out On Worker: %s)\n", mod_gm_opt->identifier); + if ( !strcmp( exec_job->type, "host" ) ) + snprintf( buffer, sizeof( buffer ) -1, "(Host Check Timed Out On Worker: %s)\n", mod_gm_opt->identifier); + free(exec_job->output); + exec_job->output = strdup( buffer ); + + send_result_back(exec_job); + + return; +} + + +/* send results back */ +void send_result_back(gm_job_t * exec_job) { + gm_log( GM_LOG_TRACE, "send_result_back()\n" ); + + if(exec_job->result_queue == NULL) { + return; + } + if(exec_job->output == NULL) { + return; + } + + /* workaround for rc 25 bug + * duplicate jobs from gearmand result in exit code 25 of plugins + * because they are executed twice and get killed because of using + * the same ressource. + * Sending results (when exit code is 25 ) will be skipped with this + * enabled. 
+ */ + if( exec_job->return_code == 25 && mod_gm_opt->workaround_rc_25 == GM_ENABLED ) { + return; + } + + gm_log( GM_LOG_TRACE, "queue: %s\n", exec_job->result_queue ); + temp_buffer1[0]='\x0'; + snprintf( temp_buffer1, sizeof( temp_buffer1 )-1, "host_name=%s\ncore_start_time=%i.%i\nstart_time=%i.%i\nfinish_time=%i.%i\nlatency=%f\nreturn_code=%i\nexited_ok=%i\n", + exec_job->host_name, + ( int )exec_job->core_start_time.tv_sec, + ( int )exec_job->core_start_time.tv_usec, + ( int )exec_job->start_time.tv_sec, + ( int )exec_job->start_time.tv_usec, + ( int )exec_job->finish_time.tv_sec, + ( int )exec_job->finish_time.tv_usec, + exec_job->latency, + exec_job->return_code, + exec_job->exited_ok + ); + temp_buffer1[sizeof( temp_buffer1 )-1]='\x0'; + + if(exec_job->service_description != NULL) { + temp_buffer2[0]='\x0'; + strncat(temp_buffer2, "service_description=", (sizeof(temp_buffer2)-1)); + strncat(temp_buffer2, exec_job->service_description, (sizeof(temp_buffer2)-1)); + strncat(temp_buffer2, "\n", (sizeof(temp_buffer2)-1)); + + strncat(temp_buffer1, temp_buffer2, (sizeof(temp_buffer1)-1)); + } + temp_buffer1[sizeof( temp_buffer1 )-1]='\x0'; + + if(exec_job->output != NULL) { + temp_buffer2[0]='\x0'; + strncat(temp_buffer2, "output=", (sizeof(temp_buffer2)-1)); + if(mod_gm_opt->debug_result) { + strncat(temp_buffer2, "(", (sizeof(temp_buffer2)-1)); + strncat(temp_buffer2, hostname, (sizeof(temp_buffer2)-1)); + strncat(temp_buffer2, ") - ", (sizeof(temp_buffer2)-1)); + } + strncat(temp_buffer2, exec_job->output, (sizeof(temp_buffer2)-1)); + strncat(temp_buffer2, "\n\n\n", (sizeof(temp_buffer2)-1)); + strncat(temp_buffer1, temp_buffer2, (sizeof(temp_buffer1)-1)); + } + strncat(temp_buffer1, "\n", (sizeof(temp_buffer1)-2)); + temp_buffer1[sizeof( temp_buffer1 )-1]='\x0'; + + gm_log( GM_LOG_TRACE, "data:\n%s\n", temp_buffer1); + + if(add_job_to_queue( current_client, + mod_gm_opt->server_list, + exec_job->result_queue, + NULL, + temp_buffer1, + GM_JOB_PRIO_NORMAL, + 
GM_DEFAULT_JOB_RETRIES, + mod_gm_opt->transportmode, + TRUE + ) == GM_OK) { + gm_log( GM_LOG_TRACE, "send_result_back() finished successfully\n" ); + } + else { + gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully\n" ); + } + + if( mod_gm_opt->dupserver_num ) { + strncpy(temp_buffer2, "type=passive\n", (sizeof(temp_buffer1)-2)); + strncat(temp_buffer2, temp_buffer1, (sizeof(temp_buffer2)-2)); + temp_buffer2[sizeof( temp_buffer2 )-1]='\x0'; + if( add_job_to_queue( current_client_dup, + mod_gm_opt->dupserver_list, + exec_job->result_queue, + NULL, + temp_buffer2, + GM_JOB_PRIO_NORMAL, + GM_DEFAULT_JOB_RETRIES, + mod_gm_opt->transportmode, + TRUE + ) == GM_OK) { + gm_log( GM_LOG_TRACE, "send_result_back() finished successfully for duplicate server.\n" ); + } + else { + gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully for duplicate server\n" ); + } + } + else { + gm_log( GM_LOG_TRACE, "send_result_back() has no duplicate servers to send to.\n" ); + } + + return; } | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/configure ^ |
@@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.65 for mod_gearman 1.0.8. +# Generated by GNU Autoconf 2.65 for mod_gearman 1.0.9. # # Report bugs to <sven.nierlein@consol.de>. # @@ -552,8 +552,8 @@ # Identity of this package. PACKAGE_NAME='mod_gearman' PACKAGE_TARNAME='mod_gearman' -PACKAGE_VERSION='1.0.8' -PACKAGE_STRING='mod_gearman 1.0.8' +PACKAGE_VERSION='1.0.9' +PACKAGE_STRING='mod_gearman 1.0.9' PACKAGE_BUGREPORT='sven.nierlein@consol.de' PACKAGE_URL='' @@ -1249,7 +1249,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures mod_gearman 1.0.8 to adapt to many kinds of systems. +\`configure' configures mod_gearman 1.0.9 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1315,7 +1315,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of mod_gearman 1.0.8:";; + short | recursive ) echo "Configuration of mod_gearman 1.0.9:";; esac cat <<\_ACEOF @@ -1412,7 +1412,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -mod_gearman configure 1.0.8 +mod_gearman configure 1.0.9 generated by GNU Autoconf 2.65 Copyright (C) 2009 Free Software Foundation, Inc. @@ -1875,7 +1875,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by mod_gearman $as_me 1.0.8, which was +It was created by mod_gearman $as_me 1.0.9, which was generated by GNU Autoconf 2.65. Invocation command line was $ $0 $@ @@ -2683,7 +2683,7 @@ # Define the identity of the package. PACKAGE='mod_gearman' - VERSION='1.0.8' + VERSION='1.0.9' cat >>confdefs.h <<_ACEOF @@ -5923,7 +5923,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by mod_gearman $as_me 1.0.8, which was +This file was extended by mod_gearman $as_me 1.0.9, which was generated by GNU Autoconf 2.65. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -5989,7 +5989,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -mod_gearman config.status 1.0.8 +mod_gearman config.status 1.0.9 configured by $0, generated by GNU Autoconf 2.65, with options \\"\$ac_cs_config\\" | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/configure.ac ^ |
@@ -3,7 +3,7 @@ ############################################## AC_PREREQ([2.59]) -AC_INIT([mod_gearman], [1.0.8], [sven.nierlein@consol.de]) +AC_INIT([mod_gearman], [1.0.9], [sven.nierlein@consol.de]) AM_INIT_AUTOMAKE([-Wall foreign subdir-objects]) AC_CONFIG_SRCDIR([neb_module/mod_gearman.c],[worker/worker.c],[tools/send_gearman.c],[tools/check_gearman.c],[tools/gearman_top.c]) AC_CONFIG_HEADER([config.h]) | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/docs/QUICKSTART ^ |
@@ -52,9 +52,9 @@ -------------------------------------- #> cd /tmp -#> wget "http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.8.tar.gz" -#> tar zxf mod_gearman-1.0.8.tar.gz -#> cd mod_gearman-1.0.8 +#> wget "http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.9.tar.gz" +#> tar zxf mod_gearman-1.0.9.tar.gz +#> cd mod_gearman-1.0.9 #> ./configure --prefix=/opt --with-gearman=/opt --with-user=nagios --with-init-dir=/etc/init.d #> make #> make install @@ -118,7 +118,7 @@ -------------------------------------- #> grep mod_gearman /var/log/nagios3/nagios.log -[1295003042] mod_gearman: Version 1.0.8 +[1295003042] mod_gearman: Version 1.0.9 [1295003042] Event broker module '/opt/lib/mod_gearman/mod_gearman.o' initialized successfully. -------------------------------------- @@ -152,7 +152,7 @@ -------------------------------------- #> /opt/bin/check_gearman -H localhost -q worker_`hostname` -t 10 -s check -check_gearman OK - debian64 has 1 worker and is working on 0 jobs. Version: 1.0.8|worker=1 jobs=2c +check_gearman OK - debian64 has 1 worker and is working on 0 jobs. Version: 1.0.9|worker=1 jobs=2c -------------------------------------- The interesting number is the last one, '2c' in our case, which means there | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/docs/QUICKSTART.html ^ |
@@ -606,9 +606,9 @@ <div class="listingblock"> <div class="content"> <pre><tt>#> cd /tmp -#> wget "http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.8.tar.gz" -#> tar zxf mod_gearman-1.0.8.tar.gz -#> cd mod_gearman-1.0.8 +#> wget "http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.9.tar.gz" +#> tar zxf mod_gearman-1.0.9.tar.gz +#> cd mod_gearman-1.0.9 #> ./configure --prefix=/opt --with-gearman=/opt --with-user=nagios --with-init-dir=/etc/init.d #> make #> make install @@ -666,7 +666,7 @@ <div class="listingblock"> <div class="content"> <pre><tt>#> grep mod_gearman /var/log/nagios3/nagios.log -[1295003042] mod_gearman: Version 1.0.8 +[1295003042] mod_gearman: Version 1.0.9 [1295003042] Event broker module '/opt/lib/mod_gearman/mod_gearman.o' initialized successfully.</tt></pre> </div></div> </div> @@ -693,7 +693,7 @@ <div class="listingblock"> <div class="content"> <pre><tt>#> /opt/bin/check_gearman -H localhost -q worker_`hostname` -t 10 -s check -check_gearman OK - debian64 has 1 worker and is working on 0 jobs. Version: 1.0.8|worker=1 jobs=2c</tt></pre> +check_gearman OK - debian64 has 1 worker and is working on 0 jobs. Version: 1.0.9|worker=1 jobs=2c</tt></pre> </div></div> <div class="paragraph"><p>The interesting number is the last one, <em>2c</em> in our case, which means there have been already 2 jobs executed by this worker.</p></div> @@ -733,7 +733,7 @@ <div id="footnotes"><hr /></div> <div id="footer"> <div id="footer-text"> -Last updated 2011-07-09 11:51:30 CEST +Last updated 2011-08-16 09:50:33 CEST </div> </div> </body> | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/docs/README.html ^ |
@@ -561,27 +561,28 @@ <h2 id="_what_is_mod_gearman">What is Mod-Gearman</h2> <div class="sectionbody"> <div class="paragraph"><p><a href="http://labs.consol.de/nagios/mod-gearman">Mod_Gearman</a> is an easy way -of distributing active Nagios checks across your network and to increase -nagios scalability. Mod-Gearman can even help to reduce the load on a -single nagios host, because its much smaller and more efficient in -executing checks. It consists of three parts:</p></div> +of distributing active Nagios checks across your network and +increasing nagios scalability. Mod-Gearman can even help to reduce the +load on a single nagios host, because its much smaller and more +efficient in executing checks. It consists of three parts:</p></div> <div class="ulist"><ul> <li> <p> -There is a NEB module which resides in the Nagios core and adds servicechecks, hostchecks -and eventhandler to a Gearman queue. +There is a NEB module which resides in the Nagios core and adds servicechecks, + hostchecks and eventhandler to a Gearman queue. </p> </li> <li> <p> The counterpart is one or more worker clients executing the checks. -Worker can be configured to only run checks for specific host- or -servicegroups. + Worker can be configured to only run checks for specific host- or + servicegroups. </p> </li> <li> <p> -You need at least one <a href="http://gearman.org">Gearman Job Server</a> running +And you need at least one <a href="http://gearman.org">Gearman Job Server</a> + running. 
</p> </li> </ul></div> @@ -592,8 +593,7 @@ <div class="ulist"><ul> <li> <p> -Latest stable release <a href="http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.8.tar.gz">version - 1.0.8</a> from on July 22 2011 +Latest stable release <a href="http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.9.tar.gz">version 1.0.9</a> from August 16 2011 </p> </li> <li> @@ -618,8 +618,8 @@ <div class="ulist"><ul> <li> <p> -Mod-Gearman has been succesfully tested with Nagios 3.2.3 and Icinga - 1.2.0 running on Lib-Gearman 0.14. There are no known bugs at the moment. +Mod-Gearman has been succesfully tested with Nagios >3.2.3 and Icinga + >1.2.0 running on Lib-Gearman >0.14. There are no known bugs at the moment. Let me know if you find one. </p> </li> @@ -638,6 +638,11 @@ Debian users may be interested in the <a href="http://labs.consol.de/nagios/mod-gearman/mod-gearman-quickstart-guide/">quickstart guide</a>. </p> </li> +<li> +<p> +Debian users may also be interested in the official packages: <a href="http://packages.debian.org/source/wheezy/mod-gearman">http://packages.debian.org/source/wheezy/mod-gearman</a> +</p> +</li> </ul></div> </div> <h2 id="_how_does_it_work">How does it work</h2> @@ -839,15 +844,21 @@ <div class="content"> <pre><tt>/usr/sbin/gearmand -t 10 -j 0</tt></pre> </div></div> -<div class="paragraph"><p>or a supplied init script (extras/gearmand-init).</p></div> +<div class="paragraph"><p>or a supplied init script (extras/gearmand-init). Command line +arguments have change in recent gearman versions and you now should +use something like:</p></div> +<div class="listingblock"> +<div class="content"> +<pre><tt>/usr/sbin/gearmand --threads=10 --job-retries=0</tt></pre> +</div></div> <h4 id="_patch_nagios">Patch Nagios</h4> <div class="admonitionblock"> <table><tr> <td class="icon"> <div class="title">Note</div> </td> -<td class="content">The needed patch is already included since Nagios 3.2.2. 
Use the patch if you -use an older version.</td> +<td class="content">The needed patch is already applied to Nagios 3.2.2. Use the patch if you +have an older version.</td> </tr></table> </div> <div class="paragraph"><p>It is not possible to distribute eventhandler with Nagios versions @@ -1401,8 +1412,9 @@ </dt> <dd> <p> -Use this option to disable an extra fork for each plugin execution. This option -will reduce the load on the worker host. Default: yes +Use this option to disable an extra fork for each plugin execution. +Disabling this option will reduce the load on the worker host, but may +cause trouble with unclean plugins. Default: yes </p> <div class="exampleblock"> <div class="exampleblock-content"> @@ -1429,6 +1441,26 @@ </div></div> </div></div> </dd> +<dt class="hdlist1"> +workaround_rc_25 +</dt> +<dd> +<p> +Duplicate jobs from gearmand result sometimes in exit code 25 of +plugins because they are executed twice and get killed because of +using the same ressource. Sending results (when exit code is 25 ) +will be skipped with this enabled. +Only needed if you experience problems with plugins exiting with exit +code 25 randomly. Default is off. +</p> +<div class="exampleblock"> +<div class="exampleblock-content"> +<div class="literalblock"> +<div class="content"> +<pre><tt>workaround_rc_25=off</tt></pre> +</div></div> +</div></div> +</dd> </dl></div> </div> <h2 id="_queue_names">Queue Names</h2> @@ -1688,6 +1720,11 @@ <div class="ulist"><ul> <li> <p> +<a href="http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.9.tar.gz">version 1.0.9 - August 16 2011</a> +</p> +</li> +<li> +<p> <a href="http://labs.consol.de/wp-content/uploads/2010/09/mod_gearman-1.0.8.tar.gz">version 1.0.8 - July 22 2011</a> </p> </li> @@ -1767,7 +1804,7 @@ <div id="footnotes"><hr /></div> <div id="footer"> <div id="footer-text"> -Last updated 2011-07-22 22:45:23 CEST +Last updated 2011-08-16 09:46:51 CEST </div> </div> </body> | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/etc/mod_gearman.conf.in ^ |
@@ -161,3 +161,22 @@ # Use this to control how fast the amount of workers will go down # after high load times max-jobs=50 + +# defines the rate of spawned workers per second as long +# as there are jobs waiting +spawn-rate=1 + +# Use this option to disable an extra fork for each plugin execution. Disabling +# this option will reduce the load on the worker host but can lead to problems with +# unclean plugins. +fork_on_exec=yes + +# Workarounds + +# workaround for rc 25 bug +# duplicate jobs from gearmand result in exit code 25 of plugins +# because they are executed twice and get killed because of using +# the same resource. +# Sending results (when exit code is 25) will be skipped with this +# enabled. +workaround_rc_25=off | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/extras/shared.conf ^ |
@@ -181,3 +181,18 @@ # defines the rate of spawned workers per second as long # as there are jobs waiting spawn-rate=1 + +# Use this option to disable an extra fork for each plugin execution. Disabling +# this option will reduce the load on the worker host but can lead to problems with +# unclean plugins. Default: yes +fork_on_exec=yes + +# Workarounds + +# workaround for rc 25 bug +# duplicate jobs from gearmand result in exit code 25 of plugins +# because they are executed twice and get killed because of using +# the same resource. +# Sending results (when exit code is 25) will be skipped with this +# enabled. +workaround_rc_25=off | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/extras/standalone_worker.conf ^ |
@@ -117,3 +117,14 @@ # defines the rate of spawned workers per second as long # as there are jobs waiting spawn-rate=1 + + +# Workarounds + +# workaround for rc 25 bug +# duplicate jobs from gearmand result in exit code 25 of plugins +# because they are executed twice and get killed because of using +# the same resource. +# Sending results (when exit code is 25) will be skipped with this +# enabled. +workaround_rc_25=off | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/include/common.h ^ |
@@ -52,7 +52,7 @@ #define MOD_GM_COMMON_H /* constants */ -#define GM_VERSION "1.0.8" +#define GM_VERSION "1.0.9" #define GM_ENABLED 1 #define GM_DISABLED 0 #define GM_BUFFERSIZE 98304 @@ -99,6 +99,7 @@ #define GM_DEFAULT_MAX_WORKER 20 /**< maximum number of concurrent worker */ #define GM_DEFAULT_JOB_MAX_AGE 600 /**< discard jobs older than that */ #define GM_DEFAULT_SPAWN_RATE 1 /**< number of spawned worker per seconds */ +#define GM_DEFAULT_WORKER_LOOP_SLEEP 1 /**< sleep in worker main loop */ /* transport modes */ #define GM_ENCODE_AND_ENCRYPT 1 @@ -200,6 +201,7 @@ int idle_timeout; /**< number of seconds till a idle worker exits */ int max_jobs; /**< maximum number of jobs done after a worker exits */ int spawn_rate; /**< number of spawned new worker */ + int workaround_rc_25; /**< optional workaround for plugins returning exit code 25 */ /* send_gearman */ int timeout; /**< timeout for waiting reading on stdin */ int return_code; /**< return code */ @@ -237,6 +239,8 @@ /** options structure */ mod_gm_opt_t *mod_gm_opt; +gm_job_t * current_job; +char hostname[GM_BUFFERSIZE]; /* * @} | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/include/gearman.h ^ |
@@ -37,6 +37,10 @@ typedef void*( mod_gm_worker_fn)(gearman_job_st *job, void *context, size_t *result_size, gearman_return_t *ret_ptr); +gearman_client_st *current_client; +gearman_client_st *current_client_dup; +gearman_job_st *current_gearman_job; + int create_client( char ** server_list, gearman_client_st * client); int create_client_dup( char ** server_list, gearman_client_st * client); int create_worker( char ** server_list, gearman_worker_st * worker); | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/include/utils.h ^ |
@@ -468,7 +468,10 @@ */ void gm_log( int lvl, const char *text, ... ); -/** write log line with core logger +/** + * write_core_log + * + * write log line with core logger * * @param[in] data - log message * @@ -476,6 +479,43 @@ */ void write_core_log(char *data); + +/** + * get_param_server + * + * return string of new server or NULL on duplicate + * + * @param[in] servername - server name to parse + * @param[in] server_list - list of servers to check for duplicates + * @param[in] server_num - number of server in this list + * + * @returns the new server name or NULL + */ +char * get_param_server(char * servername, char * server_list[GM_LISTSIZE], int server_num); + + +/** + * send_timeout_result + * + * send back a timeout result + * + * @param[in] exec_job - the exec job with all results + * + * @return nothing + */ +void send_timeout_result(gm_job_t * exec_job); + +/** + * send_result_back + * + * send back result + * + * @param[in] exec_job - the exec job with all results + * + * @return nothing + */ +void send_result_back(gm_job_t * exec_job); + /** * @} */ | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/include/worker.h ^ |
@@ -185,9 +185,11 @@ /** * count and set the current number of worker * + * @param[in] restart - set to GM_ENABLED if stale worker should be replaced + * * @return nothing */ -void count_current_worker(void); +void count_current_worker(int restart); /** * @} | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/include/worker_client.h ^ |
@@ -49,7 +49,6 @@ void *get_job( gearman_job_st *, void *, size_t *, gearman_return_t * ); void do_exec_job(void); int set_worker( gearman_worker_st *worker ); -void send_result_back(void); void idle_sighandler(int sig); void set_state(int status); void clean_worker_exit(int sig); @@ -62,4 +61,3 @@ /** * @} */ - | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/neb_module/mod_gearman.c ^ |
@@ -89,8 +89,8 @@ set_default_options(mod_gm_opt); /* parse arguments */ - gm_log( GM_LOG_INFO, "Version %s\n", GM_VERSION ); - gm_log( GM_LOG_TRACE, "args: %s\n", args ); + gm_log( GM_LOG_DEBUG, "Version %s\n", GM_VERSION ); + gm_log( GM_LOG_DEBUG, "args: %s\n", args ); gm_log( GM_LOG_TRACE, "nebmodule_init(%i, %i)\n", flags ); gm_log( GM_LOG_DEBUG, "running on libgearman %s\n", gearman_version() ); @@ -245,6 +245,7 @@ /* cleanup */ free_client(&client); + mod_gm_free_opt(mod_gm_opt); return NEB_OK; | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/t/01-utils.c ^ |
@@ -17,8 +17,18 @@ return; } +mod_gm_opt_t * renew_opts(void); +mod_gm_opt_t * renew_opts() { + mod_gm_opt_t *mod_gm_opt; + + mod_gm_opt = malloc(sizeof(mod_gm_opt_t)); + set_default_options(mod_gm_opt); + + return mod_gm_opt; +} + int main(void) { - plan(48); + plan(55); /* lowercase */ char test[100]; @@ -57,8 +67,7 @@ strcpy(test, " test "); like(trim(test), "test", "trim(' test ')"); /* reading keys */ - mod_gm_opt_t *mod_gm_opt; - mod_gm_opt = malloc(sizeof(mod_gm_opt_t)); + mod_gm_opt_t *mod_gm_opt = renew_opts(); int rc = set_default_options(mod_gm_opt); ok(rc == 0, "setting default options"); mod_gm_opt->keyfile = strdup("t/data/test1.key"); @@ -153,13 +162,36 @@ ok(t.tv_usec == 0, "string2timeval 8"); /* command line parsing */ + mod_gm_free_opt(mod_gm_opt); + mod_gm_opt = renew_opts(); strcpy(test, "server=host:4730"); parse_args_line(mod_gm_opt, test, 0); like(mod_gm_opt->server_list[0], "host:4730", "server=host:4730"); + ok(mod_gm_opt->server_num == 1, "server_number = %d", mod_gm_opt->server_num); + mod_gm_free_opt(mod_gm_opt); + mod_gm_opt = renew_opts(); strcpy(test, "server=:4730"); parse_args_line(mod_gm_opt, test, 0); - like(mod_gm_opt->server_list[1], "localhost:4730", "server=:4730"); + like(mod_gm_opt->server_list[0], "localhost:4730", "server=:4730"); + ok(mod_gm_opt->server_num == 1, "server_number = %d", mod_gm_opt->server_num); + + mod_gm_free_opt(mod_gm_opt); + mod_gm_opt = renew_opts(); + strcpy(test, "server=localhost:4730"); + parse_args_line(mod_gm_opt, test, 0); + strcpy(test, "server=localhost:4730"); + parse_args_line(mod_gm_opt, test, 0); + like(mod_gm_opt->server_list[0], "localhost:4730", "duplicate server"); + ok(mod_gm_opt->server_num == 1, "server_number = %d", mod_gm_opt->server_num); + + mod_gm_free_opt(mod_gm_opt); + mod_gm_opt = renew_opts(); + strcpy(test, "server=localhost:4730,localhost:4730,:4730,host:4730,"); + parse_args_line(mod_gm_opt, test, 0); + like(mod_gm_opt->server_list[0], "localhost:4730", "duplicate 
server"); + like(mod_gm_opt->server_list[1], "host:4730", "duplicate server"); + ok(mod_gm_opt->server_num == 2, "server_number = %d", mod_gm_opt->server_num); mod_gm_free_opt(mod_gm_opt); @@ -171,3 +203,4 @@ printf("core logger is not available for tests: %s", data); return; } + | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/t/03-exec_checks.c ^ |
@@ -49,7 +49,7 @@ strcpy(cmd, "./send_gearman --server=blah --key=testtest --host=test --service=test --message=test --returncode=0"); rrc = real_exit_code(run_check(cmd, &result)); diag(result); - cmp_ok(rrc, "==", 1, "cmd '%s' returned rc %d", cmd, rrc); + cmp_ok(rrc, "==", 3, "cmd '%s' returned rc %d", cmd, rrc); free(result); /***************************************** @@ -58,7 +58,7 @@ strcpy(cmd, "./send_multi --server=blah --host=blah < t/data/send_multi.txt"); rrc = real_exit_code(run_check(cmd, &result)); diag(result); - cmp_ok(rrc, "==", 1, "cmd '%s' returned rc %d", cmd, rrc); + cmp_ok(rrc, "==", 3, "cmd '%s' returned rc %d", cmd, rrc); free(result); /***************************************** | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/t/05-neb.c ^ |
@@ -21,6 +21,28 @@ int process_performance_data; int main(void) { + int i; + + plan(28); + + char * test_nebargs[] = { + "encryption=no server=localhost", + "key=test12345 server=localhost", + "encryption=no server=localhost export=log_queue:1:NEBCALLBACK_LOG_DATA", + "encryption=no server=localhost export=log_queue:1:NEBCALLBACK_LOG_DATA export=proc_queue:0:NEBCALLBACK_PROCESS_DATA", + }; + + int num = sizeof(test_nebargs) / sizeof(test_nebargs[0]); + for(i=0;i<num;i++) { + check_neb(test_nebargs[i]); + } + + return exit_status(); +} + + +void check_neb(char * nebargs); +void check_neb(char * nebargs) { int (*initfunc)(int,char *,void *); int (*deinitfunc)(int,int); int (*callfunc)(int,void *); @@ -29,9 +51,6 @@ lt_ptr init_func; lt_ptr deinit_func; lt_ptr call_func; - char * nebargs = "encryption=no server=localhost"; - - plan(7); /* set some external variables */ service_check_timeout = 30; @@ -79,7 +98,7 @@ result=dlclose(neb_handle); ok(result == 0, "dlclose() -> %d", result); - return exit_status(); + return; } /* core log wrapper */ | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/tools/send_gearman.c ^ |
@@ -39,7 +39,7 @@ */ if(parse_arguments(argc, argv) != GM_OK) { print_usage(); - exit( EXIT_FAILURE ); + exit( STATE_UNKNOWN ); } /* set logging */ @@ -55,14 +55,14 @@ /* create client */ if ( create_client( mod_gm_opt->server_list, &client ) != GM_OK ) { - printf( "cannot start client\n" ); - exit( EXIT_FAILURE ); + printf( "send_gearman UNKNOWN: cannot start client\n" ); + exit( STATE_UNKNOWN ); } /* create duplicate client */ if ( create_client_dup( mod_gm_opt->dupserver_list, &client_dup ) != GM_OK ) { - gm_log( GM_LOG_ERROR, "cannot start client for duplicate server\n" ); - exit( EXIT_FAILURE ); + printf( "send_gearman UNKNOWN: cannot start client for duplicate server\n" ); + exit( STATE_UNKNOWN ); } /* send result message */ @@ -175,7 +175,7 @@ printf("see README for a detailed explaination of all options.\n"); printf("\n"); - exit( EXIT_SUCCESS ); + exit( STATE_UNKNOWN ); } @@ -199,11 +199,11 @@ if(mod_gm_opt->result_queue == NULL) { printf( "got no result queue, please use --result_queue=...\n" ); - return(GM_ERROR); + return( STATE_UNKNOWN ); } if(mod_gm_opt->host == NULL) { printf("got no hostname, please use --host=...\n" ); - return(GM_ERROR); + return( STATE_UNKNOWN ); } if(mod_gm_opt->message == NULL) { /* get all lines from stdin, wait maximum of 5 seconds */ @@ -290,9 +290,9 @@ } else { gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully\n" ); - return(GM_ERROR); + return( STATE_UNKNOWN ); } - return(GM_OK); + return( STATE_OK ); } @@ -302,7 +302,7 @@ printf("got no input! Either send plugin output to stdin or use --message=...\n"); - exit(EXIT_FAILURE); + exit( STATE_UNKNOWN ); } | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/tools/send_multi.c ^ |
@@ -40,7 +40,7 @@ */ if(parse_arguments(argc, argv) != GM_OK) { print_usage(); - exit( 3 ); + exit( STATE_UNKNOWN ); } /* set logging */ @@ -56,14 +56,14 @@ /* create client */ if ( create_client( mod_gm_opt->server_list, &client ) != GM_OK ) { - printf( "send_multi CRITICAL: cannot start client\n" ); - exit(2); + printf( "send_multi UNKNOWN: cannot start client\n" ); + exit( STATE_UNKNOWN ); } /* create duplicate client */ if ( create_client_dup( mod_gm_opt->dupserver_list, &client_dup ) != GM_OK ) { - gm_log( GM_LOG_ERROR, "cannot start client for duplicate server\n" ); - exit( EXIT_FAILURE ); + printf( "send_multi UNKNOWN: cannot start client for duplicate server\n" ); + exit( STATE_UNKNOWN ); } /* send result message */ @@ -72,14 +72,14 @@ /* if rc > 0, it contains the number of checks being submitted, otherwise its an error code (-1 - WARNING, -2 - CRITICAL, -3 - UNKNOWN) */ if (rc == 0) { - printf( "send_multi WARNING: %d check_multi child checks submitted\n", rc ); - rc=1; /* WARNING */ + printf( "send_multi UNKNOWN: %d check_multi child checks submitted\n", rc ); + rc=STATE_UNKNOWN; } else if (rc > 0) { printf( "send_multi OK: %d check_multi child check%s submitted\n", rc, (rc>1)?"s":"" ); - rc=0; /* OK */ + rc=STATE_OK; } else { - rc*=-2; + rc*=-1; } gearman_client_free( &client ); @@ -190,7 +190,7 @@ printf("http://my-plugin.de/wiki/projects/check_multi/feed_passive\n"); printf("\n"); - exit(3); + exit( STATE_UNKNOWN ); } @@ -286,9 +286,9 @@ void alarm_sighandler(int sig) { gm_log( GM_LOG_TRACE, "alarm_sighandler(%i)\n", sig ); - printf("got no input! Send plugin output to stdin.\n"); + printf("Timeout after %d seconds - got no input! Send plugin output to stdin.\n", mod_gm_opt->timeout); - exit(EXIT_FAILURE); + exit( STATE_CRITICAL ); } @@ -340,13 +340,27 @@ /* start <CHILD> tag found, but no closing tag </CHILD>, buffer too small? */ } else { buflen=0L; - gm_log( GM_LOG_ERROR, "Error: no closing tag </CHILD> within buffer, buffer size too small? 
discarding buffer, %ld bytes now\n", buflen); - return -1; + printf("send_multi UNKNOWN: no closing tag </CHILD> within buffer, buffer size too small? discarding buffer, %ld bytes now\n", buflen); + return -STATE_UNKNOWN; } gm_log( GM_LOG_TRACE, "\tbuflen after XML chunk parsing:%ld\n", buflen); /* neither <CHILD> nor </CHILD> found, discard buffer */ } else { + + /* no chunks found? then check for message in STDIN */ + if (!count) { + unsigned long i; + /* check buffer for ASCII characters */ + for (i=0; i<buflen && buffer[i] && isascii(buffer[i]); i++) + ; + /* ASCIIZ string? then print messages */ + if (buffer[i] == '\0' && i) { + printf("send_multi UNKNOWN: error msg in input buffer: %s\n", buffer); + return -STATE_UNKNOWN; + } + } + /* discard whole buffer but continue */ buflen=0L; gm_log( GM_LOG_TRACE, "Error: no starting tag <CHILD> within buffer - discarding buffer, buflen now %ld bytes\n", buflen); @@ -361,7 +375,7 @@ /* break if zero read was caused by an error */ if (!feof(stream)) { perror("fread"); - return -2; + return -STATE_CRITICAL; } } else { | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/worker/worker.c ^ |
@@ -28,7 +28,6 @@ int current_number_of_workers = 0; volatile sig_atomic_t current_number_of_jobs = 0; /* must be signal safe */ -pthread_t status_thr; int orig_argc; char ** orig_argv; @@ -153,7 +152,7 @@ /* maintain the population */ while (1) { /* check number of workers every second */ - sleep(1); + sleep(GM_DEFAULT_WORKER_LOOP_SLEEP); /* collect finished workers */ while(waitpid(-1, &status, WNOHANG) > 0) @@ -166,7 +165,7 @@ /* count current worker and jobs */ -void count_current_worker() { +void count_current_worker(int restart) { int x; gm_log( GM_LOG_TRACE, "count_current_worker()\n"); @@ -191,11 +190,16 @@ current_number_of_jobs = 0; for(x=4; x < mod_gm_opt->max_worker+4; x++) { /* verify worker is alive */ + gm_log( GM_LOG_TRACE, "worker slot: shm[%d] = %d\n", x, shm[x]); if( shm[x] != -1 && pid_alive(shm[x]) == FALSE ) { gm_log( GM_LOG_TRACE, "removed stale worker %d, old pid: %d\n", x, shm[x]); shm[x] = -1; + /* immediately start new worker, otherwise the fork rate cannot be guaranteed */ + if(restart == GM_ENABLED) { + make_new_child(GM_WORKER_MULTI); + current_number_of_workers++; + } } - gm_log( GM_LOG_TRACE, "worker slot: shm[%d] = %d\n", x, shm[x]); if(shm[x] != -1) { current_number_of_workers++; } @@ -219,7 +223,7 @@ gm_log( GM_LOG_TRACE, "check_worker_population()\n"); /* set current worker number */ - count_current_worker(); + count_current_worker(GM_ENABLED); /* check if status worker died */ if( shm[3] == -1 ) { @@ -303,7 +307,6 @@ int i; int errors = 0; int verify; - char hostname[GM_BUFFERSIZE]; mod_gm_opt_t * mod_gm_new_opt; mod_gm_new_opt = malloc(sizeof(mod_gm_opt_t)); set_default_options(mod_gm_new_opt); @@ -487,7 +490,7 @@ shm[2] = 0; /* running worker */ shm[3] = -1; /* status worker pid */ for(x = 0; x < mod_gm_opt->max_worker; x++) { - shm[x+4] = -1; /* status worker */ + shm[x+4] = -1; /* normal worker */ } return; @@ -584,7 +587,7 @@ if(waited > GM_CHILD_SHUTDOWN_TIMEOUT) { break; } - count_current_worker(); + 
count_current_worker(GM_DISABLED); if(current_number_of_workers == 0) return; gm_log( GM_LOG_TRACE, "still waiting (%d) %d childs missing...\n", waited, current_number_of_workers); @@ -592,7 +595,7 @@ if(mode == GM_WORKER_STOP) { killpg(0, SIGINT); - count_current_worker(); + count_current_worker(GM_DISABLED); if(current_number_of_workers == 0) return; @@ -610,7 +613,7 @@ } /* kill them the hard way */ - count_current_worker(); + count_current_worker(GM_DISABLED); if(current_number_of_workers == 0) return; for(x=3; x < mod_gm_opt->max_worker+4; x++) { @@ -624,7 +627,7 @@ } /* count childs a last time */ - count_current_worker(); + count_current_worker(GM_DISABLED); if(current_number_of_workers == 0) return; | ||
[+] | Changed | mod_gearman-1.0.9.tar.bz2/worker/worker_client.c ^ |
@@ -30,13 +30,11 @@ char temp_buffer1[GM_BUFFERSIZE]; char temp_buffer2[GM_BUFFERSIZE]; -char hostname[GM_BUFFERSIZE]; gearman_worker_st worker; gearman_client_st client; gearman_client_st client_dup; -gm_job_t * current_job; pid_t current_pid; gm_job_t * exec_job; @@ -165,6 +163,7 @@ sigprocmask(SIG_BLOCK, &block_mask, &old_mask); /* get the data */ + current_gearman_job = job; wsize = gearman_job_workload_size(job); strncpy(workload, (const char*)gearman_job_workload(job), wsize); workload[wsize] = '\0'; @@ -263,6 +262,8 @@ exit( EXIT_SUCCESS ); } + current_gearman_job = NULL; + return NULL; } @@ -316,7 +317,7 @@ if ( !strcmp( exec_job->type, "service" ) || !strcmp( exec_job->type, "host" ) ) { exec_job->output = strdup("(Could Not Start Check In Time)"); - send_result_back(); + send_result_back(exec_job); } return; @@ -326,108 +327,12 @@ /* run the command */ gm_log( GM_LOG_TRACE, "command: %s\n", exec_job->command_line); + current_job = exec_job; execute_safe_command(exec_job, mod_gm_opt->fork_on_exec, mod_gm_opt->identifier ); + current_job = NULL; if ( !strcmp( exec_job->type, "service" ) || !strcmp( exec_job->type, "host" ) ) { - send_result_back(); - } - - return; -} - - -/* send results back */ -void send_result_back() { - gm_log( GM_LOG_TRACE, "send_result_back()\n" ); - - if(exec_job->result_queue == NULL) { - return; - } - if(exec_job->output == NULL) { - return; - } - - gm_log( GM_LOG_TRACE, "queue: %s\n", exec_job->result_queue ); - temp_buffer1[0]='\x0'; - snprintf( temp_buffer1, sizeof( temp_buffer1 )-1, "host_name=%s\ncore_start_time=%i.%i\nstart_time=%i.%i\nfinish_time=%i.%i\nlatency=%f\nreturn_code=%i\nexited_ok=%i\n", - exec_job->host_name, - ( int )exec_job->core_start_time.tv_sec, - ( int )exec_job->core_start_time.tv_usec, - ( int )exec_job->start_time.tv_sec, - ( int )exec_job->start_time.tv_usec, - ( int )exec_job->finish_time.tv_sec, - ( int )exec_job->finish_time.tv_usec, - exec_job->latency, - exec_job->return_code, - exec_job->exited_ok 
- ); - temp_buffer1[sizeof( temp_buffer1 )-1]='\x0'; - - if(exec_job->service_description != NULL) { - temp_buffer2[0]='\x0'; - strncat(temp_buffer2, "service_description=", (sizeof(temp_buffer2)-1)); - strncat(temp_buffer2, exec_job->service_description, (sizeof(temp_buffer2)-1)); - strncat(temp_buffer2, "\n", (sizeof(temp_buffer2)-1)); - - strncat(temp_buffer1, temp_buffer2, (sizeof(temp_buffer1)-1)); - } - temp_buffer1[sizeof( temp_buffer1 )-1]='\x0'; - - if(exec_job->output != NULL) { - temp_buffer2[0]='\x0'; - strncat(temp_buffer2, "output=", (sizeof(temp_buffer2)-1)); - if(mod_gm_opt->debug_result) { - strncat(temp_buffer2, "(", (sizeof(temp_buffer2)-1)); - strncat(temp_buffer2, hostname, (sizeof(temp_buffer2)-1)); - strncat(temp_buffer2, ") - ", (sizeof(temp_buffer2)-1)); - } - strncat(temp_buffer2, exec_job->output, (sizeof(temp_buffer2)-1)); - strncat(temp_buffer2, "\n\n\n", (sizeof(temp_buffer2)-1)); - strncat(temp_buffer1, temp_buffer2, (sizeof(temp_buffer1)-1)); - } - strncat(temp_buffer1, "\n", (sizeof(temp_buffer1)-2)); - temp_buffer1[sizeof( temp_buffer1 )-1]='\x0'; - - gm_log( GM_LOG_TRACE, "data:\n%s\n", temp_buffer1); - - if(add_job_to_queue( &client, - mod_gm_opt->server_list, - exec_job->result_queue, - NULL, - temp_buffer1, - GM_JOB_PRIO_NORMAL, - GM_DEFAULT_JOB_RETRIES, - mod_gm_opt->transportmode, - TRUE - ) == GM_OK) { - gm_log( GM_LOG_TRACE, "send_result_back() finished successfully\n" ); - } - else { - gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully\n" ); - } - - if( mod_gm_opt->dupserver_num ) { - strncpy(temp_buffer2, "type=passive\n", (sizeof(temp_buffer1)-2)); - strncat(temp_buffer2, temp_buffer1, (sizeof(temp_buffer2)-2)); - temp_buffer2[sizeof( temp_buffer2 )-1]='\x0'; - if( add_job_to_queue( &client_dup, - mod_gm_opt->dupserver_list, - exec_job->result_queue, - NULL, - temp_buffer2, - GM_JOB_PRIO_NORMAL, - GM_DEFAULT_JOB_RETRIES, - mod_gm_opt->transportmode, - TRUE - ) == GM_OK) { - gm_log( GM_LOG_TRACE, 
"send_result_back() finished successfully for duplicate server.\n" ); - } - else { - gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully for duplicate server\n" ); - } - } - else { - gm_log( GM_LOG_TRACE, "send_result_back() has no duplicate servers to send to.\n" ); + send_result_back(exec_job); } return; |