Search
j0ke.net Open Build Service
>
Projects
>
server:monitoring
>
monit
> monit-5.8-crash.patch
Sign Up
|
Log In
Username
Password
Cancel
Overview
Repositories
Revisions
Requests
Users
Advanced
Attributes
Meta
File monit-5.8-crash.patch of Package monit
From 802137b5180acf546fcaed1e65497e9631115eec Mon Sep 17 00:00:00 2001 From: tildeslash <info@tildeslash.com> Date: Thu, 8 May 2014 22:39:25 +0200 Subject: [PATCH] - MONIT-55 (Issue #34): fix the memory corruption ... if initprocesstree_sysdep() returned error, the ptree/oldptree destroy may be called twice+ as the references were saved via Util_isProcessRunning called from control.c during process (re)start - if process initialization failed, don't let partial data to be passed to validate --- src/process.c | 463 ++++++++++++++++++++++----------------------- src/process/sysdep_LINUX.c | 95 +++++----- src/util.c | 10 +- 3 files changed, 278 insertions(+), 290 deletions(-) diff --git a/src/process.c b/src/process.c index 5c5cd4d..7df4824 100644 --- a/src/process.c +++ b/src/process.c @@ -73,19 +73,18 @@ * @return TRUE if succeeded otherwise FALSE. */ int init_process_info(void) { - memset(&systeminfo, 0, sizeof(SystemInfo_T)); - gettimeofday(&systeminfo.collected, NULL); - if(uname(&systeminfo.uname) < 0) { - LogError("'%s' resource monitoring initialization error -- uname failed: %s\n", Run.system->name, STRERROR); - return FALSE; - } - - systeminfo.total_cpu_user_percent = -10; - systeminfo.total_cpu_syst_percent = -10; - systeminfo.total_cpu_wait_percent = -10; + memset(&systeminfo, 0, sizeof(SystemInfo_T)); + gettimeofday(&systeminfo.collected, NULL); + if(uname(&systeminfo.uname) < 0) { + LogError("'%s' resource monitoring initialization error -- uname failed: %s\n", Run.system->name, STRERROR); + return FALSE; + } - return (init_process_info_sysdep()); + systeminfo.total_cpu_user_percent = -10; + systeminfo.total_cpu_syst_percent = -10; + systeminfo.total_cpu_wait_percent = -10; + return (init_process_info_sysdep()); } @@ -97,55 +96,50 @@ int init_process_info(void) { * @return TRUE if succeeded otherwise FALSE. */ int update_process_data(Service_T s, ProcessTree_T *pt, int treesize, pid_t pid) { - int leaf; - - ASSERT(s); - ASSERT(systeminfo.mem_kbyte_max > 0); - - /* save the previous pid and set actual one */ - s->inf->priv.process._pid = s->inf->priv.process.pid; - s->inf->priv.process.pid = pid; - - if ((leaf = findprocess(pid, pt, treesize)) != -1) { - - /* save the previous ppid and set actual one */ - s->inf->priv.process._ppid = s->inf->priv.process.ppid; - s->inf->priv.process.ppid = pt[leaf].ppid; - s->inf->priv.process.uid = pt[leaf].uid; - s->inf->priv.process.euid = pt[leaf].euid; - s->inf->priv.process.gid = pt[leaf].gid; - s->inf->priv.process.uptime = time(NULL) - pt[leaf].starttime; - s->inf->priv.process.children = pt[leaf].children_sum; - s->inf->priv.process.mem_kbyte = pt[leaf].mem_kbyte; - s->inf->priv.process.status_flag = pt[leaf].status_flag; - s->inf->priv.process.total_mem_kbyte = pt[leaf].mem_kbyte_sum; - s->inf->priv.process.cpu_percent = pt[leaf].cpu_percent; - s->inf->priv.process.total_cpu_percent = pt[leaf].cpu_percent_sum; - - if (systeminfo.mem_kbyte_max == 0) { - s->inf->priv.process.total_mem_percent = 0; - s->inf->priv.process.mem_percent = 0; - } else { - s->inf->priv.process.total_mem_percent = (int)((double)pt[leaf].mem_kbyte_sum * 1000.0 / systeminfo.mem_kbyte_max); - s->inf->priv.process.mem_percent = (int)((double)pt[leaf].mem_kbyte * 1000.0 / systeminfo.mem_kbyte_max); - } - - } else { - s->inf->priv.process.ppid = 0; - s->inf->priv.process.uid = -1; - s->inf->priv.process.euid = -1; - s->inf->priv.process.gid = -1; - s->inf->priv.process.uptime = 0; - s->inf->priv.process.children = 0; - s->inf->priv.process.total_mem_kbyte = 0; - s->inf->priv.process.total_mem_percent = 0; - s->inf->priv.process.mem_kbyte = 0; - s->inf->priv.process.mem_percent = 0; - s->inf->priv.process.cpu_percent = 0; - s->inf->priv.process.total_cpu_percent = 0; - } - - return TRUE; + ASSERT(s); + ASSERT(systeminfo.mem_kbyte_max > 0); + + /* save the previous pid and set actual one */ + s->inf->priv.process._pid = s->inf->priv.process.pid; + s->inf->priv.process.pid = pid; + + int leaf; + if ((leaf = findprocess(pid, pt, treesize)) != -1) { + /* save the previous ppid and set actual one */ + s->inf->priv.process._ppid = s->inf->priv.process.ppid; + s->inf->priv.process.ppid = pt[leaf].ppid; + s->inf->priv.process.uid = pt[leaf].uid; + s->inf->priv.process.euid = pt[leaf].euid; + s->inf->priv.process.gid = pt[leaf].gid; + s->inf->priv.process.uptime = time(NULL) - pt[leaf].starttime; + s->inf->priv.process.children = pt[leaf].children_sum; + s->inf->priv.process.mem_kbyte = pt[leaf].mem_kbyte; + s->inf->priv.process.status_flag = pt[leaf].status_flag; + s->inf->priv.process.total_mem_kbyte = pt[leaf].mem_kbyte_sum; + s->inf->priv.process.cpu_percent = pt[leaf].cpu_percent; + s->inf->priv.process.total_cpu_percent = pt[leaf].cpu_percent_sum; + if (systeminfo.mem_kbyte_max == 0) { + s->inf->priv.process.total_mem_percent = 0; + s->inf->priv.process.mem_percent = 0; + } else { + s->inf->priv.process.total_mem_percent = (int)((double)pt[leaf].mem_kbyte_sum * 1000.0 / systeminfo.mem_kbyte_max); + s->inf->priv.process.mem_percent = (int)((double)pt[leaf].mem_kbyte * 1000.0 / systeminfo.mem_kbyte_max); + } + } else { + s->inf->priv.process.ppid = 0; + s->inf->priv.process.uid = -1; + s->inf->priv.process.euid = -1; + s->inf->priv.process.gid = -1; + s->inf->priv.process.uptime = 0; + s->inf->priv.process.children = 0; + s->inf->priv.process.total_mem_kbyte = 0; + s->inf->priv.process.total_mem_percent = 0; + s->inf->priv.process.mem_kbyte = 0; + s->inf->priv.process.mem_percent = 0; + s->inf->priv.process.cpu_percent = 0; + s->inf->priv.process.total_cpu_percent = 0; + } + return TRUE; } @@ -154,47 +148,45 @@ int update_process_data(Service_T s, ProcessTree_T *pt, int treesize, pid_t pid) * @return TRUE if successful, otherwise FALSE */ int update_system_load() { - - if (Run.doprocess) { - - ASSERT(systeminfo.mem_kbyte_max > 0); - - /** Get load average triplet */ - if (-1 == getloadavg_sysdep(systeminfo.loadavg, 3)) { - LogError("'%s' statistic error -- load average gathering failed\n", Run.system->name); - goto error1; - } - - /** Get memory usage statistic */ - if (! used_system_memory_sysdep(&systeminfo)) { - LogError("'%s' statistic error -- memory usage gathering failed\n", Run.system->name); - goto error2; - } - systeminfo.total_mem_percent = (int)(1000 * (double)systeminfo.total_mem_kbyte / (double)systeminfo.mem_kbyte_max); - systeminfo.total_swap_percent = systeminfo.swap_kbyte_max ? (int)(1000 * (double)systeminfo.total_swap_kbyte / (double)systeminfo.swap_kbyte_max) : 0; - - /** Get CPU usage statistic */ - if (! used_system_cpu_sysdep(&systeminfo)) { - LogError("'%s' statistic error -- cpu usage gathering failed\n", Run.system->name); - goto error3; - } - - return TRUE; - } + if (Run.doprocess) { + ASSERT(systeminfo.mem_kbyte_max > 0); + + /** Get load average triplet */ + if (-1 == getloadavg_sysdep(systeminfo.loadavg, 3)) { + LogError("'%s' statistic error -- load average gathering failed\n", Run.system->name); + goto error1; + } + + /** Get memory usage statistic */ + if (! used_system_memory_sysdep(&systeminfo)) { + LogError("'%s' statistic error -- memory usage gathering failed\n", Run.system->name); + goto error2; + } + systeminfo.total_mem_percent = (int)(1000 * (double)systeminfo.total_mem_kbyte / (double)systeminfo.mem_kbyte_max); + systeminfo.total_swap_percent = systeminfo.swap_kbyte_max ? (int)(1000 * (double)systeminfo.total_swap_kbyte / (double)systeminfo.swap_kbyte_max) : 0; + + /** Get CPU usage statistic */ + if (! used_system_cpu_sysdep(&systeminfo)) { + LogError("'%s' statistic error -- cpu usage gathering failed\n", Run.system->name); + goto error3; + } + + return TRUE; + } error1: - systeminfo.loadavg[0] = 0; - systeminfo.loadavg[1] = 0; - systeminfo.loadavg[2] = 0; + systeminfo.loadavg[0] = 0; + systeminfo.loadavg[1] = 0; + systeminfo.loadavg[2] = 0; error2: - systeminfo.total_mem_kbyte = 0; - systeminfo.total_mem_percent = 0; + systeminfo.total_mem_kbyte = 0; + systeminfo.total_mem_percent = 0; error3: - systeminfo.total_cpu_user_percent = 0; - systeminfo.total_cpu_syst_percent = 0; - systeminfo.total_cpu_wait_percent = 0; + systeminfo.total_cpu_user_percent = 0; + systeminfo.total_cpu_syst_percent = 0; + systeminfo.total_cpu_wait_percent = 0; - return FALSE; + return FALSE; } @@ -203,91 +195,96 @@ error3: * @return treesize >= 0 if succeeded otherwise < 0 */ int initprocesstree(ProcessTree_T **pt_r, int *size_r, ProcessTree_T **oldpt_r, int *oldsize_r) { - int i; - int oldentry; - ProcessTree_T *pt; - ProcessTree_T *oldpt; - int root = -1; - - if (*pt_r != NULL) { - if (oldpt_r && *oldpt_r != NULL) - delprocesstree(oldpt_r, oldsize_r); - *oldpt_r = *pt_r; - *oldsize_r = *size_r; - } - - if ((*size_r = initprocesstree_sysdep(pt_r)) <= 0) { - DEBUG("system statistic error -- cannot initialize the process tree => process resource monitoring disabled\n"); - Run.doprocess = FALSE; - return -1; - } else if (Run.doprocess == FALSE) { - DEBUG("system statistic -- initialization of the process tree succeeded => process resource monitoring enabled\n"); - Run.doprocess = TRUE; - } - - pt = *pt_r; - oldpt = *oldpt_r; - - if (pt == NULL) - return 0; - - for (i = 0; i < (volatile int)*size_r; i ++) { - if (oldpt && ((oldentry = findprocess(pt[i].pid, oldpt, *oldsize_r)) != -1)) { - pt[i].cputime_prev = oldpt[oldentry].cputime; - pt[i].time_prev = oldpt[oldentry].time; - - /* The cpu_percent may be set already (for example by HPUX module) */ - if (pt[i].cpu_percent == 0 && pt[i].cputime_prev != 0 && pt[i].cputime != 0 && pt[i].cputime > pt[i].cputime_prev) { - pt[i].cpu_percent = (int)((1000 * (double)(pt[i].cputime - pt[i].cputime_prev) / (pt[i].time - pt[i].time_prev)) / systeminfo.cpus); - if (pt[i].cpu_percent > 1000) - pt[i].cpu_percent = 1000; - } - } else { - pt[i].cputime_prev = 0; - pt[i].time_prev = 0.0; - pt[i].cpu_percent = 0; - } - - if (pt[i].pid == pt[i].ppid) { - pt[i].parent = i; - continue; - } - - if ((pt[i].parent = findprocess(pt[i].ppid, pt, *size_r)) == -1) { - /* Parent process wasn't found - on Linux this is normal: main process with PID 0 is not listed, similarly in FreeBSD jail. - * We create virtual process entry for missing parent so we can have full tree-like structure with root. */ - int j = (*size_r)++; - - pt = RESIZE(*pt_r, *size_r * sizeof(ProcessTree_T)); - memset(&pt[j], 0, sizeof(ProcessTree_T)); - pt[j].ppid = pt[j].pid = pt[i].ppid; - pt[i].parent = j; - } - - if (! connectchild(pt, pt[i].parent, i)) { - /* connection to parent process has failed, this is usually caused in the part above */ - DEBUG("system statistic error -- cannot connect process id %d to its parent %d\n", pt[i].pid, pt[i].ppid); - pt[i].pid = 0; - continue; - } - } - - /* The main process in Solaris zones and FreeBSD host doesn't have pid 1, so try to find process which is parent of itself */ - for (i = 0; i < *size_r; i++) { - if (pt[i].pid == pt[i].ppid) { - root = i; - break; - } - } - - if (root == -1) { - DEBUG("system statistic error -- cannot find root process id\n"); - return -1; - } - - fillprocesstree(pt, root); - - return *size_r; + ASSERT(pt_r); + ASSERT(size_r); + ASSERT(oldpt_r); + ASSERT(oldsize_r); + + if (*pt_r) { + if (*oldpt_r) + delprocesstree(oldpt_r, oldsize_r); + *oldpt_r = *pt_r; + *oldsize_r = *size_r; + *pt_r = NULL; + *size_r = 0; + } + + if ((*size_r = initprocesstree_sysdep(pt_r)) <= 0 || ! *pt_r) { + DEBUG("System statistic error -- cannot initialize the process tree -- process resource monitoring disabled\n"); + Run.doprocess = FALSE; + if (*oldpt_r) + delprocesstree(oldpt_r, oldsize_r); + return -1; + } else if (Run.doprocess == FALSE) { + DEBUG("System statistic -- initialization of the process tree succeeded -- process resource monitoring enabled\n"); + Run.doprocess = TRUE; + } + + int oldentry; + ProcessTree_T *pt = *pt_r; + ProcessTree_T *oldpt = *oldpt_r; + for (int i = 0; i < (volatile int)*size_r; i ++) { + if (oldpt && ((oldentry = findprocess(pt[i].pid, oldpt, *oldsize_r)) != -1)) { + pt[i].cputime_prev = oldpt[oldentry].cputime; + pt[i].time_prev = oldpt[oldentry].time; + + /* The cpu_percent may be set already (for example by HPUX module) */ + if (pt[i].cpu_percent == 0 && pt[i].cputime_prev != 0 && pt[i].cputime != 0 && pt[i].cputime > pt[i].cputime_prev) { + pt[i].cpu_percent = (int)((1000 * (double)(pt[i].cputime - pt[i].cputime_prev) / (pt[i].time - pt[i].time_prev)) / systeminfo.cpus); + if (pt[i].cpu_percent > 1000) + pt[i].cpu_percent = 1000; + } + } else { + pt[i].cputime_prev = 0; + pt[i].time_prev = 0.0; + pt[i].cpu_percent = 0; + } + + if (pt[i].pid == pt[i].ppid) { + pt[i].parent = i; + continue; + } + + if ((pt[i].parent = findprocess(pt[i].ppid, pt, *size_r)) == -1) { + /* Parent process wasn't found - on Linux this is normal: main process with PID 0 is not listed, similarly in FreeBSD jail. + * We create virtual process entry for missing parent so we can have full tree-like structure with root. */ + int j = (*size_r)++; + + pt = RESIZE(*pt_r, *size_r * sizeof(ProcessTree_T)); + memset(&pt[j], 0, sizeof(ProcessTree_T)); + pt[j].ppid = pt[j].pid = pt[i].ppid; + pt[i].parent = j; + } + + if (! connectchild(pt, pt[i].parent, i)) { + /* connection to parent process has failed, this is usually caused in the part above */ + DEBUG("System statistic error -- cannot connect process id %d to its parent %d\n", pt[i].pid, pt[i].ppid); + pt[i].pid = 0; + continue; + } + } + + /* The main process in Solaris zones and FreeBSD host doesn't have pid 1, so try to find process which is parent of itself */ + int root = -1; + for (int i = 0; i < *size_r; i++) { + if (pt[i].pid == pt[i].ppid) { + root = i; + break; + } + } + + if (root == -1) { + DEBUG("System statistic error -- cannot find root process id\n"); + if (*oldpt_r) + delprocesstree(oldpt_r, oldsize_r); + if (*pt_r) + delprocesstree(pt_r, size_r); + return -1; + } + + fillprocesstree(pt, root); + + return *size_r; } @@ -299,80 +296,74 @@ int initprocesstree(ProcessTree_T **pt_r, int *size_r, ProcessTree_T **oldpt_r, * @return process index if succeeded otherwise -1 */ int findprocess(int pid, ProcessTree_T *pt, int size) { - int i; + ASSERT(pt); - ASSERT(pt); + if (size <= 0) + return -1; - if (size <= 0) - return -1; + for (int i = 0; i < size; i++) + if (pid == pt[i].pid) + return i; - for (i = 0; i < size; i++) - if (pid == pt[i].pid) - return i; - - return -1; + return -1; } /** * Delete the process tree */ void delprocesstree(ProcessTree_T **reference, int *size) { - int i; - ProcessTree_T *pt = *reference; - if (pt) { - for (i = 0; i < *size; i++) { - FREE(pt[i].cmdline); - FREE(pt[i].children); - } - FREE(pt); - *reference = NULL; - *size = 0; - } - return; + ProcessTree_T *pt = *reference; + if (pt) { + for (int i = 0; i < *size; i++) { + FREE(pt[i].cmdline); + FREE(pt[i].children); + } + FREE(pt); + *reference = NULL; + *size = 0; + } } void process_testmatch(char *pattern) { #ifdef HAVE_REGEX_H - regex_t *regex_comp; - int reg_return; -#endif - -#ifdef HAVE_REGEX_H - NEW(regex_comp); - if ((reg_return = regcomp(regex_comp, pattern, REG_NOSUB|REG_EXTENDED))) { - char errbuf[STRLEN]; - regerror(reg_return, regex_comp, errbuf, STRLEN); - regfree(regex_comp); - FREE(regex_comp); - printf("Regex %s parsing error: %s\n", pattern, errbuf); - exit(1); - } + regex_t *regex_comp; + int reg_return; + + NEW(regex_comp); + if ((reg_return = regcomp(regex_comp, pattern, REG_NOSUB|REG_EXTENDED))) { + char errbuf[STRLEN]; + regerror(reg_return, regex_comp, errbuf, STRLEN); + regfree(regex_comp); + FREE(regex_comp); + printf("Regex %s parsing error: %s\n", pattern, errbuf); + exit(1); + } #endif - initprocesstree(&ptree, &ptreesize, &oldptree, &oldptreesize); - if (Run.doprocess) { - int i, count = 0; - printf("List of processes matching pattern \"%s\":\n", pattern); - printf("------------------------------------------\n"); - for (i = 0; i < ptreesize; i++) { - int match = FALSE; - if (ptree[i].cmdline && ! strstr(ptree[i].cmdline, "procmatch")) { + initprocesstree(&ptree, &ptreesize, &oldptree, &oldptreesize); + if (Run.doprocess) { + int count = 0; + printf("List of processes matching pattern \"%s\":\n", pattern); + printf("------------------------------------------\n"); + for (int i = 0; i < ptreesize; i++) { + int match = FALSE; + if (ptree[i].cmdline && ! strstr(ptree[i].cmdline, "procmatch")) { #ifdef HAVE_REGEX_H - match = regexec(regex_comp, ptree[i].cmdline, 0, NULL, 0) ? FALSE : TRUE; + match = regexec(regex_comp, ptree[i].cmdline, 0, NULL, 0) ? FALSE : TRUE; #else - match = strstr(ptree[i].cmdline, pattern) ? TRUE : FALSE; + match = strstr(ptree[i].cmdline, pattern) ? TRUE : FALSE; #endif - if (match) { - printf("\t%s\n", ptree[i].cmdline); - count++; + if (match) { + printf("\t%s\n", ptree[i].cmdline); + count++; + } + } + } + printf("------------------------------------------\n"); + printf("Total matches: %d\n", count); + if (count > 1) + printf("WARNING: multiple processes matched the pattern. The check is FIRST-MATCH based, please refine the pattern\n"); } - } - } - printf("------------------------------------------\n"); - printf("Total matches: %d\n", count); - if (count > 1) - printf("WARNING: multiple processes matched the pattern. The check is FIRST-MATCH based, please refine the pattern\n"); - } } diff --git a/src/process/sysdep_LINUX.c b/src/process/sysdep_LINUX.c index 025b4ba..2ba4448 100644 --- a/src/process/sysdep_LINUX.c +++ b/src/process/sysdep_LINUX.c @@ -147,8 +147,10 @@ int init_process_info_sysdep(void) { long page_size; int page_shift; - if (! read_proc_file(buf, sizeof(buf), "meminfo", -1, NULL)) + if (! read_proc_file(buf, sizeof(buf), "meminfo", -1, NULL)) { + DEBUG("system statistic error -- cannot read /proc/meminfo\n"); return FALSE; + } if (! (ptr = strstr(buf, MEMTOTAL))) { DEBUG("system statistic error -- cannot get real memory amount\n"); return FALSE; @@ -171,7 +173,8 @@ int init_process_info_sysdep(void) { return FALSE; } - for (page_shift = 0; page_size != 1; page_size >>= 1, page_shift++); + for (page_shift = 0; page_size != 1; page_size >>= 1, page_shift++) + ; page_shift_to_kb = page_shift - 10; return TRUE; @@ -189,7 +192,11 @@ int initprocesstree_sysdep(ProcessTree_T ** reference) { int i = 0, j; int rv, bytes = 0; int treesize = 0; + int stat_pid = 0; int stat_ppid = 0; + int stat_uid = 0; + int stat_euid = 0; + int stat_gid = 0; char *tmp = NULL; char procname[STRLEN]; char buf[1024]; @@ -217,30 +224,23 @@ int initprocesstree_sysdep(ProcessTree_T ** reference) { /* Insert data from /proc directory */ for (i = 0; i < treesize; i++) { + stat_pid = atoi(globbuf.gl_pathv[i] + strlen("/proc/")); - pt[i].pid = atoi(globbuf.gl_pathv[i] + strlen("/proc/")); - - if (!read_proc_file(buf, sizeof(buf), "stat", pt[i].pid, NULL)) { - DEBUG("system statistic error -- cannot read /proc/%d/stat\n", pt[i].pid); + /********** /proc/PID/stat **********/ + if (!read_proc_file(buf, sizeof(buf), "stat", stat_pid, NULL)) { + DEBUG("system statistic error -- cannot read /proc/%d/stat\n", stat_pid); continue; } - - pt[i].time = get_float_time(); - if (!(tmp = strrchr(buf, ')'))) { - DEBUG("system statistic error -- file /proc/%d/stat parse error\n", pt[i].pid); + DEBUG("system statistic error -- file /proc/%d/stat parse error\n", stat_pid); continue; } *tmp = 0; if (sscanf(buf, "%*d (%256s", procname) != 1) { - DEBUG("system statistic error -- file /proc/%d/stat process name parse error\n", pt[i].pid); + DEBUG("system statistic error -- file /proc/%d/stat process name parse error\n", stat_pid); continue; } - tmp += 2; - - /* This implementation is done by using fs/procfs/array.c as a basis - * it is also worth looking into the source of the procps utils */ if (sscanf(tmp, "%c %d %*d %*d %*d %*d %*u %*u" "%*u %*u %*u %lu %lu %ld %ld %*d %*d %*d " @@ -254,58 +254,55 @@ int initprocesstree_sysdep(ProcessTree_T ** reference) { &stat_item_cstime, &stat_item_starttime, &stat_item_rss) != 8) { - DEBUG("system statistic error -- file /proc/%d/stat parse error\n", pt[i].pid); + DEBUG("system statistic error -- file /proc/%d/stat parse error\n", stat_pid); continue; } - pt[i].ppid = stat_ppid; - pt[i].starttime = get_starttime() + (time_t)(stat_item_starttime / HZ); - - /* jiffies -> seconds = 1 / HZ - * HZ is defined in "asm/param.h" and it is usually 1/100s but on - * alpha system it is 1/1024s */ - pt[i].cputime = ((float)(stat_item_utime + stat_item_stime) * 10.0) / HZ; - pt[i].cpu_percent = 0; - - /* State is Zombie -> then we are a Zombie ... clear or? (-: */ - if (stat_item_state == 'Z') - pt[i].status_flag |= PROCESS_ZOMBIE; - - if (page_shift_to_kb < 0) - pt[i].mem_kbyte = (stat_item_rss >> abs(page_shift_to_kb)); - else - pt[i].mem_kbyte = (stat_item_rss << abs(page_shift_to_kb)); - - if (! read_proc_file(buf, sizeof(buf), "status", pt[i].pid, NULL)) - return FALSE; - + /********** /proc/PID/status **********/ + if (! read_proc_file(buf, sizeof(buf), "status", stat_pid, NULL)) { + DEBUG("system statistic error -- cannot read /proc/%d/status\n", stat_pid); + continue; + } if (! (tmp = strstr(buf, UID))) { DEBUG("system statistic error -- cannot find process uid\n"); - return FALSE; + continue; } - if (sscanf(tmp+strlen(UID), "\t%d\t%d", &(pt[i].uid), &(pt[i].euid)) != 2) { + if (sscanf(tmp+strlen(UID), "\t%d\t%d", &stat_uid, &stat_euid) != 2) { DEBUG("system statistic error -- cannot read process uid\n"); - return FALSE; + continue; } - if (! (tmp = strstr(buf, GID))) { DEBUG("system statistic error -- cannot find process gid\n"); - return FALSE; + continue; } - if (sscanf(tmp+strlen(GID), "\t%d", &(pt[i].gid)) != 1) { + if (sscanf(tmp+strlen(GID), "\t%d", &stat_gid) != 1) { DEBUG("system statistic error -- cannot read process gid\n"); - return FALSE; + continue; } - if (! read_proc_file(buf, sizeof(buf), "cmdline", pt[i].pid, &bytes)) { - DEBUG("system statistic error -- cannot read /proc/%d/cmdline\n", pt[i].pid); + /********** /proc/PID/cmdline **********/ + if (! read_proc_file(buf, sizeof(buf), "cmdline", stat_pid, &bytes)) { + DEBUG("system statistic error -- cannot read /proc/%d/cmdline\n", stat_pid); continue; } - /* The cmdline file contains argv elements/strings terminated separated by '\0' => join the string: */ - for (j = 0; j < (bytes - 1); j++) + for (j = 0; j < (bytes - 1); j++) // The cmdline file contains argv elements/strings terminated separated by '\0' => join the string if (buf[j] == 0) buf[j] = ' '; - pt[i].cmdline = *buf ? Str_dup(buf) : Str_dup(procname); + + /* Set the data in ptree only if all process related reads succeeded (prevent partial data in the case that continue was called during data gathering) */ + pt[i].time = get_float_time(); + pt[i].pid = stat_pid; + pt[i].ppid = stat_ppid; + pt[i].uid = stat_uid; + pt[i].euid = stat_euid; + pt[i].gid = stat_gid; + pt[i].starttime = get_starttime() + (time_t)(stat_item_starttime / HZ); + pt[i].cmdline = Str_dup(*buf ? buf : procname); + pt[i].cputime = ((float)(stat_item_utime + stat_item_stime) * 10.0) / HZ; // jiffies -> seconds = 1 / HZ. HZ is defined in "asm/param.h" and it is usually 1/100s but on alpha system it is 1/1024s + pt[i].cpu_percent = 0; + pt[i].mem_kbyte = (page_shift_to_kb < 0) ? (stat_item_rss >> abs(page_shift_to_kb)) : (stat_item_rss << abs(page_shift_to_kb)); + if (stat_item_state == 'Z') // State is Zombie -> then we are a Zombie ... clear or? (-: + pt[i].status_flag |= PROCESS_ZOMBIE; } *reference = pt; diff --git a/src/util.c b/src/util.c index a159bad..eca2c90 100644 --- a/src/util.c +++ b/src/util.c @@ -1332,12 +1332,12 @@ int Util_isProcessRunning(Service_T s, int refresh) { ASSERT(s); errno = 0; if (s->matchlist) { + if (refresh || ! ptree || ! ptreesize) + initprocesstree(&ptree, &ptreesize, &oldptree, &oldptreesize); + /* The process table read may sporadically fail during read, because we're using glob on some platforms which may fail if the proc filesystem + * which it traverses is changed during glob (process stopped). Note that the glob failure is rare and temporary - it will be OK on next cycle. + * We skip the process matching that cycle however because we don't have process informations - will retry next cycle */ if (Run.doprocess) { - if (refresh || ! ptree || ! ptreesize) - initprocesstree(&ptree, &ptreesize, &oldptree, &oldptreesize); - /* The process table read may sporadically fail during read, because we're using glob on some platforms which may fail if the proc filesystem - * which it traverses is changed during glob (process stopped). Note that the glob failure is rare and temporary - it will be OK on next cycle. - * We skip the process matching that cycle however because we don't have process informations - will retry next cycle */ for (i = 0; i < ptreesize; i++) { int found = FALSE; if (ptree[i].cmdline) { -- 1.8.5.2