From 663a4dce7c52dd50fefd1d71d9ff3ec811acb795 Mon Sep 17 00:00:00 2001
From: Stef Walter
Date: Thu, 15 Jan 2009 23:20:54 +0000
Subject: Add support for monitoring CPU usage.

---
 module/bsnmp-jails.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 233 insertions(+), 10 deletions(-)

(limited to 'module/bsnmp-jails.c')

diff --git a/module/bsnmp-jails.c b/module/bsnmp-jails.c
index 5e5e030..b88cdd7 100644
--- a/module/bsnmp-jails.c
+++ b/module/bsnmp-jails.c
@@ -130,6 +130,20 @@ TAILQ_HEAD(monitor_list, monitor);
 /* list of monitor structures */
 static struct monitor_list monitors = TAILQ_HEAD_INITIALIZER (monitors);
 
+/*
+ * A 'top level' process of a jail: one whose parent is not in the same
+ * jail. CPU time of its descendants within the jail is accounted here.
+ */
+struct ptop {
+	pid_t pid;             /* pid of the top level process */
+	uint32_t cpu_time;     /* accumulated CPU time, in 1/100ths of a second */
+
+	#define PROC_GONE 0    /* not seen (yet) during the current refresh */
+	#define PROC_EXISTS 1  /* known from an earlier refresh and seen again */
+	#define PROC_NEW 2     /* first seen during the current refresh */
+	int status;
+};
+
 struct jaildat {
 	uint32_t index;
 	TAILQ_ENTRY(jaildat) link;
@@ -152,9 +166,15 @@ struct jaildat {
 	uint64_t in_packets;
 	uint64_t out_octets;
 	uint64_t out_packets;
-	uint32_t n_processes;
 	uint64_t disk_space;
 	uint64_t disk_files;
+	uint32_t cpu_time_total;	/* cpu_time_offset plus time of the listed processes */
+	uint32_t cpu_time_offset;	/* time of top level processes that have gone away */
+	uint32_t n_processes;
+
+	/* Top process information, ptops is kept sorted by pid in reverse */
+	uint32_t n_ptops;
+	struct ptop *ptops;
 };
 
 TAILQ_HEAD(jaildat_list, jaildat);
@@ -207,8 +227,21 @@ emsg(const char *format, ...)
 	va_start (va, format);
 	vsyslog (LOG_ERR, format, va);
 	vfprintf (stderr, format, va);
+	fputc ('\n', stderr);
+	va_end (va);
+}
+
+#if 0
+static void
+msg(const char *format, ...)
+{
+	va_list va;
+	va_start (va, format);
+	vfprintf (stderr, format, va);
+	fputc ('\n', stderr);
 	va_end (va);
 }
+#endif
 
 typedef void* (*if_enumerator) (struct ifreq *ifr, void* data);
 
@@ -787,12 +820,101 @@ measure_start (void *unused)
  * PROCESS LOOKUPS
  */
 
+/*
+ * qsort() comparator for the kvm process listing: orders struct kinfo_proc
+ * entries by ki_pid, highest pid first.
+ */
+static int
+process_compare_kp_pid (const void *a, const void *b)
+{
+	pid_t pida, pidb;
+
+	pida = ((const struct kinfo_proc*)a)->ki_pid;
+	pidb = ((const struct kinfo_proc*)b)->ki_pid;
+
+	if (pida == pidb)
+		return 0;
+
+	/* Note we're sorting in reverse */
+	return (pida < pidb) ? 1 : -1;
+}
+
+/*
+ * bsearch() comparator used to find a process's parent in the reverse
+ * sorted process listing: matches the key's ki_ppid against an element's
+ * ki_pid.
+ */
+static int
+process_compare_kp_ppid_pid (const void *a, const void *b)
+{
+	pid_t pida, pidb;
+
+	pida = ((const struct kinfo_proc*)a)->ki_ppid;
+	pidb = ((const struct kinfo_proc*)b)->ki_pid;
+
+	if (pida == pidb)
+		return 0;
+
+	/* Note we're sorting in reverse */
+	return (pida < pidb) ? 1 : -1;
+}
+
+/*
+ * bsearch() comparator used to look up a process in a jail's reverse
+ * sorted ptops array: matches the key's ki_pid against an element's pid.
+ */
+static int
+process_compare_kp_ptop_pid (const void *a, const void *b)
+{
+	pid_t pida, pidb;
+
+	pida = ((const struct kinfo_proc*)a)->ki_pid;
+	pidb = ((const struct ptop*)b)->pid;
+
+	if (pida == pidb)
+		return 0;
+
+	/* Note we're sorting in reverse */
+	return (pida < pidb) ? 1 : -1;
+}
+
+/*
+ * qsort() comparator for a jail's ptops array: orders struct ptop entries
+ * by pid, highest pid first.
+ */
+static int
+process_compare_ptop_pid (const void *a, const void *b)
+{
+	pid_t pida, pidb;
+
+	pida = ((const struct ptop*)a)->pid;
+	pidb = ((const struct ptop*)b)->pid;
+
+	if (pida == pidb)
+		return 0;
+
+	/* Note we're sorting in reverse */
+	return (pida < pidb) ? 1 : -1;
+}
+
+/*
+ * Recount the processes of every known jail and recompute each jail's
+ * total CPU time from a fresh kvm process listing.
+ *
+ * CPU time is accounted per 'top level' process of a jail. The time of
+ * top level processes that were not found in the listing anymore is added
+ * to the jail's cpu_time_offset, which is the starting value of the total
+ * on every refresh.
+ */
 static void
 process_refresh_all (void)
 {
+	struct kinfo_proc *kp, *tkp, *k;
 	struct jaildat *jail;
-	struct kinfo_proc *kp;
-	int nentries, i, id;
+	struct ptop *ptop;
+	int nentries, i, jid;
+	uint64_t cpu_time;
+	void *alloc;
 
 	/* Get a process listing */
 	kp = kvm_getprocs (kvm_handle, KERN_PROC_PROC, 0, &nentries);
@@ -801,20 +923,115 @@ process_refresh_all (void)
 		return;
 	}
 
-	/* Clear process counts for each jail */
-	TAILQ_FOREACH (jail, &jaildats, link)
+	/* Sort the input we get, in reverse */
+	qsort (kp, nentries, sizeof (*kp), process_compare_kp_pid);
+
+	/* Mark all processes in the jail for later sweep */
+	TAILQ_FOREACH (jail, &jaildats, link) {
+		for (i = 0; i < jail->n_ptops; ++i)
+			jail->ptops[i].status = PROC_GONE;
+
 		jail->n_processes = 0;
+		jail->cpu_time_total = jail->cpu_time_offset;
+	}
 
 	/* Okay now loop and add to each process's jail */
 	for (i = 0; i < nentries; i++) {
 
-		id = kp[i].ki_jid;
-		if (id == 0)
+		jid = kp[i].ki_jid;
+
+		/* No jail? */
+		if (jid == 0)
 			continue;
 
-		jail = hsh_get (jaildat_by_id, &id, sizeof (id));
-		if (jail)
-			jail->n_processes++;
+		jail = hsh_get (jaildat_by_id, &jid, sizeof (jid));
+		if (jail == NULL)
+			continue;
+
+		jail->n_processes += 1;
+
+		/*
+		 * Find the top level process within jail to account to: walk up
+		 * the parent chain for as long as the parent is in the listing
+		 * and belongs to the same jail.
+		 */
+		tkp = &kp[i];
+		for (;;) {
+			if (tkp->ki_pid == tkp->ki_ppid)
+				break;
+			k = bsearch (tkp, kp, nentries, sizeof (*kp), process_compare_kp_ppid_pid);
+			if (k == NULL || k->ki_jid != jid)
+				break;
+			tkp = k;
+		}
+
+		/* Find top process for that pid */
+		ptop = bsearch (tkp, jail->ptops, jail->n_ptops,
+		                sizeof (struct ptop), process_compare_kp_ptop_pid);
+		if (ptop == NULL) {
+			alloc = realloc (jail->ptops, (jail->n_ptops + 1) * sizeof (struct ptop));
+			if (alloc == NULL) {
+				emsg ("out of memory");
+				continue;
+			}
+
+			jail->ptops = alloc;
+			ptop = jail->ptops + jail->n_ptops;
+			jail->n_ptops += 1;
+
+			ptop->pid = tkp->ki_pid;
+			ptop->cpu_time = 0;
+			ptop->status = PROC_NEW;
+		}
+
+		/*
+		 * Account CPU time to this process, in 1/100ths of a second.
+		 * The microsecond sum is kept in 64 bits until it has been
+		 * scaled down, a 32 bit value would wrap after ~71 minutes.
+		 */
+		cpu_time = kp[i].ki_runtime + kp[i].ki_childtime.tv_usec;
+		cpu_time /= 10000;
+		cpu_time += kp[i].ki_childtime.tv_sec * 100;
+
+		/* First process seen for a known ptop in this refresh, restart its sum */
+		if (ptop->status == PROC_GONE) {
+			ptop->cpu_time = 0;
+			ptop->status = PROC_EXISTS;
+		}
+
+		ptop->cpu_time += (uint32_t)cpu_time;
+		jail->cpu_time_total += (uint32_t)cpu_time;
+
+		/* Keep the array sorted, the bsearch above relies on it */
+		qsort (jail->ptops, jail->n_ptops, sizeof (struct ptop), process_compare_ptop_pid);
+	}
+
+	/* Sweep out the top level processes that were not seen this time */
+	TAILQ_FOREACH (jail, &jaildats, link) {
+
+		for (i = 0; i < jail->n_ptops; ++i) {
+			if (jail->ptops[i].status != PROC_GONE)
+				continue;
+
+			/*
+			 * Add time to the cpu_time_offset if it's a 'top level'
+			 * process that's going away.
+			 */
+
+			jail->cpu_time_offset += jail->ptops[i].cpu_time;
+			jail->cpu_time_total += jail->ptops[i].cpu_time;
+
+			/*
+			 * Remove nonexistent ptop. Only the entries after it are
+			 * moved down, that's one less than what's left from here.
+			 */
+			memmove (jail->ptops + i, jail->ptops + i + 1,
+			         (jail->n_ptops - i - 1) * sizeof (struct ptop));
+			jail->n_ptops -= 1;
+
+			/* Look at this slot again, it now holds the next entry */
+			i -= 1;
+		}
 	}
 }
 
@@ -858,6 +1075,9 @@ jail_free (struct jaildat *jail)
 		jaildat_count--;
 	}
 
+	free (jail->ptops);
+	jail->ptops = NULL;
+
 	free (jail);
 }
 
@@ -1262,6 +1482,9 @@ op_jailentry (struct snmp_context *ctx, struct snmp_value *value,
 	case LEAF_jailProcesses:
 		value->v.integer = jail->n_processes;
 		return SNMP_ERR_NOERROR;
+	case LEAF_jailCpuTime:
+		value->v.integer = jail->cpu_time_total;
+		return SNMP_ERR_NOERROR;
 	case LEAF_jailDiskSpace:
 		value->v.counter64 = jail->disk_space;
 		return SNMP_ERR_NOERROR;
-- 
cgit v1.2.3