summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorBalbir Singh <balbir@linux.vnet.ibm.com>2007-10-18 23:39:44 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 11:53:36 -0700
commit846c7bb055747989891f5cd2bb6e8d56243ba1e7 (patch)
treee044041366efa8298157c4ae86615d68d30dd6d2 /kernel
parentc2e2c7fa1cb2cf2b114a6c9bc132b6601db5a7c8 (diff)
Add cgroupstats
This patch is inspired by the discussion at http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263. The patch is on top of 2.6.21-mm1 with Paul's cgroups v9 patches (forward ported) This patch implements per cgroup statistics infrastructure and re-uses code from the taskstats interface. A new set of cgroup operations are registered with commands and attributes. It should be very easy to *extend* per cgroup statistics, by adding members to the cgroupstats structure. The current model for cgroupstats is a pull, a push model (to post statistics on interesting events), should be very easy to add. Currently user space requests for statistics by passing the cgroup file descriptor. Statistics about the state of all the tasks in the cgroup is returned to user space. TODO's/NOTE: This patch provides an infrastructure for implementing cgroup statistics. Based on the needs of each controller, we can incrementally add more statistics, event based support for notification of statistics, accumulation of taskstats into cgroup statistics in the future. Sample output # ./cgroupstats -C /cgroup/a sleeping 2, blocked 0, running 1, stopped 0, uninterruptible 0 # ./cgroupstats -C /cgroup/ sleeping 154, blocked 0, running 0, stopped 0, uninterruptible 0 If the approach looks good, I'll enhance and post the user space utility for the same Feedback, comments, test results are always welcome! [akpm@linux-foundation.org: build fix] Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c55
-rw-r--r--kernel/taskstats.c67
2 files changed, 122 insertions, 0 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d65a1246829f..ca38db223f84 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -44,6 +44,9 @@
#include <linux/string.h>
#include <linux/sort.h>
#include <linux/kmod.h>
+#include <linux/delayacct.h>
+#include <linux/cgroupstats.h>
+
#include <asm/atomic.h>
static DEFINE_MUTEX(cgroup_mutex);
@@ -1766,6 +1769,58 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cont)
return n;
}
+/**
+ * Build and fill cgroupstats so that taskstats can export it to user
+ * space.
+ *
+ * @stats: cgroupstats to fill information into
+ * @dentry: A dentry entry belonging to the cgroup for which stats have
+ * been requested.
+ */
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
+{
+ int ret = -EINVAL;
+ struct cgroup *cont;
+ struct cgroup_iter it;
+ struct task_struct *tsk;
+ /*
+ * Validate dentry by checking the superblock operations
+ */
+ if (dentry->d_sb->s_op != &cgroup_ops)
+ goto err;
+
+ ret = 0;
+ cont = dentry->d_fsdata;
+ rcu_read_lock();
+
+ cgroup_iter_start(cont, &it);
+ while ((tsk = cgroup_iter_next(cont, &it))) {
+ switch (tsk->state) {
+ case TASK_RUNNING:
+ stats->nr_running++;
+ break;
+ case TASK_INTERRUPTIBLE:
+ stats->nr_sleeping++;
+ break;
+ case TASK_UNINTERRUPTIBLE:
+ stats->nr_uninterruptible++;
+ break;
+ case TASK_STOPPED:
+ stats->nr_stopped++;
+ break;
+ default:
+ if (delayacct_is_task_waiting_on_io(tsk))
+ stats->nr_io_wait++;
+ break;
+ }
+ }
+ cgroup_iter_end(cont, &it);
+
+ rcu_read_unlock();
+err:
+ return ret;
+}
+
static int cmppid(const void *a, const void *b)
{
return *(pid_t *)a - *(pid_t *)b;
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 7d4d7f9c1bb2..9f360f68aad6 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,10 @@
#include <linux/delayacct.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
+#include <linux/cgroupstats.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/file.h>
#include <net/genetlink.h>
#include <asm/atomic.h>
@@ -49,6 +53,11 @@ __read_mostly = {
[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+static struct nla_policy
+cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
+ [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
+};
+
struct listener {
struct list_head list;
pid_t pid;
@@ -372,6 +381,51 @@ err:
return NULL;
}
+static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ int rc = 0;
+ struct sk_buff *rep_skb;
+ struct cgroupstats *stats;
+ struct nlattr *na;
+ size_t size;
+ u32 fd;
+ struct file *file;
+ int fput_needed;
+
+ na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
+ if (!na)
+ return -EINVAL;
+
+ fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
+ file = fget_light(fd, &fput_needed);
+ if (file) {
+ size = nla_total_size(sizeof(struct cgroupstats));
+
+ rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
+ size);
+ if (rc < 0)
+ goto err;
+
+ na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
+ sizeof(struct cgroupstats));
+ stats = nla_data(na);
+ memset(stats, 0, sizeof(*stats));
+
+ rc = cgroupstats_build(stats, file->f_dentry);
+ if (rc < 0)
+ goto err;
+
+ fput_light(file, fput_needed);
+ return send_reply(rep_skb, info->snd_pid);
+ }
+
+err:
+ if (file)
+ fput_light(file, fput_needed);
+ nlmsg_free(rep_skb);
+ return rc;
+}
+
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
@@ -522,6 +576,12 @@ static struct genl_ops taskstats_ops = {
.policy = taskstats_cmd_get_policy,
};
+static struct genl_ops cgroupstats_ops = {
+ .cmd = CGROUPSTATS_CMD_GET,
+ .doit = cgroupstats_user_cmd,
+ .policy = cgroupstats_cmd_get_policy,
+};
+
/* Needed early in initialization */
void __init taskstats_init_early(void)
{
@@ -546,8 +606,15 @@ static int __init taskstats_init(void)
if (rc < 0)
goto err;
+ rc = genl_register_ops(&family, &cgroupstats_ops);
+ if (rc < 0)
+ goto err_cgroup_ops;
+
family_registered = 1;
+ printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
return 0;
+err_cgroup_ops:
+ genl_unregister_ops(&family, &taskstats_ops);
err:
genl_unregister_family(&family);
return rc;