From 0542f17bf2c1f2430d368f44c8fcf2f82ec9e53e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 17:51:47 -0600 Subject: userns: Document what the invariant required for safe unprivileged mappings. The rule is simple. Don't allow anything that wouldn't be allowed without unprivileged mappings. It was previously overlooked that establishing gid mappings would allow dropping groups and potentially gaining permission to files and directories that had lesser permissions for a specific group than for all other users. This is the rule needed to fix CVE-2014-8989 and prevent any other security issues with new_idmap_permitted. The reason for this rule is that the unix permission model is old and there are programs out there somewhere that take advantage of every little corner of it. So allowing a uid or gid mapping to be established without privielge that would allow anything that would not be allowed without that mapping will result in expectations from some code somewhere being violated. Violated expectations about the behavior of the OS is a long way to say a security issue. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index aa312b0dc3ec..b99c862a2e3f 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -812,7 +812,9 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { - /* Allow mapping to your own filesystem ids */ + /* Don't allow mappings that would allow anything that wouldn't + * be allowed without the establishment of unprivileged mappings. + */ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { -- cgit v1.2.3 From 273d2c67c3e179adb1e74f403d1e9a06e3f841b5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 18:01:11 -0600 Subject: userns: Don't allow setgroups until a gid mapping has been setablished setgroups is unique in not needing a valid mapping before it can be called, in the case of setgroups(0, NULL) which drops all supplemental groups. The design of the user namespace assumes that CAP_SETGID can not actually be used until a gid mapping is established. Therefore add a helper function to see if the user namespace gid mapping has been established and call that function in the setgroups permission check. This is part of the fix for CVE-2014-8989, being able to drop groups without privilege using user namespaces. Cc: stable@vger.kernel.org Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index b99c862a2e3f..27c8dab48c07 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -843,6 +843,20 @@ static bool new_idmap_permitted(const struct file *file, return false; } +bool userns_may_setgroups(const struct user_namespace *ns) +{ + bool allowed; + + mutex_lock(&id_map_mutex); + /* It is not safe to use setgroups until a gid mapping in + * the user namespace has been established. + */ + allowed = ns->gid_map.nr_extents != 0; + mutex_unlock(&id_map_mutex); + + return allowed; +} + static void *userns_get(struct task_struct *task) { struct user_namespace *user_ns; -- cgit v1.2.3 From be7c6dba2332cef0677fbabb606e279ae76652c3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 18:14:19 -0600 Subject: userns: Don't allow unprivileged creation of gid mappings As any gid mapping will allow and must allow for backwards compatibility dropping groups don't allow any gid mappings to be established without CAP_SETGID in the parent user namespace. For a small class of applications this change breaks userspace and removes useful functionality. This small class of applications includes tools/testing/selftests/mount/unprivilged-remount-test.c Most of the removed functionality will be added back with the addition of a one way knob to disable setgroups. Once setgroups is disabled setting the gid_map becomes as safe as setting the uid_map. For more common applications that set the uid_map and the gid_map with privilege this change will have no affect. This is part of a fix for CVE-2014-8989. Cc: stable@vger.kernel.org Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 27c8dab48c07..1ce6d67c07b7 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -821,10 +821,6 @@ static bool new_idmap_permitted(const struct file *file, kuid_t uid = make_kuid(ns->parent, id); if (uid_eq(uid, file->f_cred->fsuid)) return true; - } else if (cap_setid == CAP_SETGID) { - kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, file->f_cred->fsgid)) - return true; } } -- cgit v1.2.3 From 80dd00a23784b384ccea049bfb3f259d3f973b9d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 18:26:30 -0600 Subject: userns: Check euid no fsuid when establishing an unprivileged uid mapping setresuid allows the euid to be set to any of uid, euid, suid, and fsuid. Therefor it is safe to allow an unprivileged user to map their euid and use CAP_SETUID privileged with exactly that uid, as no new credentials can be obtained. I can not find a combination of existing system calls that allows setting uid, euid, suid, and fsuid from the fsuid making the previous use of fsuid for allowing unprivileged mappings a bug. This is part of a fix for CVE-2014-8989. Cc: stable@vger.kernel.org Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1ce6d67c07b7..9451b12a9b6c 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -819,7 +819,7 @@ static bool new_idmap_permitted(const struct file *file, u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) + if (uid_eq(uid, file->f_cred->euid)) return true; } } -- cgit v1.2.3 From f95d7918bd1e724675de4940039f2865e5eec5fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 26 Nov 2014 23:22:14 -0600 Subject: userns: Only allow the creator of the userns unprivileged mappings If you did not create the user namespace and are allowed to write to uid_map or gid_map you should already have the necessary privilege in the parent user namespace to establish any mapping you want so this will not affect userspace in practice. Limiting unprivileged uid mapping establishment to the creator of the user namespace makes it easier to verify all credentials obtained with the uid mapping can be obtained without the uid mapping without privilege. Limiting unprivileged gid mapping establishment (which is temporarily absent) to the creator of the user namespace also ensures that the combination of uid and gid can already be obtained without privilege. This is part of the fix for CVE-2014-8989. Cc: stable@vger.kernel.org Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9451b12a9b6c..1e34de2fbd60 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -812,14 +812,16 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { + const struct cred *cred = file->f_cred; /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && + uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->euid)) + if (uid_eq(uid, cred->euid)) return true; } } -- cgit v1.2.3 From f0d62aec931e4ae3333c797d346dc4f188f454ba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 9 Dec 2014 14:03:14 -0600 Subject: userns: Rename id_map_mutex to userns_state_mutex Generalize id_map_mutex so it can be used for more state of a user namespace. Cc: stable@vger.kernel.org Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1e34de2fbd60..44a555ac6104 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,6 +24,7 @@ #include static struct kmem_cache *user_ns_cachep __read_mostly; +static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, @@ -583,9 +584,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, return false; } - -static DEFINE_MUTEX(id_map_mutex); - static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -602,7 +600,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; /* - * The id_map_mutex serializes all writes to any given map. + * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * @@ -620,7 +618,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. */ - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); ret = -EPERM; /* Only allow one successful write to the map */ @@ -750,7 +748,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; out: - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); if (page) free_page(page); return ret; @@ -845,12 +843,12 @@ bool userns_may_setgroups(const struct user_namespace *ns) { bool allowed; - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); /* It is not safe to use setgroups until a gid mapping in * the user namespace has been established. */ allowed = ns->gid_map.nr_extents != 0; - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); return allowed; } -- cgit v1.2.3 From 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 2 Dec 2014 12:27:26 -0600 Subject: userns: Add a knob to disable setgroups on a per user namespace basis - Expose the knob to user space through a proc file /proc//setgroups A value of "deny" means the setgroups system call is disabled in the current processes user namespace and can not be enabled in the future in this user namespace. A value of "allow" means the segtoups system call is enabled. - Descendant user namespaces inherit the value of setgroups from their parents. - A proc file is used (instead of a sysctl) as sysctls currently do not allow checking the permissions at open time. - Writing to the proc file is restricted to before the gid_map for the user namespace is set. This ensures that disabling setgroups at a user namespace level will never remove the ability to call setgroups from a process that already has that ability. A process may opt in to the setgroups disable for itself by creating, entering and configuring a user namespace or by calling setns on an existing user namespace with setgroups disabled. Processes without privileges already can not call setgroups so this is a noop. Prodcess with privilege become processes without privilege when entering a user namespace and as with any other path to dropping privilege they would not have the ability to call setgroups. So this remains within the bounds of what is possible without a knob to disable setgroups permanently in a user namespace. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 44a555ac6104..6e80f4c1322b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -100,6 +100,11 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ + mutex_lock(&userns_state_mutex); + ns->flags = parent_ns->flags; + mutex_unlock(&userns_state_mutex); + set_cred_user_ns(new, ns); #ifdef CONFIG_PERSISTENT_KEYRINGS @@ -839,6 +844,84 @@ static bool new_idmap_permitted(const struct file *file, return false; } +int proc_setgroups_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + unsigned long userns_flags = ACCESS_ONCE(ns->flags); + + seq_printf(seq, "%s\n", + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? + "allow" : "deny"); + return 0; +} + +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + char kbuf[8], *pos; + bool setgroups_allowed; + ssize_t ret; + + /* Only allow a very narrow range of strings to be written */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= sizeof(kbuf))) + goto out; + + /* What was written? */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + ret = -EINVAL; + if (strncmp(pos, "allow", 5) == 0) { + pos += 5; + setgroups_allowed = true; + } + else if (strncmp(pos, "deny", 4) == 0) { + pos += 4; + setgroups_allowed = false; + } + else + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + ret = -EPERM; + mutex_lock(&userns_state_mutex); + if (setgroups_allowed) { + /* Enabling setgroups after setgroups has been disabled + * is not allowed. + */ + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) + goto out_unlock; + } else { + /* Permanently disabling setgroups after setgroups has + * been enabled by writing the gid_map is not allowed. + */ + if (ns->gid_map.nr_extents != 0) + goto out_unlock; + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; + } + mutex_unlock(&userns_state_mutex); + + /* Report a successful write */ + *ppos = count; + ret = count; +out: + return ret; +out_unlock: + mutex_unlock(&userns_state_mutex); + goto out; +} + bool userns_may_setgroups(const struct user_namespace *ns) { bool allowed; @@ -848,6 +931,8 @@ bool userns_may_setgroups(const struct user_namespace *ns) * the user namespace has been established. */ allowed = ns->gid_map.nr_extents != 0; + /* Is setgroups allowed? */ + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); mutex_unlock(&userns_state_mutex); return allowed; -- cgit v1.2.3 From 66d2f338ee4c449396b6f99f5e75cd18eb6df272 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 19:36:04 -0600 Subject: userns: Allow setting gid_maps without privilege when setgroups is disabled Now that setgroups can be disabled and not reenabled, setting gid_map without privielge can now be enabled when setgroups is disabled. This restores most of the functionality that was lost when unprivileged setting of gid_map was removed. Applications that use this functionality will need to check to see if they use setgroups or init_groups, and if they don't they can be fixed by simply disabling setgroups before writing to gid_map. Cc: stable@vger.kernel.org Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6e80f4c1322b..a2e37c5d2f63 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -826,6 +826,11 @@ static bool new_idmap_permitted(const struct file *file, kuid_t uid = make_kuid(ns->parent, id); if (uid_eq(uid, cred->euid)) return true; + } else if (cap_setid == CAP_SETGID) { + kgid_t gid = make_kgid(ns->parent, id); + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && + gid_eq(gid, cred->egid)) + return true; } } -- cgit v1.2.3 From 36476beac4f8ca9dc7722790b2e8ef0e8e51034e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 20:03:28 -0600 Subject: userns; Correct the comment in map_write It is important that all maps are less than PAGE_SIZE or else setting the last byte of the buffer to '0' could write off the end of the allocated storage. Correct the misleading comment. Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a2e37c5d2f63..ad419b04c146 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -643,7 +643,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (!page) goto out; - /* Only allow <= page size writes at the beginning of the file */ + /* Only allow < page size writes at the beginning of the file */ ret = -EINVAL; if ((*ppos != 0) || (count >= PAGE_SIZE)) goto out; -- cgit v1.2.3