summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorChristoph Lameter <cl@linux.com>2012-03-21 16:34:06 -0700
committerBen Hutchings <ben@decadent.org.uk>2017-09-15 18:30:57 +0100
commit5e7b05a3a1aed0d2b8ecd85188554a44161133b5 (patch)
treee4388d370bb652efa9ab99dc0a70ba41b31e391a /mm
parent1c8d42255f4c55f9550f72bbcda758aeff3fd7c2 (diff)
mm: fix move/migrate_pages() race on task struct
commit 3268c63eded4612a3d07b56d1e02ce7731e6608e upstream. Migration functions perform the rcu_read_unlock too early. As a result the task pointed to may change from under us. This can result in an oops, as reported by Dave Hansen in https://lkml.org/lkml/2012/2/23/302. The following patch extend the period of the rcu_read_lock until after the permissions checks are done. We also take a refcount so that the task reference is stable when calling security check functions and performing cpuset node validation (which takes a mutex). The refcount is dropped before actual page migration occurs so there is no change to the refcounts held during page migration. Also move the determination of the mm of the task struct to immediately before the do_migrate*() calls so that it is clear that we switch from handling the task during permission checks to the mm for the actual migration. Since the determination is only done once and we then no longer use the task_struct we can be sure that we operate on a specific address space that will not change from under us. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Christoph Lameter <cl@linux.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Reported-by: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Diffstat (limited to 'mm')
-rw-r--r--mm/mempolicy.c32
-rw-r--r--mm/migrate.c36
2 files changed, 38 insertions, 30 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c55593549cc9..bf288e27686c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1334,12 +1334,9 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
err = -ESRCH;
goto out;
}
- mm = get_task_mm(task);
- rcu_read_unlock();
+ get_task_struct(task);
err = -EINVAL;
- if (!mm)
- goto out;
/*
* Check if this process has the right to modify the specified
@@ -1347,14 +1344,13 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- rcu_read_lock();
tcred = __task_cred(task);
if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
rcu_read_unlock();
err = -EPERM;
- goto out;
+ goto out_put;
}
rcu_read_unlock();
@@ -1362,26 +1358,36 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
/* Is the user allowed to access the target nodes? */
if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
err = -EPERM;
- goto out;
+ goto out_put;
}
if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) {
err = -EINVAL;
- goto out;
+ goto out_put;
}
err = security_task_movememory(task);
if (err)
- goto out;
+ goto out_put;
- err = do_migrate_pages(mm, old, new,
- capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
-out:
+ mm = get_task_mm(task);
+ put_task_struct(task);
if (mm)
- mmput(mm);
+ err = do_migrate_pages(mm, old, new,
+ capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
+ else
+ err = -EINVAL;
+
+ mmput(mm);
+out:
NODEMASK_SCRATCH_FREE(scratch);
return err;
+
+out_put:
+ put_task_struct(task);
+ goto out;
+
}
diff --git a/mm/migrate.c b/mm/migrate.c
index f2d86f2bd203..2114bfcea76c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1198,20 +1198,17 @@ set_status:
* Migrate an array of page address onto an array of nodes and fill
* the corresponding array of status.
*/
-static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
unsigned long nr_pages,
const void __user * __user *pages,
const int __user *nodes,
int __user *status, int flags)
{
struct page_to_node *pm;
- nodemask_t task_nodes;
unsigned long chunk_nr_pages;
unsigned long chunk_start;
int err;
- task_nodes = cpuset_mems_allowed(task);
-
err = -ENOMEM;
pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
if (!pm)
@@ -1373,6 +1370,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
struct task_struct *task;
struct mm_struct *mm;
int err;
+ nodemask_t task_nodes;
/* Check flags */
if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1388,11 +1386,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
rcu_read_unlock();
return -ESRCH;
}
- mm = get_task_mm(task);
- rcu_read_unlock();
-
- if (!mm)
- return -EINVAL;
+ get_task_struct(task);
/*
* Check if this process has the right to modify the specified
@@ -1400,7 +1394,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- rcu_read_lock();
tcred = __task_cred(task);
if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
cred->uid != tcred->suid && cred->uid != tcred->uid &&
@@ -1415,16 +1408,25 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
if (err)
goto out;
- if (nodes) {
- err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
- flags);
- } else {
- err = do_pages_stat(mm, nr_pages, pages, status);
- }
+ task_nodes = cpuset_mems_allowed(task);
+ mm = get_task_mm(task);
+ put_task_struct(task);
+
+ if (mm) {
+ if (nodes)
+ err = do_pages_move(mm, task_nodes, nr_pages, pages,
+ nodes, status, flags);
+ else
+ err = do_pages_stat(mm, nr_pages, pages, status);
+ } else
+ err = -EINVAL;
-out:
mmput(mm);
return err;
+
+out:
+ put_task_struct(task);
+ return err;
}
/*