Diffstat (limited to 'mm/mprotect.c')
-rw-r--r--  mm/mprotect.c  176
1 file changed, 130 insertions(+), 46 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index b69ce7a7b2b7..ba5592655ee3 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -30,15 +30,17 @@
#include <linux/mm_inline.h>
#include <linux/pgtable.h>
#include <linux/sched/sysctl.h>
+#include <linux/userfaultfd_k.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
+#include <asm/tlb.h>
#include "internal.h"
-static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t newprot,
- unsigned long cp_flags)
+static unsigned long change_pte_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pte_t *pte, oldpte;
spinlock_t *ptl;
@@ -49,6 +51,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ tlb_change_page_size(tlb, PAGE_SIZE);
+
/*
* Can be called with only the mmap_lock for reading by
* prot_numa so we must check the pmd isn't constantly
@@ -149,9 +153,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = pte_mkwrite(ptent);
}
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+ if (pte_needs_flush(oldpte, ptent))
+ tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
+ struct page *page = pfn_swap_entry_to_page(entry);
pte_t newpte;
if (is_writable_migration_entry(entry)) {
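With the mmu_gather threaded through, per-pte flushes are no longer issued immediately: tlb_flush_pte_range() only records the range in the gather, and the actual invalidation is batched until tlb_finish_mmu(). pte_needs_flush() lets an architecture skip even that when the old and new pte cannot leave a stale, more-permissive TLB entry behind. A heavily simplified, illustrative sketch of that idea (an assumption for clarity, not the kernel's per-architecture implementation, which also considers dirty, accessed, exec and other bits):

/*
 * Illustrative only: flush when the update could leave a stale,
 * more-permissive cached translation behind.
 */
static inline bool example_pte_needs_flush(pte_t oldpte, pte_t newpte)
{
	/* Present -> not present: the old translation must be invalidated. */
	if (pte_present(oldpte) && !pte_present(newpte))
		return true;

	/* Write permission removed: a cached writable entry would be stale. */
	if (pte_write(oldpte) && !pte_write(newpte))
		return true;

	/* Otherwise the cached translation is not more permissive. */
	return false;
}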
@@ -159,8 +166,11 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
* A protection check is difficult so
* just be safe and disable write
*/
- entry = make_readable_migration_entry(
- swp_offset(entry));
+ if (PageAnon(page))
+ entry = make_readable_exclusive_migration_entry(
+ swp_offset(entry));
+ else
+ entry = make_readable_migration_entry(swp_offset(entry));
newpte = swp_entry_to_pte(entry);
if (pte_swp_soft_dirty(oldpte))
newpte = pte_swp_mksoft_dirty(newpte);
@@ -184,6 +194,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
newpte = pte_swp_mksoft_dirty(newpte);
if (pte_swp_uffd_wp(oldpte))
newpte = pte_swp_mkuffd_wp(newpte);
+ } else if (pte_marker_entry_uffd_wp(entry)) {
+ /*
+ * If this is a uffd-wp pte marker and we'd like
+ * to unprotect it, drop it; the next page
+ * fault will trigger without uffd trapping.
+ */
+ if (uffd_wp_resolve) {
+ pte_clear(vma->vm_mm, addr, pte);
+ pages++;
+ }
+ continue;
} else {
newpte = oldpte;
}
@@ -197,6 +218,20 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
set_pte_at(vma->vm_mm, addr, pte, newpte);
pages++;
}
+ } else {
+ /* It must be a none pte; what else could it be? */
+ WARN_ON_ONCE(!pte_none(oldpte));
+ if (unlikely(uffd_wp && !vma_is_anonymous(vma))) {
+ /*
+ * For file-backed memory, we need to be able to
+ * wr-protect a none pte, because even if the
+ * pte is none, the page cache or swap cache
+ * could still exist. Do that by installing a marker.
+ */
+ set_pte_at(vma->vm_mm, addr, pte,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+ pages++;
+ }
}
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
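The marker install above, together with the marker drop in the earlier hunk, is what makes userfaultfd write-protection usable on shmem and other file-backed mappings even when no pte is populated yet. A hedged userspace sketch of the intended usage follows; the ioctls and flags are the ones declared in linux/userfaultfd.h, error handling is omitted, and whether a given kernel accepts UFFDIO_REGISTER_MODE_WP on a file-backed mapping depends on its uffd-wp feature support:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>

/* Sketch: write-protect, then un-protect, a registered mapping via uffd-wp. */
static void uffd_wp_demo(void *addr, size_t len)
{
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	struct uffdio_api api = {
		.api = UFFD_API,
		.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP,
	};
	ioctl(uffd, UFFDIO_API, &api);

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)addr, .len = len },
		.mode  = UFFDIO_REGISTER_MODE_WP,
	};
	ioctl(uffd, UFFDIO_REGISTER, &reg);

	/* Write-protect: for file-backed memory this may install pte markers. */
	struct uffdio_writeprotect wp = {
		.range = { .start = (unsigned long)addr, .len = len },
		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
	};
	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);

	/* Un-protect: resolves the protection, dropping any uffd-wp markers. */
	wp.mode = 0;
	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);

	close(uffd);
}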
@@ -230,9 +265,42 @@ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
return 0;
}
-static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
- pud_t *pud, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+/* Return true if we're uffd wr-protecting file-backed memory, false otherwise */
+static inline bool
+uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags)
+{
+ return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma);
+}
+
+/*
+ * If wr-protecting the range for a file-backed VMA, populate the page table
+ * for the case where it is empty but the page cache exists. If
+ * {pte|pmd|...}_alloc() fails it means we are out of memory, and we have no
+ * better option than to stop.
+ */
+#define change_pmd_prepare(vma, pmd, cp_flags) \
+ do { \
+ if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \
+ if (WARN_ON_ONCE(pte_alloc(vma->vm_mm, pmd))) \
+ break; \
+ } \
+ } while (0)
+/*
+ * This is the general pud/p4d/pgd version of change_pmd_prepare(). We need a
+ * separate change_pmd_prepare() because pte_alloc() returns 0 on success,
+ * while {pmd|pud|p4d}_alloc() returns a valid pointer on success.
+ */
+#define change_prepare(vma, high, low, addr, cp_flags) \
+ do { \
+ if (unlikely(uffd_wp_protect_file(vma, cp_flags))) { \
+ low##_t *p = low##_alloc(vma->vm_mm, high, addr); \
+ if (WARN_ON_ONCE(p == NULL)) \
+ break; \
+ } \
+ } while (0)
+
+static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pmd_t *pmd;
unsigned long next;
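To see why the two helpers differ, it helps to expand change_prepare() at one level by hand. At the pud level, change_prepare(vma, pud, pmd, addr, cp_flags) pastes the `low` parameter into the allocator name, giving roughly the following (expansion written out for illustration):

/* Manual expansion of change_prepare(vma, pud, pmd, addr, cp_flags): */
do {
	if (unlikely(uffd_wp_protect_file(vma, cp_flags))) {
		pmd_t *p = pmd_alloc(vma->vm_mm, pud, addr);

		if (WARN_ON_ONCE(p == NULL))
			break;	/* allocation failed: out of memory, stop */
	}
} while (0);

pte_alloc(), by contrast, returns non-zero on failure, so change_pmd_prepare() has to test a return code rather than a pointer, which is why the two helpers cannot share one definition.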
@@ -248,6 +316,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
next = pmd_addr_end(addr, end);
+ change_pmd_prepare(vma, pmd, cp_flags);
/*
* Automatic NUMA balancing walks the tables with mmap_lock
* held for read. It's possible a parallel update to occur
@@ -269,11 +338,22 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
}
if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
- if (next - addr != HPAGE_PMD_SIZE) {
+ if ((next - addr != HPAGE_PMD_SIZE) ||
+ uffd_wp_protect_file(vma, cp_flags)) {
__split_huge_pmd(vma, pmd, addr, false, NULL);
+ /*
+ * For file-backed memory, the pmd could have
+ * been cleared; make sure it is repopulated if
+ * necessary, then fall through to the pte level.
+ */
+ change_pmd_prepare(vma, pmd, cp_flags);
} else {
- int nr_ptes = change_huge_pmd(vma, pmd, addr,
- newprot, cp_flags);
+ /*
+ * change_huge_pmd() does not defer TLB flushes,
+ * so no need to propagate the tlb argument.
+ */
+ int nr_ptes = change_huge_pmd(tlb, vma, pmd,
+ addr, newprot, cp_flags);
if (nr_ptes) {
if (nr_ptes == HPAGE_PMD_NR) {
@@ -287,8 +367,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
}
/* fall through, the trans huge pmd just split */
}
- this_pages = change_pte_range(vma, pmd, addr, next, newprot,
- cp_flags);
+ this_pages = change_pte_range(tlb, vma, pmd, addr, next,
+ newprot, cp_flags);
pages += this_pages;
next:
cond_resched();
@@ -302,9 +382,9 @@ next:
return pages;
}
-static inline unsigned long change_pud_range(struct vm_area_struct *vma,
- p4d_t *p4d, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+static inline unsigned long change_pud_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pud_t *pud;
unsigned long next;
@@ -313,18 +393,19 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
+ change_prepare(vma, pud, pmd, addr, cp_flags);
if (pud_none_or_clear_bad(pud))
continue;
- pages += change_pmd_range(vma, pud, addr, next, newprot,
+ pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
cp_flags);
} while (pud++, addr = next, addr != end);
return pages;
}
-static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
- pgd_t *pgd, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
p4d_t *p4d;
unsigned long next;
@@ -333,46 +414,44 @@ static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
+ change_prepare(vma, p4d, pud, addr, cp_flags);
if (p4d_none_or_clear_bad(p4d))
continue;
- pages += change_pud_range(vma, p4d, addr, next, newprot,
+ pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
cp_flags);
} while (p4d++, addr = next, addr != end);
return pages;
}
-static unsigned long change_protection_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end, pgprot_t newprot,
- unsigned long cp_flags)
+static unsigned long change_protection_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
unsigned long next;
- unsigned long start = addr;
unsigned long pages = 0;
BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
- flush_cache_range(vma, addr, end);
- inc_tlb_flush_pending(mm);
+ tlb_start_vma(tlb, vma);
do {
next = pgd_addr_end(addr, end);
+ change_prepare(vma, pgd, p4d, addr, cp_flags);
if (pgd_none_or_clear_bad(pgd))
continue;
- pages += change_p4d_range(vma, pgd, addr, next, newprot,
+ pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot,
cp_flags);
} while (pgd++, addr = next, addr != end);
- /* Only flush the TLB if we actually modified any entries: */
- if (pages)
- flush_tlb_range(vma, start, end);
- dec_tlb_flush_pending(mm);
+ tlb_end_vma(tlb, vma);
return pages;
}
-unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+unsigned long change_protection(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgprot_t newprot,
unsigned long cp_flags)
{
@@ -381,9 +460,10 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
if (is_vm_hugetlb_page(vma))
- pages = hugetlb_change_protection(vma, start, end, newprot);
+ pages = hugetlb_change_protection(vma, start, end, newprot,
+ cp_flags);
else
- pages = change_protection_range(vma, start, end, newprot,
+ pages = change_protection_range(tlb, vma, start, end, newprot,
cp_flags);
return pages;
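Since change_protection() now flushes through a caller-supplied mmu_gather instead of calling flush_tlb_range() itself, every caller has to bracket it the way do_mprotect_pkey() does below. A minimal sketch of the expected calling pattern (assuming the caller already holds the mmap lock):

	/* Caller-side pattern required after this change. */
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, vma->vm_mm);
	change_protection(&tlb, vma, start, end, newprot, cp_flags);
	tlb_finish_mmu(&tlb);	/* performs the batched TLB invalidation */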
@@ -417,8 +497,9 @@ static const struct mm_walk_ops prot_none_walk_ops = {
};
int
-mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
- unsigned long start, unsigned long end, unsigned long newflags)
+mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ struct vm_area_struct **pprev, unsigned long start,
+ unsigned long end, unsigned long newflags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long oldflags = vma->vm_flags;
@@ -505,7 +586,7 @@ success:
dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
vma_set_page_prot(vma);
- change_protection(vma, start, end, vma->vm_page_prot,
+ change_protection(tlb, vma, start, end, vma->vm_page_prot,
dirty_accountable ? MM_CP_DIRTY_ACCT : 0);
/*
@@ -539,6 +620,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
(prot & PROT_READ);
+ struct mmu_gather tlb;
start = untagged_addr(start);
@@ -598,6 +680,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
else
prev = vma->vm_prev;
+ tlb_gather_mmu(&tlb, current->mm);
for (nstart = start ; ; ) {
unsigned long mask_off_old_flags;
unsigned long newflags;
@@ -624,18 +707,18 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
/* newflags >> 4 shift VM_MAY% in place of VM_% */
if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) {
error = -EACCES;
- goto out;
+ break;
}
/* Allow architectures to sanity-check the new flags */
if (!arch_validate_flags(newflags)) {
error = -EINVAL;
- goto out;
+ break;
}
error = security_file_mprotect(vma, reqprot, prot);
if (error)
- goto out;
+ break;
tmp = vma->vm_end;
if (tmp > end)
@@ -644,27 +727,28 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
if (vma->vm_ops && vma->vm_ops->mprotect) {
error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
if (error)
- goto out;
+ break;
}
- error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
+ error = mprotect_fixup(&tlb, vma, &prev, nstart, tmp, newflags);
if (error)
- goto out;
+ break;
nstart = tmp;
if (nstart < prev->vm_end)
nstart = prev->vm_end;
if (nstart >= end)
- goto out;
+ break;
vma = prev->vm_next;
if (!vma || vma->vm_start != nstart) {
error = -ENOMEM;
- goto out;
+ break;
}
prot = reqprot;
}
+ tlb_finish_mmu(&tlb);
out:
mmap_write_unlock(current->mm);
return error;
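For completeness, the user-visible entry point is unchanged. A trivial, runnable example that exercises the rewritten path end to end (mprotect() -> do_mprotect_pkey() -> mprotect_fixup() -> change_protection()), using nothing beyond plain POSIX:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	memset(p, 0xaa, len);	/* populate ptes so there is something to change */

	/* Each call walks change_protection_range() under one mmu_gather. */
	if (mprotect(p, len, PROT_READ) ||
	    mprotect(p, len, PROT_READ | PROT_WRITE))
		return 1;

	printf("mprotect round-trip ok: %#x\n", (unsigned char)p[0]);
	munmap(p, len);
	return 0;
}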