author     Amit Pundir <amit.pundir@linaro.org>    2018-12-11 00:07:21 +0530
committer  Amit Pundir <amit.pundir@linaro.org>    2018-12-11 00:07:21 +0530
commit     770f7163da89a92625f78c374e0c9cd82f78d190 (patch)
tree       f52e92f8ed6dcd4b01e0e77e71b3ab24d2afd250 /mm
parent     93ca8345070ef87e32a98ed799c473e68595c237 (diff)
parent     684d5e05df8cdc18791e76b11f6bc85ecd4d9416 (diff)
Merge branch 'linux-linaro-lsk-v4.4' into linux-linaro-lsk-v4.4-android
* linux-linaro-lsk-v4.4: (622 commits)
Linux 4.4.166
drm/ast: Remove existing framebuffers before loading driver
s390/mm: Check for valid vma before zapping in gmap_discard
namei: allow restricted O_CREAT of FIFOs and regular files
sched/core: Allow __sched_setscheduler() in interrupts when PI is not used
btrfs: Ensure btrfs_trim_fs can trim the whole filesystem
usb: xhci: fix uninitialized completion when USB3 port got wrong status
tty: wipe buffer if not echoing data
tty: wipe buffer.
iwlwifi: mvm: fix regulatory domain update when the firmware starts
scsi: qla2xxx: do not queue commands when unloading
scsi: ufshcd: release resources if probe fails
scsi: ufs: fix race between clock gating and devfreq scaling work
scsi: ufshcd: Fix race between clk scaling and ungate work
scsi: ufs: fix bugs related to null pointer access and array size
netfilter: nf_tables: fix oops when inserting an element into a verdict map
mwifiex: fix p2p device doesn't find in scan problem
mwifiex: Fix NULL pointer dereference in skb_dequeue()
cw1200: Don't leak memory if krealloc failes
Input: xpad - add support for Xbox1 PDP Camo series gamepad
...
Conflicts:
Makefile
arch/x86/Makefile
drivers/base/power/main.c
drivers/block/zram/zram_drv.c
kernel/debug/kdb/kdb_io.c
net/ipv6/route.c
scripts/Kbuild.include
Conflicts in the above files are fixed as done in AOSP Change-Id:
I5bd20327e0c1139c46f74e8d5916fa0530a307d3 ("Merge 4.4.165 into android-4.4").
arch/arm64/include/asm/cpufeature.h
arch/arm64/kernel/cpufeature.c
Conflicts in the above files are due to AOSP Change-Id:
I11cb874d12a7d0921f452c62b0752e0028a8e0a7 ("FROMLIST: arm64: entry: Add
fake CPU feature for unmapping the kernel at EL0"), which needed minor
rebasing.
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/gup.c          |  2
-rw-r--r--  mm/huge_memory.c  |  6
-rw-r--r--  mm/hugetlb.c      | 66
-rw-r--r--  mm/madvise.c      |  2
-rw-r--r--  mm/mempolicy.c    | 32
-rw-r--r--  mm/mmap.c         | 13
-rw-r--r--  mm/mremap.c       | 21
-rw-r--r--  mm/rmap.c         | 56
-rw-r--r--  mm/shmem.c        |  4
-rw-r--r--  mm/slub.c         |  6
-rw-r--r--  mm/vmstat.c       |  4
11 files changed, 183 insertions, 29 deletions
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -940,8 +940,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 	int locked = 0;
 	long ret = 0;
 
-	VM_BUG_ON(start & ~PAGE_MASK);
-	VM_BUG_ON(len != PAGE_ALIGN(len));
 	end = start + len;
 
 	for (nstart = start; nstart < end; nstart = nend) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 67f5a8ca0af1..b2d56b1c3276 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1510,7 +1510,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 	spinlock_t *old_ptl, *new_ptl;
 	int ret = 0;
 	pmd_t pmd;
-
+	bool force_flush = false;
 	struct mm_struct *mm = vma->vm_mm;
 
 	if ((old_addr & ~HPAGE_PMD_MASK) ||
@@ -1538,6 +1538,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
+		if (pmd_present(pmd))
+			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
 		if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
@@ -1546,6 +1548,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
 		}
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
+		if (force_flush)
+			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
 		if (new_ptl != old_ptl)
 			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a813b03021b7..6f99a0f906bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3103,7 +3103,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
-	pte_t *src_pte, *dst_pte, entry;
+	pte_t *src_pte, *dst_pte, entry, dst_entry;
 	struct page *ptepage;
 	unsigned long addr;
 	int cow;
@@ -3131,15 +3131,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			break;
 		}
 
-		/* If the pagetables are shared don't copy or take references */
-		if (dst_pte == src_pte)
+		/*
+		 * If the pagetables are shared don't copy or take references.
+		 * dst_pte == src_pte is the common case of src/dest sharing.
+		 *
+		 * However, src could have 'unshared' and dst shares with
+		 * another vma.  If dst_pte !none, this implies sharing.
+		 * Check here before taking page table lock, and once again
+		 * after taking the lock below.
+		 */
+		dst_entry = huge_ptep_get(dst_pte);
+		if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
 			continue;
 
 		dst_ptl = huge_pte_lock(h, dst, dst_pte);
 		src_ptl = huge_pte_lockptr(h, src, src_pte);
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
-		if (huge_pte_none(entry)) { /* skip none entry */
+		dst_entry = huge_ptep_get(dst_pte);
+		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+			/*
+			 * Skip if src entry none.  Also, skip in the
+			 * unlikely case dst entry !none as this implies
+			 * sharing with another vma.
+			 */
 			;
 		} else if (unlikely(is_hugetlb_entry_migration(entry) ||
 				    is_hugetlb_entry_hwpoisoned(entry))) {
@@ -3537,6 +3552,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 		return err;
 	ClearPagePrivate(page);
 
+	/*
+	 * set page dirty so that it will not be removed from cache/file
+	 * by non-hugetlbfs specific code paths.
+	 */
+	set_page_dirty(page);
+
 	spin_lock(&inode->i_lock);
 	inode->i_blocks += blocks_per_huge_page(h);
 	spin_unlock(&inode->i_lock);
@@ -4195,13 +4216,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 	/*
 	 * check on proper vm_flags and page table alignment
 	 */
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    vma->vm_start <= base && end <= vma->vm_end)
+	if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
 		return true;
 	return false;
 }
 
 /*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+	unsigned long check_addr = *start;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return;
+
+	for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+		unsigned long a_start = check_addr & PUD_MASK;
+		unsigned long a_end = a_start + PUD_SIZE;
+
+		/*
+		 * If sharing is possible, adjust start/end if necessary.
+		 */
+		if (range_in_vma(vma, a_start, a_end)) {
+			if (a_start < *start)
+				*start = a_start;
+			if (a_end > *end)
+				*end = a_end;
+		}
+	}
+}
+
+/*
  * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
  * and returns the corresponding pte. While this is not necessary for the
  * !shared pmd case because we can allocate the pmd later as well, it makes the
@@ -4297,6 +4346,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+}
 #define want_pmd_share()	(0)
 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
 
diff --git a/mm/madvise.c b/mm/madvise.c
index b04f2d26cdb8..d1d09bdf9a72 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -76,7 +76,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
 		new_flags |= VM_DONTDUMP;
 		break;
 	case MADV_DODUMP:
-		if (new_flags & VM_SPECIAL) {
+		if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
 			error = -EINVAL;
 			goto out;
 		}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b9b2e25342d4..e48fa18e1828 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2011,8 +2011,36 @@ retry_cpuset:
 		nmask = policy_nodemask(gfp, pol);
 		if (!nmask || node_isset(hpage_node, *nmask)) {
 			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
+			/*
+			 * We cannot invoke reclaim if __GFP_THISNODE
+			 * is set. Invoking reclaim with
+			 * __GFP_THISNODE set, would cause THP
+			 * allocations to trigger heavy swapping
+			 * despite there may be tons of free memory
+			 * (including potentially plenty of THP
+			 * already available in the buddy) on all the
+			 * other NUMA nodes.
+			 *
+			 * At most we could invoke compaction when
+			 * __GFP_THISNODE is set (but we would need to
+			 * refrain from invoking reclaim even if
+			 * compaction returned COMPACT_SKIPPED because
+			 * there wasn't not enough memory to succeed
+			 * compaction). For now just avoid
+			 * __GFP_THISNODE instead of limiting the
+			 * allocation path to a strict and single
+			 * compaction invocation.
+			 *
+			 * Supposedly if direct reclaim was enabled by
+			 * the caller, the app prefers THP regardless
+			 * of the node it comes from so this would be
+			 * more desiderable behavior than only
+			 * providing THP originated from the local
+			 * node in such case.
+			 */
+			if (!(gfp & __GFP_DIRECT_RECLAIM))
+				gfp |= __GFP_THISNODE;
+			page = __alloc_pages_node(hpage_node, gfp, order);
 			goto out;
 		}
 	}
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f8142e22490..f325aa33f327 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2838,10 +2838,6 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
 
-	len = PAGE_ALIGN(len);
-	if (!len)
-		return addr;
-
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
 	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
@@ -2909,12 +2905,19 @@ out:
 	return addr;
 }
 
-unsigned long vm_brk(unsigned long addr, unsigned long len)
+unsigned long vm_brk(unsigned long addr, unsigned long request)
 {
 	struct mm_struct *mm = current->mm;
+	unsigned long len;
 	unsigned long ret;
 	bool populate;
 
+	len = PAGE_ALIGN(request);
+	if (len < request)
+		return -ENOMEM;
+	if (!len)
+		return addr;
+
 	down_write(&mm->mmap_sem);
 	ret = do_brk(addr, len);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
diff --git a/mm/mremap.c b/mm/mremap.c
index fe7b7f65f4f4..450b306d473e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -96,6 +96,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
+	bool force_flush = false;
+	unsigned long len = old_end - old_addr;
 
 	/*
 	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
@@ -143,12 +145,26 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		if (pte_none(*old_pte))
 			continue;
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
+		/*
+		 * If we are remapping a valid PTE, make sure
+		 * to flush TLB before we drop the PTL for the PTE.
+		 *
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
+		 */
+		if (pte_present(pte))
+			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
@@ -168,7 +184,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -207,7 +222,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 				anon_vma_unlock_write(vma->anon_vma);
 			}
 			if (err > 0) {
-				need_flush = true;
 				continue;
 			} else if (!err) {
 				split_huge_page_pmd(vma, old_addr, old_pmd);
@@ -224,10 +238,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
 			  new_vma, new_pmd, new_addr, need_rmap_locks);
-		need_flush = true;
 	}
-	if (likely(need_flush))
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 1bceb49aa214..488dda209431 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1324,12 +1324,41 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	pte_t pteval;
 	spinlock_t *ptl;
 	int ret = SWAP_AGAIN;
+	unsigned long sh_address;
+	bool pmd_sharing_possible = false;
+	unsigned long spmd_start, spmd_end;
 	enum ttu_flags flags = (enum ttu_flags)arg;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
 		goto out;
 
+	/*
+	 * Only use the range_start/end mmu notifiers if huge pmd sharing
+	 * is possible.  In the normal case, mmu_notifier_invalidate_page
+	 * is sufficient as we only unmap a page.  However, if we unshare
+	 * a pmd, we will unmap a PUD_SIZE range.
+	 */
+	if (PageHuge(page)) {
+		spmd_start = address;
+		spmd_end = spmd_start + vma_mmu_pagesize(vma);
+
+		/*
+		 * Check if pmd sharing is possible.  If possible, we could
+		 * unmap a PUD_SIZE range.  spmd_start/spmd_end will be
+		 * modified if sharing is possible.
+		 */
+		adjust_range_if_pmd_sharing_possible(vma, &spmd_start,
+							&spmd_end);
+		if (spmd_end - spmd_start != vma_mmu_pagesize(vma)) {
+			sh_address = address;
+
+			pmd_sharing_possible = true;
+			mmu_notifier_invalidate_range_start(vma->vm_mm,
+							spmd_start, spmd_end);
+		}
+	}
+
 	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
@@ -1356,6 +1385,30 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		}
 	}
 
+	/*
+	 * Call huge_pmd_unshare to potentially unshare a huge pmd.  Pass
+	 * sh_address as it will be modified if unsharing is successful.
+	 */
+	if (PageHuge(page) && huge_pmd_unshare(mm, &sh_address, pte)) {
+		/*
+		 * huge_pmd_unshare unmapped an entire PMD page.  There is
+		 * no way of knowing exactly which PMDs may be cached for
+		 * this mm, so flush them all.  spmd_start/spmd_end cover
+		 * this PUD_SIZE range.
+		 */
+		flush_cache_range(vma, spmd_start, spmd_end);
+		flush_tlb_range(vma, spmd_start, spmd_end);
+
+		/*
+		 * The ref count of the PMD page was dropped which is part
+		 * of the way map counting is done for shared PMDs.  When
+		 * there is no other sharing, huge_pmd_unshare returns false
+		 * and we will unmap the actual page and drop map count
+		 * to zero.
+		 */
+		goto out_unmap;
+	}
+
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	if (should_defer_flush(mm, flags)) {
@@ -1450,6 +1503,9 @@ out_unmap:
 	if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
 		mmu_notifier_invalidate_page(mm, address);
 out:
+	if (pmd_sharing_possible)
+		mmu_notifier_invalidate_range_end(vma->vm_mm,
+						spmd_start, spmd_end);
 	return ret;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index afdd2b957f1b..a06b31b8ff8d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1818,9 +1818,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 	mutex_lock(&inode->i_mutex);
 	/* We're holding i_mutex so we can access i_size directly */
 
-	if (offset < 0)
-		offset = -EINVAL;
-	else if (offset >= inode->i_size)
+	if (offset < 0 || offset >= inode->i_size)
 		offset = -ENXIO;
 	else {
 		start = offset >> PAGE_CACHE_SHIFT;
diff --git a/mm/slub.c b/mm/slub.c
index 4a22f9d4e44d..191bbc3378d3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1712,7 +1712,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 {
 	struct page *page, *page2;
 	void *object = NULL;
-	int available = 0;
+	unsigned int available = 0;
 	int objects;
 
 	/*
@@ -4791,10 +4791,10 @@ static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 				 size_t length)
 {
-	unsigned long objects;
+	unsigned int objects;
 	int err;
 
-	err = kstrtoul(buf, 10, &objects);
+	err = kstrtouint(buf, 10, &objects);
 	if (err)
 		return err;
 	if (objects && !kmem_cache_has_cpu_partial(s))
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8640a185dfc6..6af9bbad94c7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -860,6 +860,9 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_SMP
 	"nr_tlb_remote_flush",
 	"nr_tlb_remote_flush_received",
+#else
+	"", /* nr_tlb_remote_flush */
+	"", /* nr_tlb_remote_flush_received */
 #endif /* CONFIG_SMP */
 	"nr_tlb_local_flush_all",
 	"nr_tlb_local_flush_one",
@@ -868,7 +871,6 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_DEBUG_VM_VMACACHE
 	"vmacache_find_calls",
 	"vmacache_find_hits",
-	"vmacache_full_flushes",
#endif
 #endif /* CONFIG_VM_EVENTS_COUNTERS */
 };
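The mm/mmap.c hunks move the length handling out of do_brk() and into vm_brk(), adding an overflow check: PAGE_ALIGN() on a request near ULONG_MAX wraps past zero, so the old code could silently proceed with a tiny (or zero) length. A minimal userspace sketch of the same check — PAGE_SIZE, checked_align() and the sample values here are illustrative, not taken from the patch:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(len)	(((len) + PAGE_SIZE - 1) & PAGE_MASK)

/* Mirrors the patched vm_brk(): align first, then detect wrap-around. */
static long checked_align(unsigned long request)
{
	unsigned long len = PAGE_ALIGN(request);

	if (len < request)	/* addition wrapped past 0 */
		return -1;	/* vm_brk() returns -ENOMEM here */
	return (long)len;
}

int main(void)
{
	/* A sane request rounds up to the next page boundary. */
	printf("%ld\n", checked_align(5000UL));		/* prints 8192 */

	/* A near-ULONG_MAX request used to alias to a small aligned
	 * length; now it is rejected before reaching do_brk(). */
	printf("%ld\n", checked_align(~0UL - 10));	/* prints -1 */
	return 0;
}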
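The new hugetlb helper adjust_range_if_pmd_sharing_possible() is pure address arithmetic: any PUD_SIZE-aligned window (1 GB on x86-64, for example) that lies entirely inside the vma may be mapped by a shared pmd, so the range to unmap is widened to cover the whole window. A standalone sketch of that rounding, with explicit vma_start/vma_end bounds standing in for the vma and range_in_vma():

#include <stdio.h>

#define PUD_SIZE	(1UL << 30)
#define PUD_MASK	(~(PUD_SIZE - 1))

/* Widen [*start, *end) to cover every PUD-aligned window that lies
 * entirely inside [vma_start, vma_end), as the kernel helper does. */
static void adjust_range(unsigned long vma_start, unsigned long vma_end,
			 unsigned long *start, unsigned long *end)
{
	unsigned long check;

	for (check = *start; check < *end; check += PUD_SIZE) {
		unsigned long a_start = check & PUD_MASK;
		unsigned long a_end = a_start + PUD_SIZE;

		if (a_start >= vma_start && a_end <= vma_end) {
			if (a_start < *start)
				*start = a_start;
			if (a_end > *end)
				*end = a_end;
		}
	}
}

int main(void)
{
	/* 4 GB vma; a single-page range inside one 1 GB window. */
	unsigned long s = 5 * PUD_SIZE / 4, e = s + 4096;

	adjust_range(PUD_SIZE, 5 * PUD_SIZE, &s, &e);
	printf("0x%lx-0x%lx\n", s, e);	/* widened to the full PUD window */
	return 0;
}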
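The mm/vmstat.c hunk matters because vmstat_text is indexed by the vm_event_item enum: on !CONFIG_SMP builds the NR_TLB_REMOTE_FLUSH* items remained in the enum while their strings were compiled out, so every later counter printed under its neighbour's name. The empty-string placeholders restore the one-to-one correspondence. A toy reproduction of that misalignment — the enum and strings below are invented for the demo:

#include <stdio.h>

/* Toy event enum: all three items always exist... */
enum toy_event { TOY_LOCAL, TOY_REMOTE, TOY_GLOBAL, TOY_NR_EVENTS };

/* ...but imagine the "remote" string guarded by an #ifdef that is off: */
static const char * const broken_text[] = {
	"local",
	/* "remote" compiled out -- every later name shifts up by one */
	"global",
};

static const char * const fixed_text[] = {
	"local",
	"",		/* placeholder keeps indices aligned */
	"global",
};

int main(void)
{
	/* TOY_REMOTE misreports as "global" in the broken table, and
	 * TOY_GLOBAL would index past its two entries entirely. */
	printf("broken: event %d -> %s\n", TOY_REMOTE, broken_text[TOY_REMOTE]);
	printf("fixed:  event %d -> %s\n", TOY_GLOBAL, fixed_text[TOY_GLOBAL]);
	return 0;
}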