From d92725256b4f22d084b813b37ddc394da79aacab Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 May 2022 14:34:50 -0400 Subject: mm: avoid unnecessary page fault retires on shared memory types I observed that for each of the shared file-backed page faults, we're very likely to retry one more time for the 1st write fault upon no page. It's because we'll need to release the mmap lock for dirty rate limit purpose with balance_dirty_pages_ratelimited() (in fault_dirty_shared_page()). Then after that throttling we return VM_FAULT_RETRY. We did that probably because VM_FAULT_RETRY is the only way we can return to the fault handler at that time telling it we've released the mmap lock. However that's not ideal because it's very likely the fault does not need to be retried at all since the pgtable was well installed before the throttling, so the next continuous fault (including taking mmap read lock, walk the pgtable, etc.) could be in most cases unnecessary. It's not only slowing down page faults for shared file-backed, but also add more mmap lock contention which is in most cases not needed at all. To observe this, one could try to write to some shmem page and look at "pgfault" value in /proc/vmstat, then we should expect 2 counts for each shmem write simply because we retried, and vm event "pgfault" will capture that. To make it more efficient, add a new VM_FAULT_COMPLETED return code just to show that we've completed the whole fault and released the lock. It's also a hint that we should very possibly not need another fault immediately on this page because we've just completed it. This patch provides a ~12% perf boost on my aarch64 test VM with a simple program sequentially dirtying 400MB shmem file being mmap()ed and these are the time it needs: Before: 650.980 ms (+-1.94%) After: 569.396 ms (+-1.38%) I believe it could help more than that. We need some special care on GUP and the s390 pgfault handler (for gmap code before returning from pgfault), the rest changes in the page fault handlers should be relatively straightforward. Another thing to mention is that mm_account_fault() does take this new fault as a generic fault to be accounted, unlike VM_FAULT_RETRY. I explicitly didn't touch hmm_vma_fault() and break_ksm() because they do not handle VM_FAULT_RETRY even with existing code, so I'm literally keeping them as-is. Link: https://lkml.kernel.org/r/20220530183450.42886-1-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Acked-by: Vineet Gupta Acked-by: Guo Ren Acked-by: Max Filippov Acked-by: Christian Borntraeger Acked-by: Michael Ellerman (powerpc) Acked-by: Catalin Marinas Reviewed-by: Alistair Popple Reviewed-by: Ingo Molnar Acked-by: Russell King (Oracle) [arm part] Acked-by: Heiko Carstens Cc: Vasily Gorbik Cc: Stafford Horne Cc: David S. Miller Cc: Johannes Berg Cc: Brian Cain Cc: Richard Henderson Cc: Richard Weinberger Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Janosch Frank Cc: Albert Ou Cc: Anton Ivanov Cc: Dave Hansen Cc: Borislav Petkov Cc: Sven Schnelle Cc: Andrea Arcangeli Cc: James Bottomley Cc: Al Viro Cc: Alexander Gordeev Cc: Jonas Bonn Cc: Will Deacon Cc: Vlastimil Babka Cc: Michal Simek Cc: Matt Turner Cc: Paul Mackerras Cc: David Hildenbrand Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Stefan Kristiansson Cc: Paul Walmsley Cc: Ivan Kokshaysky Cc: Chris Zankel Cc: Hugh Dickins Cc: Dinh Nguyen Cc: Rich Felker Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Thomas Bogendoerfer Cc: Helge Deller Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/arc/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arc') diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index dad27e4d69ff..5ca59a482632 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -146,6 +146,10 @@ retry: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + /* * Fault retry nuances, mmap_lock already relinquished by core mm */ -- cgit v1.2.3 From 5d260625b1f2affc00c596a2c8d6314865060b11 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 11 Jul 2022 12:35:56 +0530 Subject: arc/mm: enable ARCH_HAS_VM_GET_PAGE_PROT This enables ARCH_HAS_VM_GET_PAGE_PROT on the platform and exports standard vm_get_page_prot() implementation via DECLARE_VM_GET_PAGE_PROT, which looks up a private and static protection_map[] array. Subsequently all __SXXX and __PXXX macros can be dropped which are no longer needed. Link: https://lkml.kernel.org/r/20220711070600.2378316-23-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Vineet Gupta Cc: Arnd Bergmann Cc: Brian Cain Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: Chris Zankel Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: Jonas Bonn Cc: Michael Ellerman Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sam Ravnborg Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/arc/Kconfig | 1 + arch/arc/include/asm/pgtable-bits-arcv2.h | 18 ------------------ arch/arc/mm/mmap.c | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 18 deletions(-) (limited to 'arch/arc') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9e3653253ef2..8be56a5d8a9b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -13,6 +13,7 @@ config ARC select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_VM_GET_PAGE_PROT select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select ARCH_32BIT_OFF_T select BUILDTIME_TABLE_SORT diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 183d23bc1e00..b23be557403e 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -72,24 +72,6 @@ * This is to enable COW mechanism */ /* xwr */ -#define __P000 PAGE_U_NONE -#define __P001 PAGE_U_R -#define __P010 PAGE_U_R /* Pvt-W => !W */ -#define __P011 PAGE_U_R /* Pvt-W => !W */ -#define __P100 PAGE_U_X_R /* X => R */ -#define __P101 PAGE_U_X_R -#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ -#define __P111 PAGE_U_X_R /* Pvt-W => !W */ - -#define __S000 PAGE_U_NONE -#define __S001 PAGE_U_R -#define __S010 PAGE_U_W_R /* W => R */ -#define __S011 PAGE_U_W_R -#define __S100 PAGE_U_X_R /* X => R */ -#define __S101 PAGE_U_X_R -#define __S110 PAGE_U_X_W_R /* X => R */ -#define __S111 PAGE_U_X_W_R - #ifndef __ASSEMBLY__ #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 722d26b94307..fce5fa2b4f52 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -74,3 +74,23 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } + +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_U_NONE, + [VM_READ] = PAGE_U_R, + [VM_WRITE] = PAGE_U_R, + [VM_WRITE | VM_READ] = PAGE_U_R, + [VM_EXEC] = PAGE_U_X_R, + [VM_EXEC | VM_READ] = PAGE_U_X_R, + [VM_EXEC | VM_WRITE] = PAGE_U_X_R, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_U_X_R, + [VM_SHARED] = PAGE_U_NONE, + [VM_SHARED | VM_READ] = PAGE_U_R, + [VM_SHARED | VM_WRITE] = PAGE_U_W_R, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_U_W_R, + [VM_SHARED | VM_EXEC] = PAGE_U_X_R, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_U_X_R, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_U_X_W_R, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_U_X_W_R +}; +DECLARE_VM_GET_PAGE_PROT -- cgit v1.2.3 From 3d923c5f1e21ad491acd4c0d62bf2481ce94016c Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 11 Jul 2022 12:36:00 +0530 Subject: mm/mmap: drop ARCH_HAS_VM_GET_PAGE_PROT Now all the platforms enable ARCH_HAS_GET_PAGE_PROT. They define and export own vm_get_page_prot() whether custom or standard DECLARE_VM_GET_PAGE_PROT. Hence there is no need for default generic fallback for vm_get_page_prot(). Just drop this fallback and also ARCH_HAS_GET_PAGE_PROT mechanism. Link: https://lkml.kernel.org/r/20220711070600.2378316-27-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Reviewed-by: Christophe Leroy Acked-by: Geert Uytterhoeven Cc: Arnd Bergmann Cc: Brian Cain Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Chris Zankel Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Guo Ren Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: Jonas Bonn Cc: Michael Ellerman Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sam Ravnborg Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- arch/arc/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arc') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8be56a5d8a9b..9e3653253ef2 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -13,7 +13,6 @@ config ARC select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_VM_GET_PAGE_PROT select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select ARCH_32BIT_OFF_T select BUILDTIME_TABLE_SORT -- cgit v1.2.3